PyPI - ethyca-fides - Versions diffs - 2.67.2b3__py2.py3-none-any.whl → 2.67.2rc0__py2.py3-none-any.whl - Mend

ethyca-fides 2.67.2b3__py2.py3-none-any.whl → 2.67.2rc0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ethyca-fides might be problematic. Click here for more details.

Files changed (114)

fides/api/task/manual/manual_task_graph_task.py CHANGED Viewed

@@ -7,11 +7,11 @@ from fides.api.common_exceptions import AwaitingAsyncTaskCallback
 from fides.api.models.attachment import AttachmentType
 from fides.api.models.manual_task import (
     ManualTask,
+    ManualTaskConfig,
     ManualTaskConfigurationType,
     ManualTaskEntityType,
     ManualTaskFieldType,
     ManualTaskInstance,
-    ManualTaskSubmission,
     StatusType,
 )
 from fides.api.models.privacy_request import PrivacyRequest
@@ -23,7 +23,6 @@ from fides.api.task.manual.manual_task_utils import (
     get_manual_task_for_connection_config,
 )
 from fides.api.util.collection_util import Row
-from fides.api.util.storage_util import format_size
 class ManualTaskGraphTask(GraphTask):
@@ -123,36 +122,29 @@ class ManualTaskGraphTask(GraphTask):
         # request has started, while allowing different config types (access vs erasure)
         # to have separate instances.
         # ------------------------------------------------------------------
-        existing_task_instance = next(
-            (
-                instance
-                for instance in privacy_request.manual_task_instances
-                if instance.task_id == manual_task.id
-                and instance.config.config_type == allowed_config_type
-            ),
-            None,
+        existing_task_instance = (
+            db.query(ManualTaskInstance)
+            .join(ManualTaskInstance.config)  # Join to access config information
+            .filter(
+                ManualTaskInstance.task_id == manual_task.id,
+                ManualTaskInstance.entity_id == privacy_request.id,
+                ManualTaskInstance.entity_type == ManualTaskEntityType.privacy_request,
+                # Only check for instances of the same config type
+                ManualTaskConfig.config_type == allowed_config_type,
+            )
+            .first()
         )
         if existing_task_instance:
             # An instance already exists for this privacy request and config type – no need
             # to create another one tied to a newer config version.
             return
-        # If no existing instances, create a new one for the current config
-        # There will only be one config of each type per manual task
-        config = next(
-            (
-                config
-                for config in sorted(
-                    manual_task.configs,
-                    key=lambda c: c.version if hasattr(c, "version") else 0,
-                    reverse=True,
-                )
-                if config.is_current and config.config_type == allowed_config_type
-            ),
-            None,
-        )
+        # Check each active config for instances (now we know none exist yet for this config type)
+        for config in manual_task.configs:
+            if not config.is_current or config.config_type != allowed_config_type:
+                # Skip configs that are not current or not relevant for this request type
+                continue
-        if config:
             ManualTaskInstance.create(
                 db=db,
                 data={
@@ -164,6 +156,7 @@ class ManualTaskGraphTask(GraphTask):
                 },
             )
+    # pylint: disable=too-many-branches,too-many-nested-blocks
     def _get_submitted_data(
         self,
         db: Session,
@@ -175,90 +168,93 @@ class ManualTaskGraphTask(GraphTask):
         Check if all manual task instances have submissions for ALL fields and return aggregated data
         Returns None if any field submissions are missing (all fields must be completed or skipped)
         """
-        candidate_instances: list[ManualTaskInstance] = [
-            instance
-            for instance in privacy_request.manual_task_instances
-            if instance.task_id == manual_task.id
-            and instance.config.config_type == allowed_config_type
-        ]
+        aggregated_data: dict[str, Any] = {}
+        def _format_size(size_bytes: int) -> str:
+            units = ["B", "KB", "MB", "GB", "TB"]
+            size = float(size_bytes)
+            for unit in units:
+                if size < 1024.0:
+                    return f"{size:.1f} {unit}"
+                size /= 1024.0
+            return f"{size:.1f} PB"
+        candidate_instances: list[ManualTaskInstance] = (
+            db.query(ManualTaskInstance)
+            .filter(
+                ManualTaskInstance.task_id == manual_task.id,
+                ManualTaskInstance.entity_id == privacy_request.id,
+                ManualTaskInstance.entity_type == ManualTaskEntityType.privacy_request,
+            )
+            .all()
+        )
         if not candidate_instances:
             return None  # No instance yet for this manual task
-        # Check for incomplete fields and update status in single pass
         for inst in candidate_instances:
-            if inst.incomplete_fields:
+            # Skip instances tied to other request types
+            if not inst.config or inst.config.config_type != allowed_config_type:
+                continue
+            all_fields = inst.config.field_definitions or []
+            # Every field must have a submission
+            if not all(inst.get_submission_for_field(f.id) for f in all_fields):
                 return None  # At least one instance still incomplete
-            # Update status if needed
+            # Ensure status set
             if inst.status != StatusType.completed:
                 inst.status = StatusType.completed
                 inst.save(db)
-        # Aggregate submission data from all instances
-        aggregated_data = self._aggregate_submission_data(candidate_instances)
-        return aggregated_data or None
-    def _aggregate_submission_data(
-        self, instances: list[ManualTaskInstance]
-    ) -> dict[str, Any]:
-        """Aggregate submission data from all instances into a single dictionary."""
-        aggregated_data: dict[str, Any] = {}
-        for inst in instances:
-            # Filter valid submissions and process them
-            valid_submissions = (
-                submission
-                for submission in inst.submissions
-                if (
-                    submission.field
-                    and submission.field.field_key
-                    and isinstance(submission.data, dict)
-                )
-            )
+            # Aggregate submission data from this instance
+            for submission in inst.submissions:
+                if not submission.field or not submission.field.field_key:
+                    continue
-            for submission in valid_submissions:
                 field_key = submission.field.field_key
-                # We already checked isinstance(submission.data, dict) in valid_submissions
-                data_dict: dict[str, Any] = submission.data  # type: ignore[assignment]
-                field_type = data_dict.get("field_type")
-                # Process field data based on type
-                aggregated_data[field_key] = (
-                    self._process_attachment_field(submission)
-                    if field_type == ManualTaskFieldType.attachment.value
-                    else data_dict.get("value")
-                )
+                if not isinstance(submission.data, dict):
+                    continue
-        return aggregated_data
+                data_dict: dict[str, Any] = submission.data
-    def _process_attachment_field(
-        self, submission: ManualTaskSubmission
-    ) -> Optional[dict[str, dict[str, Any]]]:
-        """Process attachment field and return attachment map or None."""
-        attachment_map: dict[str, dict[str, Any]] = {}
+                field_type = data_dict.get("field_type")
-        for attachment in filter(
-            lambda a: a.attachment_type == AttachmentType.include_with_access_package,
-            submission.attachments,
-        ):
-            try:
-                size, url = attachment.retrieve_attachment()
-                attachment_map[attachment.file_name] = {
-                    "url": str(url) if url else None,
-                    "size": (format_size(size) if size else "Unknown"),
-                }
-            except Exception as exc:  # pylint: disable=broad-exception-caught
-                logger.warning(
-                    f"Error retrieving attachment {attachment.file_name}: {str(exc)}"
-                )
-        return attachment_map or None
+                if field_type == ManualTaskFieldType.attachment.value:
+                    attachment_map: dict[str, dict[str, Any]] = {}
+                    for attachment in submission.attachments or []:
+                        if (
+                            attachment.attachment_type
+                            == AttachmentType.include_with_access_package
+                        ):
+                            try:
+                                size, url = attachment.retrieve_attachment()
+                                attachment_map[attachment.file_name] = {
+                                    "url": str(url) if url else None,
+                                    "size": (_format_size(size) if size else "Unknown"),
+                                }
+                            except (
+                                Exception
+                            ) as exc:  # pylint: disable=broad-exception-caught
+                                logger.warning(
+                                    "Error retrieving attachment {}: {}",
+                                    attachment.file_name,
+                                    str(exc),
+                                )
+                    aggregated_data[field_key] = attachment_map or None
+                else:
+                    aggregated_data[field_key] = data_dict.get("value")
+        return aggregated_data if aggregated_data else None
     def dry_run_task(self) -> int:
         """Return estimated row count for dry run - manual tasks don't have predictable counts"""
         return 1  # Placeholder - manual tasks generate variable data
-    # Provide erasure support for manual tasks
+    # NEW METHOD: Provide erasure support for manual tasks
     @retry(action_type=ActionType.erasure, default_return=0)
     def erasure_request(
         self,

fides/api/task/manual/manual_task_utils.py CHANGED Viewed

@@ -1,43 +1,40 @@
-from typing import Optional
-from loguru import logger
 from sqlalchemy.orm import Session
 from fides.api.graph.config import (
     Collection,
     CollectionAddress,
-    FieldAddress,
+    Field,
     GraphDataset,
     ScalarField,
 )
+from fides.api.graph.graph import Node
+from fides.api.graph.traversal import TraversalNode
 from fides.api.models.connectionconfig import ConnectionConfig
 # Import application models
 from fides.api.models.manual_task import (
     ManualTask,
-    ManualTaskConditionalDependencyType,
+    ManualTaskConfig,
     ManualTaskConfigurationType,
+    ManualTaskEntityType,
+    ManualTaskInstance,
 )
+from fides.api.models.privacy_request import PrivacyRequest
+from fides.api.schemas.policy import ActionType
 from fides.api.task.manual.manual_task_address import ManualTaskAddress
-PRIVACY_REQUEST_CONFIG_TYPES = {
-    ManualTaskConfigurationType.access_privacy_request,
-    ManualTaskConfigurationType.erasure_privacy_request,
-}
 def get_connection_configs_with_manual_tasks(db: Session) -> list[ConnectionConfig]:
     """
     Get all connection configs that have manual tasks.
     """
-    connection_configs = (
+    return (
         db.query(ConnectionConfig)
         .join(ManualTask, ConnectionConfig.id == ManualTask.parent_entity_id)
         .filter(ManualTask.parent_entity_type == "connection_config")
         .filter(ConnectionConfig.disabled.is_(False))
         .all()
     )
-    return connection_configs
 def get_manual_task_addresses(db: Session) -> list[CollectionAddress]:
@@ -51,11 +48,12 @@ def get_manual_task_addresses(db: Session) -> list[CollectionAddress]:
     # Get all connection configs that have manual tasks (excluding disabled ones)
     connection_configs_with_manual_tasks = get_connection_configs_with_manual_tasks(db)
-    # Return addresses for all connections that have manual tasks
-    return [
-        ManualTaskAddress.create(config.key)
-        for config in connection_configs_with_manual_tasks
-    ]
+    # Create addresses for all connections that have manual tasks
+    manual_task_addresses = []
+    for config in connection_configs_with_manual_tasks:
+        manual_task_addresses.append(ManualTaskAddress.create(config.key))
+    return manual_task_addresses
 def get_manual_task_for_connection_config(
@@ -75,18 +73,20 @@ def get_manual_task_for_connection_config(
     )
-def create_data_category_scalar_fields(manual_task: ManualTask) -> list[ScalarField]:
+def create_manual_data_traversal_node(
+    db: Session, address: CollectionAddress
+) -> "TraversalNode":
     """
-    Create scalar fields for each field in the given manual task configs.
+    Create a TraversalNode for a manual_data collection
     """
-    fields = []
-    # Get current privacy request configs for this manual task
-    current_configs = [
-        config
-        for config in manual_task.configs
-        if config.is_current and config.config_type in PRIVACY_REQUEST_CONFIG_TYPES
-    ]
-    for config in current_configs:
+    connection_key = address.dataset
+    # Get manual tasks for this connection to determine fields
+    manual_task = get_manual_task_for_connection_config(db, connection_key)
+    # Create fields based on ManualTaskConfigFields
+    fields: list[Field] = []
+    for config in manual_task.configs:
         for field in config.field_definitions:
             # Create a scalar field for each manual task field
             # Extract data categories from field metadata if available
@@ -99,94 +99,211 @@ def create_data_category_scalar_fields(manual_task: ManualTask) -> list[ScalarFi
                 # Manual task fields don't have complex relationships
             )
             fields.append(scalar_field)
-    return fields
-def create_conditional_dependency_scalar_fields(
-    field_addresses: set[str],
-) -> list[ScalarField]:
-    fields: list[ScalarField] = []
-    for field_address in field_addresses:
-        # Use the full field address as the field name to preserve collection context
-        # This allows the manual task to receive data from specific collections
-        # e.g., "user.name" or "customer.profile.email" instead of just "name" or "email"
-        logger.info(
-            f"Creating conditional dependency scalar field for field address: {field_address}"
-        )
-        field_address_obj = FieldAddress.from_string(field_address)
-        scalar_field = ScalarField(
-            name=field_address_obj.value,
-            # Conditional dependency fields don't have predefined data categories
-            data_categories=[],
-            references=[(field_address_obj, "from")],
-        )
-        fields.append(scalar_field)
+    # Create a synthetic Collection
+    collection = Collection(
+        name=ManualTaskAddress.MANUAL_DATA_COLLECTION,
+        fields=fields,
+        # Manual tasks don't have complex dependencies
+        after=set(),
+    )
-    return fields
+    # Create a synthetic GraphDataset
+    dataset = GraphDataset(
+        name=connection_key,
+        collections=[collection],
+        connection_key=connection_key,
+        after=set(),
+    )
+    node = Node(dataset, collection)
+    traversal_node = TraversalNode(node)
-def create_collection_for_connection_key(
-    db: Session, connection_key: str
-) -> Optional[Collection]:
-    # Get the manual task for this connection config
-    manual_task = get_manual_task_for_connection_config(db, connection_key)
+    return traversal_node
+def create_manual_task_instances_for_privacy_request(
+    db: Session, privacy_request: PrivacyRequest
+) -> list[ManualTaskInstance]:
+    """Create ManualTaskInstance entries for all active manual tasks relevant to a privacy request."""
+    instances = []
+    # Get all connection configs that have manual tasks (excluding disabled ones)
+    connection_configs_with_manual_tasks = get_connection_configs_with_manual_tasks(db)
-    if not manual_task:
-        return None
-    # Get conditional dependency field addresses - raw field data
-    conditional_field_addresses: set[str] = {
-        dependency.field_address
-        for dependency in manual_task.conditional_dependencies
-        if dependency.condition_type == ManualTaskConditionalDependencyType.leaf
-        and dependency.field_address is not None
-    }
-    # Create scalar fields for data category fields and conditional dependency field addresses
-    fields: list[ScalarField] = []
-    fields.extend(create_data_category_scalar_fields(manual_task))
-    fields.extend(
-        create_conditional_dependency_scalar_fields(conditional_field_addresses)
+    # Determine the privacy request type based on policy rules
+    has_access_rules = bool(
+        privacy_request.policy.get_rules_for_action(action_type=ActionType.access)
+    )
+    has_erasure_rules = bool(
+        privacy_request.policy.get_rules_for_action(action_type=ActionType.erasure)
     )
-    # Only create collection if there are fields
-    if not fields:
-        return None
+    for connection_config in connection_configs_with_manual_tasks:
+        manual_tasks = (
+            db.query(ManualTask)
+            .filter(
+                ManualTask.parent_entity_id == connection_config.id,
+                ManualTask.parent_entity_type == "connection_config",
+            )
+            .all()
+        )
-    return Collection(name=ManualTaskAddress.MANUAL_DATA_COLLECTION, fields=fields)
+        for manual_task in manual_tasks:
+            # Get the active config for this manual task, filtered by request type
+            active_config_query = db.query(ManualTaskConfig).filter(
+                ManualTaskConfig.task_id == manual_task.id,
+                ManualTaskConfig.is_current.is_(True),
+            )
+            # Filter by configuration type based on privacy request type
+            if has_access_rules and has_erasure_rules:
+                # If both access and erasure rules exist, include both types
+                active_config_query = active_config_query.filter(
+                    ManualTaskConfig.config_type.in_(
+                        [
+                            ManualTaskConfigurationType.access_privacy_request,
+                            ManualTaskConfigurationType.erasure_privacy_request,
+                        ]
+                    )
+                )
+            elif has_access_rules:
+                # Only access rules - only include access configurations
+                active_config_query = active_config_query.filter(
+                    ManualTaskConfig.config_type
+                    == ManualTaskConfigurationType.access_privacy_request
+                )
+            elif has_erasure_rules:
+                # Only erasure rules - only include erasure configurations
+                active_config_query = active_config_query.filter(
+                    ManualTaskConfig.config_type
+                    == ManualTaskConfigurationType.erasure_privacy_request
+                )
+            else:
+                # No relevant rules - skip this manual task
+                continue
+            active_configs = active_config_query.all()
+            if not active_configs:
+                continue  # Skip if no active configs
+            # Create instances for each active config
+            for active_config in active_configs:
+                # Check if instance already exists for this config
+                existing_instance = (
+                    db.query(ManualTaskInstance)
+                    .filter(
+                        ManualTaskInstance.entity_id == privacy_request.id,
+                        ManualTaskInstance.entity_type == "privacy_request",
+                        ManualTaskInstance.task_id == manual_task.id,
+                        ManualTaskInstance.config_id == active_config.id,
+                    )
+                    .first()
+                )
+                if not existing_instance:
+                    instance = ManualTaskInstance(
+                        entity_id=privacy_request.id,
+                        entity_type=ManualTaskEntityType.privacy_request,
+                        task_id=manual_task.id,
+                        config_id=active_config.id,
+                    )
+                    db.add(instance)
+                    instances.append(instance)
+    if instances:
+        db.commit()
+    return instances
+def get_manual_task_instances_for_privacy_request(
+    db: Session, privacy_request: PrivacyRequest
+) -> list[ManualTaskInstance]:
+    """Get all manual task instances for a privacy request."""
+    return (
+        db.query(ManualTaskInstance)
+        .filter(
+            ManualTaskInstance.entity_id == privacy_request.id,
+            ManualTaskInstance.entity_type == "privacy_request",
+        )
+        .all()
+    )
-def create_manual_task_artificial_graphs(db: Session) -> list[GraphDataset]:
+def create_manual_task_artificial_graphs(
+    db: Session,
+) -> list:
     """
     Create artificial GraphDataset objects for manual tasks that can be included
     in the main dataset graph during the dataset configuration phase.
-    Each manual task gets its own collection with its own dependencies based on
-    its specific conditional dependencies. This allows individual manual tasks
-    to receive only the data they need from regular tasks.
+    Manual tasks should be treated as data sources/datasets rather than being
+    appended to the traversal graph later.
+    Manual task collections are designed as root nodes that execute immediately when
+    the privacy request starts, in parallel with identity processing. They don't depend
+    on identity data since they provide manually-entered data rather than consuming it.
     Args:
         db: Database session
+        policy: The policy being executed (optional, for filtering manual task configs)
     Returns:
-        List of GraphDataset objects representing manual tasks as individual collections
+        List of GraphDataset objects representing manual tasks as root nodes
     """
     manual_task_graphs = []
     manual_addresses = get_manual_task_addresses(db)
     for address in manual_addresses:
         connection_key = address.dataset
-        # Get the collection for this connection config using the reusable function
-        collection = create_collection_for_connection_key(db, connection_key)
+        # Get manual tasks for this connection to determine fields
+        manual_task = get_manual_task_for_connection_config(db, connection_key)
+        # Create fields based only on ManualTaskConfigFields
+        fields: list = []
+        # Manual task collections act as root nodes - they don't need identity dependencies
+        # since they provide manually-entered data rather than consuming identity data.
+        current_configs = [
+            config for config in manual_task.configs if config.is_current
+        ]
+        for config in current_configs:
+            if config.config_type not in [
+                ManualTaskConfigurationType.access_privacy_request,
+                ManualTaskConfigurationType.erasure_privacy_request,
+            ]:
+                continue
+            for field in config.field_definitions:
+                # Create a scalar field for each manual task field
+                field_metadata = field.field_metadata or {}
+                data_categories = field_metadata.get("data_categories", [])
+                scalar_field = ScalarField(
+                    name=field.field_key,
+                    data_categories=data_categories,
+                )
+                fields.append(scalar_field)
+        if fields:  # Only create graph if there are fields
+            # Create a synthetic Collection
+            collection = Collection(
+                name=ManualTaskAddress.MANUAL_DATA_COLLECTION,
+                fields=fields,
+                # Manual tasks have no dependencies - they're root nodes
+                after=set(),
+            )
-        if collection:  # Only create graph if there are collections
-            # Create a synthetic GraphDataset with all manual task collections
+            # Create a synthetic GraphDataset
             graph_dataset = GraphDataset(
                 name=connection_key,
                 collections=[collection],
                 connection_key=connection_key,
+                after=set(),
             )
             manual_task_graphs.append(graph_dataset)

fides/api/tasks/storage.py CHANGED Viewed

@@ -122,9 +122,6 @@ def upload_to_s3(  # pylint: disable=R0913
         s3_client = get_s3_client(
             auth_method,
             storage_secrets,
-            assume_role_arn=CONFIG.credentials.get(  # pylint: disable=no-member
-                "storage", {}
-            ).get("aws_s3_assume_role_arn"),
         )
     except (ClientError, ParamValidationError) as e:
         logger.error(f"Error getting s3 client: {str(e)}")

fides/api/util/aws_util.py CHANGED Viewed

@@ -7,6 +7,7 @@ from loguru import logger
 from fides.api.common_exceptions import StorageUploadError
 from fides.api.schemas.storage.storage import AWSAuthMethod, StorageSecrets
+from fides.config import CONFIG
 def get_aws_session(
@@ -94,11 +95,22 @@ def get_s3_client(
     If an `assume_role_arn` is provided, the secrets will be used to
     assume that role and return a Session instantiated with that role.
+    If no `assume_role_arn` is provided, and `aws_s3_assume_role_arn` is
+    configured in the global `credentials.storage` config, then the secrets
+    will be used to assume that role and return a Session instantiated with
+    that role.
     """
+    configured_assume_role_arn = CONFIG.credentials.get(  # pylint: disable=no-member
+        "storage", {}
+    ).get(  # pylint: disable=no-member
+        "aws_s3_assume_role_arn"
+    )
     session = get_aws_session(
         auth_method=auth_method,
         storage_secrets=storage_secrets,
-        assume_role_arn=assume_role_arn,
+        assume_role_arn=assume_role_arn or configured_assume_role_arn,
     )
     # Configure S3 client to use signature version 4 for KMS compatibility

ethyca-fides 2.67.2b3__py2.py3-none-any.whl → 2.67.2rc0__py2.py3-none-any.whl

Potentially problematic release.

ethyca-fides 2.67.2b3__py2.py3-none-any.whl → 2.67.2rc0__py2.py3-none-any.whl