PyPI - atlan-application-sdk - Versions diffs - 0.1.1rc40__py3-none-any.whl → 0.1.1rc42__py3-none-any.whl - Mend

atlan-application-sdk 0.1.1rc40__py3-none-any.whl → 0.1.1rc42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

application_sdk/clients/models.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""
+Pydantic models for database client configurations.
+This module provides Pydantic models for database connection configurations,
+ensuring type safety and validation for database client settings.
+"""
+from typing import Any, Dict, List, Optional
+from pydantic import BaseModel, Field
+class DatabaseConfig(BaseModel):
+    """
+    Pydantic model for database connection configuration.
+    This model defines the structure for database connection configurations,
+    including connection templates, required parameters, defaults, and additional
+    connection parameters.
+    Attributes:
+        template: Connection string template; the only field declared without a default.
+        required: Names of parameters that must be provided; defaults to an empty list.
+        defaults: Optional mapping of default connection parameters; defaults to None.
+        parameters: Optional list of additional parameter names; defaults to None.
+    """
+    # `...` as the first Field argument marks the field as mandatory (no default value).
+    template: str = Field(
+        ...,
+        description="SQLAlchemy connection string template with placeholders for connection parameters",
+    )
+    required: List[str] = Field(
+        default=[],
+        description="List of required connection parameters that must be provided",
+    )
+    defaults: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Default connection parameters to be added to the connection string",
+    )
+    parameters: Optional[List[str]] = Field(
+        default=None,
+        description="List of additional connection parameter names that can be dynamically added from credentials",
+    )
+    class Config:
+        """Pydantic configuration for the DatabaseConfig model."""
+        extra = "forbid"  # Prevent additional fields
+        validate_assignment = True  # Validate on assignment
+        use_enum_values = True  # Use enum values instead of enum objects

application_sdk/clients/sql.py CHANGED Viewed

@@ -7,13 +7,14 @@ database operations, supporting batch processing and server-side cursors.
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional
 from urllib.parse import quote_plus
 from sqlalchemy.ext.asyncio import AsyncConnection, AsyncEngine
 from temporalio import activity
 from application_sdk.clients import ClientInterface
+from application_sdk.clients.models import DatabaseConfig
 from application_sdk.common.aws_utils import (
     generate_aws_rds_token_with_iam_role,
     generate_aws_rds_token_with_iam_user,
@@ -48,7 +49,7 @@ class BaseSQLClient(ClientInterface):
     credentials: Dict[str, Any] = {}
     resolved_credentials: Dict[str, Any] = {}
     use_server_side_cursor: bool = USE_SERVER_SIDE_CURSOR
-    DB_CONFIG: Dict[str, Any] = {}
+    DB_CONFIG: Optional[DatabaseConfig] = None
     def __init__(
         self,
@@ -262,7 +263,9 @@ class BaseSQLClient(ClientInterface):
         Returns:
             str: The updated URL with the dialect.
         """
-        installed_dialect = self.DB_CONFIG["template"].split("://")[0]
+        if not self.DB_CONFIG:
+            raise ValueError("DB_CONFIG is not configured for this SQL client.")
+        installed_dialect = self.DB_CONFIG.template.split("://")[0]
         url_dialect = sqlalchemy_url.split("://")[0]
         if installed_dialect != url_dialect:
             sqlalchemy_url = sqlalchemy_url.replace(url_dialect, installed_dialect)
@@ -281,6 +284,9 @@ class BaseSQLClient(ClientInterface):
         Raises:
             ValueError: If required connection parameters are missing.
         """
+        if not self.DB_CONFIG:
+            raise ValueError("DB_CONFIG is not configured for this SQL client.")
         extra = parse_credentials_extra(self.credentials)
         # TODO: Uncomment this when the native deployment is ready
@@ -293,7 +299,7 @@ class BaseSQLClient(ClientInterface):
         # Prepare parameters
         param_values = {}
-        for param in self.DB_CONFIG["required"]:
+        for param in self.DB_CONFIG.required:
             if param == "password":
                 param_values[param] = auth_token
             else:
@@ -303,21 +309,19 @@ class BaseSQLClient(ClientInterface):
                 param_values[param] = value
         # Fill in base template
-        conn_str = self.DB_CONFIG["template"].format(**param_values)
+        conn_str = self.DB_CONFIG.template.format(**param_values)
         # Append defaults if not already in the template
-        if self.DB_CONFIG.get("defaults"):
-            conn_str = self.add_connection_params(conn_str, self.DB_CONFIG["defaults"])
+        if self.DB_CONFIG.defaults:
+            conn_str = self.add_connection_params(conn_str, self.DB_CONFIG.defaults)
-        if self.DB_CONFIG.get("parameters"):
-            parameter_keys = self.DB_CONFIG["parameters"]
-            self.DB_CONFIG["parameters"] = {
+        if self.DB_CONFIG.parameters:
+            parameter_keys = self.DB_CONFIG.parameters
+            parameter_values = {
                 key: self.credentials.get(key) or extra.get(key)
                 for key in parameter_keys
             }
-            conn_str = self.add_connection_params(
-                conn_str, self.DB_CONFIG["parameters"]
-            )
+            conn_str = self.add_connection_params(conn_str, parameter_values)
         return conn_str

application_sdk/common/aws_utils.py CHANGED Viewed

@@ -1,4 +1,13 @@
+import re
+from typing import Any, Dict, Optional
+import boto3
+from sqlalchemy.engine.url import URL
 from application_sdk.constants import AWS_SESSION_NAME
+from application_sdk.observability.logger_adaptor import get_logger
+logger = get_logger(__name__)
 def get_region_name_from_hostname(hostname: str) -> str:
@@ -12,11 +21,14 @@ def get_region_name_from_hostname(hostname: str) -> str:
     Returns:
         str: AWS region name
     """
-    parts = hostname.split(".")
-    for part in parts:
-        if part.startswith(("us-", "eu-", "ap-", "ca-", "me-", "sa-", "af-")):
-            return part
-    raise ValueError(f"Could not find valid AWS region in hostname: {hostname}")
+    match = re.search(r"\.([a-z]{2}-[a-z]+-\d)\.", hostname)
+    if match:
+        return match.group(1)
+    # Some services may use - instead of . (rare)
+    match = re.search(r"-([a-z]{2}-[a-z]+-\d)\.", hostname)
+    if match:
+        return match.group(1)
+    raise ValueError("Could not find valid AWS region from hostname")
 def generate_aws_rds_token_with_iam_role(
@@ -55,12 +67,10 @@ def generate_aws_rds_token_with_iam_role(
         )
         credentials = assumed_role["Credentials"]
-        aws_client = client(
-            "rds",
-            aws_access_key_id=credentials["AccessKeyId"],
-            aws_secret_access_key=credentials["SecretAccessKey"],
-            aws_session_token=credentials["SessionToken"],
-            region_name=region or get_region_name_from_hostname(host),
+        aws_client = create_aws_client(
+            service="rds",
+            region=region or get_region_name_from_hostname(host),
+            temp_credentials=credentials,
         )
         token: str = aws_client.generate_db_auth_token(
             DBHostname=host, Port=port, DBUsername=user
@@ -107,3 +117,241 @@ def generate_aws_rds_token_with_iam_user(
         return token
     except Exception as e:
         raise Exception(f"Failed to get user credentials: {str(e)}")
+def get_cluster_identifier(aws_client) -> Optional[str]:
+    """
+    Pick a cluster identifier from the clusters reported by ``describe_clusters``.
+    The entries under the ``"Clusters"`` key are scanned in order and the first
+    non-empty ``"ClusterIdentifier"`` value wins; no further filtering is applied.
+    Args:
+        aws_client: Client object exposing ``describe_clusters()``
+    Returns:
+        Optional[str]: The first non-empty cluster identifier, or None when no
+            entry carries one
+    """
+    response = aws_client.describe_clusters()
+    # Lazily walk the entries so the scan stops at the first usable identifier.
+    identifiers = (entry.get("ClusterIdentifier") for entry in response["Clusters"])
+    return next((identifier for identifier in identifiers if identifier), None)
+def create_aws_session(credentials: Dict[str, Any]) -> boto3.Session:
+    """
+    Build a boto3 session from the access key pair held in ``credentials``.
+    The key id is read from ``aws_access_key_id`` and falls back to ``username``
+    when that entry is missing or falsy; the secret is read from
+    ``aws_secret_access_key`` and falls back to ``password`` in the same way.
+    Args:
+        credentials: Dictionary containing AWS credentials
+    Returns:
+        boto3.Session: Session created with the resolved key pair
+    """
+    key_id = credentials.get("aws_access_key_id")
+    if not key_id:
+        key_id = credentials.get("username")
+    secret_key = credentials.get("aws_secret_access_key")
+    if not secret_key:
+        secret_key = credentials.get("password")
+    return boto3.Session(aws_access_key_id=key_id, aws_secret_access_key=secret_key)
+def get_cluster_credentials(
+    aws_client, credentials: Dict[str, Any], extra: Dict[str, Any]
+) -> Dict[str, str]:
+    """
+    Request cluster credentials through ``get_cluster_credentials_with_iam``.
+    Args:
+        aws_client: Client object exposing ``get_cluster_credentials_with_iam``
+        credentials: Connection credentials; ``cluster_id`` is used when present
+            and truthy, otherwise ``get_cluster_identifier`` is consulted
+        extra: Extra connection settings; must contain ``database``
+    Returns:
+        Dict[str, str]: Whatever the client call returns (the original docstring
+            describes it as containing DbUser and DbPassword)
+    Raises:
+        KeyError: If ``extra`` has no ``database`` entry
+    """
+    db_name = extra["database"]
+    cluster_id = credentials.get("cluster_id")
+    if not cluster_id:
+        # No explicit id supplied: fall back to discovering one via the client.
+        cluster_id = get_cluster_identifier(aws_client)
+    request = {"DbName": db_name, "ClusterIdentifier": cluster_id}
+    return aws_client.get_cluster_credentials_with_iam(**request)
+def create_aws_client(
+    service: str,
+    region: str,
+    session: Optional[boto3.Session] = None,
+    temp_credentials: Optional[Dict[str, str]] = None,
+    use_default_credentials: bool = False,
+) -> Any:
+    """
+    Create an AWS client with flexible credential options.
+    Exactly one of ``session``, ``temp_credentials`` or ``use_default_credentials``
+    must be supplied; passing none or more than one raises ValueError.
+    Args:
+        service: AWS service name (e.g., 'redshift', 'redshift-serverless', 'sts', 'rds')
+        region: AWS region name
+        session: Optional boto3 session instance. If provided, uses session credentials
+        temp_credentials: Optional dictionary containing temporary credentials from assume_role.
+                         Must contain 'AccessKeyId', 'SecretAccessKey', and 'SessionToken'
+        use_default_credentials: If True, uses default AWS credentials (environment, IAM role, etc.)
+                                It must be requested explicitly: it is not applied
+                                automatically when no other source is given
+    Returns:
+        AWS client instance
+    Raises:
+        ValueError: If invalid credential combination is provided
+        Exception: If client creation fails
+    Examples:
+        Using temporary credentials::
+            client = create_aws_client(
+                service="redshift",
+                region="us-east-1",
+                temp_credentials={
+                    "AccessKeyId": "AKIA...",
+                    "SecretAccessKey": "...",
+                    "SessionToken": "..."
+                }
+            )
+        Using a session::
+            session = boto3.Session(profile_name="my-profile")
+            client = create_aws_client(
+                service="rds",
+                region="us-west-2",
+                session=session
+            )
+        Using default credentials::
+            client = create_aws_client(
+                service="sts",
+                region="us-east-1",
+                use_default_credentials=True
+            )
+    """
+    # Validate credential options
+    # Booleans sum as 0/1, so this counts how many credential sources were supplied.
+    credential_sources = sum(
+        [session is not None, temp_credentials is not None, use_default_credentials]
+    )
+    if credential_sources == 0:
+        raise ValueError("At least one credential source must be provided")
+    if credential_sources > 1:
+        raise ValueError("Only one credential source should be provided at a time")
+    # The ValueErrors above are raised before the try block, so they reach the
+    # caller unchanged instead of being wrapped by the generic Exception below.
+    try:
+        # Priority 1: Use provided session
+        if session is not None:
+            logger.debug(
+                f"Creating {service} client using provided session in region {region}"
+            )
+            return session.client(service, region_name=region)  # type: ignore
+        # Priority 2: Use temporary credentials
+        if temp_credentials is not None:
+            logger.debug(
+                f"Creating {service} client using temporary credentials in region {region}"
+            )
+            return boto3.client(  # type: ignore
+                service,
+                aws_access_key_id=temp_credentials["AccessKeyId"],
+                aws_secret_access_key=temp_credentials["SecretAccessKey"],
+                aws_session_token=temp_credentials["SessionToken"],
+                region_name=region,
+            )
+        # Priority 3: Use default credentials
+        if use_default_credentials:
+            logger.debug(
+                f"Creating {service} client using default credentials in region {region}"
+            )
+            return boto3.client(service, region_name=region)  # type: ignore
+    except Exception as e:
+        # Any failure (including a missing key in temp_credentials) is logged and
+        # re-raised as a plain Exception carrying the original message text.
+        logger.error(f"Failed to create {service} client in region {region}: {e}")
+        raise Exception(f"Failed to create {service} client: {str(e)}")
+def create_engine_url(
+    drivername: str,
+    credentials: Dict[str, Any],
+    cluster_credentials: Dict[str, str],
+    extra: Dict[str, Any],
+) -> URL:
+    """
+    Assemble a SQLAlchemy engine URL from connection and cluster credentials.
+    Args:
+        drivername: Driver name passed through to ``URL.create``
+        credentials: Connection credentials; ``host`` is required, ``port`` is optional
+        cluster_credentials: Dictionary containing DbUser and DbPassword
+        extra: Extra connection settings; must contain ``database``
+    Returns:
+        URL: SQLAlchemy engine URL
+    Raises:
+        KeyError: If ``host``, ``database``, ``DbUser`` or ``DbPassword`` is missing
+    """
+    # Endpoint keys are looked up before the login keys, matching the lookup
+    # order callers would observe if several entries were missing at once.
+    endpoint = {
+        "host": credentials["host"],
+        "port": credentials.get("port"),
+        "database": extra["database"],
+    }
+    login = {
+        "username": cluster_credentials["DbUser"],
+        "password": cluster_credentials["DbPassword"],
+    }
+    return URL.create(drivername=drivername, **login, **endpoint)
+def get_all_aws_regions() -> list[str]:
+    """
+    Get all available AWS regions dynamically using EC2 describe_regions API.
+    If the lookup fails for any reason, the failure is logged as a warning and a
+    hardcoded fallback list is returned instead; no exception is propagated.
+    Returns:
+        list[str]: Sorted region names from the API, or the hardcoded fallback list
+    """
+    try:
+        # Use us-east-1 as the default region for the EC2 client since it's always available
+        ec2_client = boto3.client("ec2", region_name="us-east-1")
+        response = ec2_client.describe_regions()
+        regions = [region["RegionName"] for region in response["Regions"]]
+        return sorted(regions)  # Sort for consistent ordering
+    except Exception as e:
+        # Fallback to a comprehensive hardcoded list if API call fails
+        logger.warning(
+            f"Failed to retrieve AWS regions dynamically: {e}. Using fallback list."
+        )
+        # NOTE(review): "aws-global" does not follow the usual region-name form of
+        # the other entries — confirm it is intentionally part of the fallback.
+        return [
+            "ap-northeast-1",
+            "ap-south-1",
+            "ap-southeast-1",
+            "ap-southeast-2",
+            "aws-global",
+            "ca-central-1",
+            "eu-central-1",
+            "eu-north-1",
+            "eu-west-1",
+            "eu-west-2",
+            "eu-west-3",
+            "sa-east-1",
+            "us-east-1",
+            "us-east-2",
+            "us-west-1",
+            "us-west-2",
+        ]

application_sdk/common/utils.py CHANGED Viewed

@@ -17,8 +17,12 @@ from typing import (
     Union,
 )
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.common.error_codes import CommonError
+from application_sdk.constants import TEMPORARY_PATH
+from application_sdk.inputs.sql_query import SQLQueryInput
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore
 logger = get_logger(__name__)
@@ -106,10 +110,42 @@ def extract_database_names_from_regex_common(
         return empty_default
+def transform_posix_regex(regex_pattern: str) -> str:
+    r"""
+    Transform regex pattern for POSIX compatibility.
+    The pattern is split on ``|`` and each alternative is rewritten on its own.
+    Rules:
+    1. Add ^ before each database name before \. (skipped when the alternative
+       already starts with ^)
+    2. Add an additional . between \. and * if * follows \.
+    Both rules are independent: an alternative that is already anchored with ^
+    still gets rule 2 applied.
+    Example: 'dev\.public$|dev\.atlan_test_schema$|wide_world_importers\.*'
+    Becomes: '^dev\.public$|^dev\.atlan_test_schema$|^wide_world_importers\..*'
+    Args:
+        regex_pattern: ``|``-separated regex alternatives; may be empty
+    Returns:
+        str: The transformed pattern; an empty (falsy) input is returned unchanged
+    """
+    if not regex_pattern:
+        return regex_pattern
+    transformed_patterns = []
+    # Split by | to handle each pattern separately
+    for pattern in regex_pattern.split("|"):
+        # Add ^ at the beginning if it's not already there
+        if not pattern.startswith("^"):
+            pattern = "^" + pattern
+        # Add additional . between \. and * if * follows \.
+        # This runs for every alternative, anchored or not: '\.*' means "zero or
+        # more literal dots", whereas the intended meaning is "a dot, then anything".
+        pattern = re.sub(r"\\\.\*", r"\..*", pattern)
+        transformed_patterns.append(pattern)
+    return "|".join(transformed_patterns)
 def prepare_query(
     query: Optional[str],
     workflow_args: Dict[str, Any],
     temp_table_regex_sql: Optional[str] = "",
+    use_posix_regex: Optional[bool] = False,
 ) -> Optional[str]:
     """
     Prepares a SQL query by applying include and exclude filters, and optional
@@ -158,6 +194,14 @@ def prepare_query(
             include_filter, exclude_filter
         )
+        if use_posix_regex:
+            normalized_include_regex_posix = transform_posix_regex(
+                normalized_include_regex
+            )
+            normalized_exclude_regex_posix = transform_posix_regex(
+                normalized_exclude_regex
+            )
         # Extract database names from the normalized regex patterns
         include_databases = extract_database_names_from_regex_common(
             normalized_regex=normalized_include_regex,
@@ -176,15 +220,26 @@ def prepare_query(
         )
         exclude_views = workflow_args.get("metadata", {}).get("exclude_views", False)
-        return query.format(
-            include_databases=include_databases,
-            exclude_databases=exclude_databases,
-            normalized_include_regex=normalized_include_regex,
-            normalized_exclude_regex=normalized_exclude_regex,
-            temp_table_regex_sql=temp_table_regex_sql,
-            exclude_empty_tables=exclude_empty_tables,
-            exclude_views=exclude_views,
-        )
+        if use_posix_regex:
+            return query.format(
+                include_databases=include_databases,
+                exclude_databases=exclude_databases,
+                normalized_include_regex=normalized_include_regex_posix,
+                normalized_exclude_regex=normalized_exclude_regex_posix,
+                temp_table_regex_sql=temp_table_regex_sql,
+                exclude_empty_tables=exclude_empty_tables,
+                exclude_views=exclude_views,
+            )
+        else:
+            return query.format(
+                include_databases=include_databases,
+                exclude_databases=exclude_databases,
+                normalized_include_regex=normalized_include_regex,
+                normalized_exclude_regex=normalized_exclude_regex,
+                temp_table_regex_sql=temp_table_regex_sql,
+                exclude_empty_tables=exclude_empty_tables,
+                exclude_views=exclude_views,
+            )
     except CommonError as e:
         # Extract the original error message from the CommonError
         error_message = str(e).split(": ", 1)[-1] if ": " in str(e) else str(e)
@@ -195,6 +250,47 @@ def prepare_query(
         return None
+async def get_database_names(
+    sql_client, workflow_args, fetch_database_sql
+) -> Optional[List[str]]:
+    """
+    Get the database names from the workflow args if include-filter is present
+    When the include-filter yields no names, ``fetch_database_sql`` is prepared
+    via ``prepare_query`` (with POSIX regex handling enabled) and executed to
+    list the databases instead.
+    Args:
+        sql_client: Client whose ``engine`` attribute is used to run the query
+        workflow_args: The workflow args
+        fetch_database_sql: SQL text handed to ``prepare_query`` as the query
+    Returns:
+        List[str]: The database names
+    """
+    database_names = parse_filter_input(
+        workflow_args.get("metadata", {}).get("include-filter", {})
+    )
+    # Iterating the parsed filter (a dict) yields its keys; leading and trailing
+    # non-word characters are stripped from each key.
+    # presumably these are regex anchors such as ^ and $ — verify against callers
+    database_names = [
+        re.sub(r"^[^\w]+|[^\w]+$", "", database_name)
+        for database_name in database_names
+    ]
+    if not database_names:
+        # if database_names are not provided in the include-filter, we'll run the query to get all the database names
+        # because by default for an empty include-filter, we fetch details corresponding to all the databases.
+        temp_table_regex_sql = workflow_args.get("metadata", {}).get(
+            "temp-table-regex", ""
+        )
+        prepared_query = prepare_query(
+            query=fetch_database_sql,
+            workflow_args=workflow_args,
+            temp_table_regex_sql=temp_table_regex_sql,
+            use_posix_regex=True,
+        )
+        # We'll run the query to get all the database names
+        # NOTE(review): prepare_query is declared as returning Optional[str]; a None
+        # result is passed straight through here (hence the type: ignore).
+        database_sql_input = SQLQueryInput(
+            engine=sql_client.engine,
+            query=prepared_query,  # type: ignore
+            chunk_size=None,
+        )
+        database_dataframe = await database_sql_input.get_dataframe()
+        # The query result must expose a "database_name" column.
+        database_names = list(database_dataframe["database_name"])
+    return database_names
 def parse_filter_input(
     filter_input: Union[str, Dict[str, Any], None],
 ) -> Dict[str, Any]:
@@ -416,6 +512,46 @@ def parse_credentials_extra(credentials: Dict[str, Any]) -> Dict[str, Any]:
     return extra  # We know it's a Dict[str, Any] due to the Union type and str check
+def has_custom_control_config(workflow_args: Dict[str, Any]) -> bool:
+    """
+    Tell whether the workflow arguments carry a custom control configuration.
+    Two conditions must hold: ``control-config-strategy`` equals ``"custom"`` and
+    ``control-config`` is present with a value other than None.
+    Args:
+        workflow_args: The workflow arguments
+    Returns:
+        bool: True if custom control configuration is present, False otherwise
+    """
+    # Guard clause: any strategy other than "custom" rules the config out.
+    if workflow_args.get("control-config-strategy") != "custom":
+        return False
+    return workflow_args.get("control-config") is not None
+async def get_file_names(output_path: str, typename: str) -> List[str]:
+    """
+    Get file names for a specific asset type from the transformed directory.
+    The object-store prefix for ``output_path/typename`` is downloaded first, then
+    the ``*.json`` files under ``output_path/typename`` are listed from local disk.
+    Args:
+        output_path (str): The base output path
+        typename (str): The asset type (e.g., 'table', 'schema', 'column')
+    Returns:
+        List[str]: List of relative file paths for the asset type, each reduced to
+            its last two "/"-separated components
+    """
+    source = get_object_store_prefix(os.path.join(output_path, typename))
+    # NOTE(review): the download targets TEMPORARY_PATH while the glob below reads
+    # from output_path — presumably output_path lives under TEMPORARY_PATH; confirm.
+    await ObjectStore.download_prefix(source, TEMPORARY_PATH)
+    file_pattern = os.path.join(output_path, typename, "*.json")
+    file_names = glob.glob(file_pattern)
+    # Keep only the trailing "<directory>/<file>" part of every match.
+    file_name_list = [
+        "/".join(file_name.rsplit("/", 2)[-2:]) for file_name in file_names
+    ]
+    return file_name_list
 def run_sync(func):
     """Run a function in a thread pool executor.

application_sdk/handlers/__init__.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any
+from typing import Any, Dict
 class HandlerInterface(ABC):
@@ -37,3 +37,10 @@ class HandlerInterface(ABC):
         To be implemented by the subclass
         """
         raise NotImplementedError("fetch_metadata method not implemented")
+    @staticmethod
+    async def get_configmap(config_map_id: str) -> Dict[str, Any]:
+        """
+        Static method to get the configmap
+        This base implementation does not use ``config_map_id`` and always
+        returns an empty dict.
+        Args:
+            config_map_id: Identifier of the requested configmap
+        Returns:
+            Dict[str, Any]: An empty dictionary
+        """
+        return {}

atlan-application-sdk 0.1.1rc40__py3-none-any.whl → 0.1.1rc42__py3-none-any.whl

atlan-application-sdk 0.1.1rc40__py3-none-any.whl → 0.1.1rc42__py3-none-any.whl