PyPI - atlan-application-sdk - Versions diffs - 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl - Mend

atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

application_sdk/activities/.cursor/BUGBOT.md +424 -0
application_sdk/activities/metadata_extraction/sql.py +400 -25
application_sdk/application/__init__.py +2 -0
application_sdk/application/metadata_extraction/sql.py +3 -0
application_sdk/clients/.cursor/BUGBOT.md +280 -0
application_sdk/clients/models.py +42 -0
application_sdk/clients/sql.py +127 -87
application_sdk/clients/temporal.py +3 -1
application_sdk/common/.cursor/BUGBOT.md +316 -0
application_sdk/common/aws_utils.py +259 -11
application_sdk/common/utils.py +145 -9
application_sdk/constants.py +8 -0
application_sdk/decorators/.cursor/BUGBOT.md +279 -0
application_sdk/handlers/__init__.py +8 -1
application_sdk/handlers/sql.py +63 -22
application_sdk/inputs/.cursor/BUGBOT.md +250 -0
application_sdk/interceptors/.cursor/BUGBOT.md +320 -0
application_sdk/interceptors/cleanup.py +171 -0
application_sdk/interceptors/events.py +6 -6
application_sdk/observability/decorators/observability_decorator.py +36 -22
application_sdk/outputs/.cursor/BUGBOT.md +295 -0
application_sdk/outputs/iceberg.py +4 -0
application_sdk/outputs/json.py +6 -0
application_sdk/outputs/parquet.py +13 -3
application_sdk/server/.cursor/BUGBOT.md +442 -0
application_sdk/server/fastapi/__init__.py +59 -3
application_sdk/server/fastapi/models.py +27 -0
application_sdk/services/objectstore.py +16 -3
application_sdk/version.py +1 -1
application_sdk/workflows/.cursor/BUGBOT.md +218 -0
{atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/METADATA +1 -1
{atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/RECORD +35 -24
{atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/WHEEL +0 -0
{atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/LICENSE +0 -0
{atlan_application_sdk-0.1.1rc39.dist-info → atlan_application_sdk-0.1.1rc41.dist-info}/licenses/NOTICE +0 -0

application_sdk/common/.cursor/BUGBOT.md ADDED Viewed

@@ -0,0 +1,316 @@
+# Common Code Review Guidelines - Shared Utilities and Constants
+## Context-Specific Patterns
+This directory contains shared utilities, constants, error codes, and common functionality used across the SDK. Code here must be high-quality, well-tested, and designed for reuse.
+### Phase 1: Critical Common Code Safety Issues
+**Constants Management:**
+- **All magic strings and numbers must be moved to constants.py**: No hardcoded values scattered across the codebase
+- **Centralized configuration**: Related constants should be grouped together with clear naming
+- **Environment variable patterns**: Use consistent naming conventions for environment variables
+- **Shared constant keys**: Constants used by multiple modules (like configuration keys) must be defined here
+**Error Code Standardization:**
+- **Use internal SDK error codes**: All custom exceptions should be defined in `error_codes.py`
+- **Specific exception types**: No generic `Exception` or `ValueError` for SDK-specific errors
+- **Error hierarchies**: Related errors should inherit from common base exceptions
+- **Consistent error messages**: Similar errors should have consistent message formats
+```python
+# ✅ DO: Proper constants and error management
+# In constants.py
+DISTRIBUTED_LOCK_CONFIG_KEY = "distributed_lock_config"
+DEFAULT_LOCK_TTL_SECONDS = 300
+DEFAULT_MAX_LOCKS = 10
+REDIS_KEY_PREFIX = "application_sdk"
+# Environment variable naming conventions
+FAIL_WORKFLOW_ON_REDIS_UNAVAILABLE = os.getenv("FAIL_WORKFLOW_ON_REDIS_UNAVAILABLE", "false").lower() == "true"
+DATABASE_TIMEOUT_SECONDS = int(os.getenv("DATABASE_TIMEOUT_SECONDS", "30"))
+# In error_codes.py
+class SDKError(Exception):
+    """Base exception for all SDK errors."""
+class ClientError(SDKError):
+    """Errors related to client operations."""
+class LockAcquisitionError(SDKError):
+    """Errors related to distributed lock operations."""
+# ❌ REJECT: Scattered constants and generic errors
+# Found across multiple files:
+LOCK_TTL = 300  # In one file
+DEFAULT_TIMEOUT = 300  # In another file
+"distributed_lock"  # Hardcoded string in various places
+# Using generic exceptions:
+raise Exception("Lock failed")  # Should be LockAcquisitionError
+raise ValueError("Invalid config")  # Should be ConfigurationError
+```
+### Phase 2: Utility Architecture Patterns
+**Utility Function Design:**
+- **Single responsibility**: Each utility function should do exactly one thing
+- **Pure functions**: Utilities should avoid side effects where possible
+- **Type safety**: All utility functions must have comprehensive type hints
+- **Error handling**: Utilities must handle edge cases gracefully
+- **Documentation**: Complete docstrings with usage examples
+**Code Reuse and DRY Principles:**
+- **Extract repeated logic**: Common patterns across modules should become utility functions
+- **Consolidate similar utilities**: Functions with overlapping purposes should be unified
+- **Shared abstractions**: Common interface patterns should be abstracted into base classes
+- **Configuration utilities**: Common configuration patterns should be centralized
+```python
+# ✅ DO: Proper utility function design
+def validate_environment_variable(
+    var_name: str,
+    default_value: str,
+    valid_values: Optional[List[str]] = None,
+    value_type: type = str
+) -> Any:
+    """
+    Validate and convert environment variable with comprehensive error handling.
+    Args:
+        var_name: Name of environment variable
+        default_value: Fallback value if not set
+        valid_values: List of allowed values (optional)
+        value_type: Expected type for conversion
+    Returns:
+        Validated and converted value
+    Raises:
+        ConfigurationError: If value is invalid or conversion fails
+    Example:
+        >>> timeout = validate_environment_variable(
+        ...     "DB_TIMEOUT", "30", value_type=int
+        ... )
+        >>> mode = validate_environment_variable(
+        ...     "LOG_LEVEL", "INFO", valid_values=["DEBUG", "INFO", "WARNING", "ERROR"]
+        ... )
+    """
+    raw_value = os.getenv(var_name, default_value)
+    try:
+        # Type conversion
+        if value_type == bool:
+            converted_value = raw_value.lower() in ('true', '1', 'yes', 'on')
+        elif value_type == int:
+            converted_value = int(raw_value)
+        elif value_type == float:
+            converted_value = float(raw_value)
+        else:
+            converted_value = raw_value
+        # Validation
+        if valid_values and converted_value not in valid_values:
+            raise ConfigurationError(
+                f"Invalid value for {var_name}: {raw_value}. "
+                f"Valid values: {valid_values}"
+            )
+        return converted_value
+    except (ValueError, TypeError) as e:
+        raise ConfigurationError(
+            f"Failed to convert {var_name}={raw_value} to {value_type.__name__}: {e}"
+        )
+# ❌ REJECT: Poor utility design
+def bad_get_config(name):  # No type hints, no validation, no documentation
+    return os.getenv(name, "")  # Silent empty-string default, no error handling
+```
+### Phase 3: Common Code Testing Requirements
+**Utility Testing Standards:**
+- **Comprehensive edge case testing**: Test all possible input combinations
+- **Error condition testing**: Verify proper error handling for invalid inputs
+- **Type safety testing**: Test with various input types to verify type hints
+- **Integration testing**: Test utilities in context of actual usage
+- **Performance testing**: Ensure utilities don't create performance bottlenecks
+**Shared Code Quality:**
+- All utility functions must have corresponding unit tests
+- Test coverage must be >90% for common utilities
+- Include property-based testing with hypothesis for complex utilities
+- Mock external dependencies in utility tests
+- Test thread safety for utilities used in concurrent contexts
+### Phase 4: Performance and Reusability
+**Utility Performance:**
+- **Caching for expensive operations**: Cache results of expensive utility calculations
+- **Async where appropriate**: Use async for I/O utilities, sync for CPU-bound utilities
+- **Memory efficiency**: Avoid creating unnecessary object copies in utilities
+- **Algorithm efficiency**: Use appropriate data structures and algorithms
+**Reusability Patterns:**
+- **Generic implementations**: Write utilities that work for multiple use cases
+- **Parameterizable behavior**: Allow customization through parameters, not hardcoded behavior
+- **Composable utilities**: Design utilities that can be easily combined
+- **Backwards compatibility**: Maintain API stability for widely-used utilities
+### Phase 5: Common Code Maintainability
+**Documentation and Examples:**
+- **Complete documentation**: All public utilities must have comprehensive docstrings
+- **Usage examples**: Include realistic examples showing typical usage patterns
+- **Performance characteristics**: Document time/space complexity for non-trivial utilities
+- **Thread safety**: Document whether utilities are thread-safe
+- **Version compatibility**: Document any version-specific behaviors
+**Code Organization:**
+- **Logical grouping**: Group related utilities in appropriately named modules
+- **Consistent interfaces**: Similar utilities should have consistent parameter patterns
+- **Clear abstractions**: Separate interface definitions from implementations
+- **Dependency management**: Minimize dependencies in common utilities
+---
+## Common Code Anti-Patterns
+**Always Reject:**
+- **Scattered constants**: Magic numbers or strings not centralized in constants.py
+- **Generic exceptions**: Using `Exception`, `ValueError`, or `RuntimeError` instead of SDK-specific errors
+- **Duplicate utilities**: Multiple functions doing essentially the same thing
+- **Poor error handling**: Utilities without proper exception handling
+- **Missing validation**: Utilities that don't validate their inputs
+- **Undocumented utilities**: Shared code without proper documentation
+**Constants Management Anti-Patterns:**
+```python
+# ❌ REJECT: Scattered constants across files
+# In multiple different files:
+LOCK_TTL = 300  # locks.py
+DEFAULT_TIMEOUT = 300  # client.py
+MAX_RETRIES = 3  # activities.py
+"distributed_lock_config"  # Hardcoded string in 5 different places
+# ✅ REQUIRE: Centralized constants
+# In constants.py only:
+DEFAULT_LOCK_TTL_SECONDS = 300
+DEFAULT_DATABASE_TIMEOUT_SECONDS = 300
+DEFAULT_MAX_RETRY_ATTEMPTS = 3
+DISTRIBUTED_LOCK_CONFIG_KEY = "distributed_lock_config"
+# Other files import from constants:
+from application_sdk.constants import DISTRIBUTED_LOCK_CONFIG_KEY, DEFAULT_LOCK_TTL_SECONDS
+```
+**Error Handling Anti-Patterns:**
+```python
+# ❌ REJECT: Generic error handling
+def bad_utility_function(value: str) -> dict:
+    if not value:
+        raise ValueError("Invalid value")  # Generic error
+    try:
+        result = process_value(value)
+        return result
+    except Exception as e:
+        raise Exception(f"Processing failed: {e}")  # Generic error
+# ✅ REQUIRE: SDK-specific error handling
+from application_sdk.common.error_codes import ValidationError, ProcessingError
+def good_utility_function(value: str) -> dict:
+    """Utility function with proper error handling."""
+    if not value or not value.strip():
+        raise ValidationError(f"Value cannot be empty or whitespace: '{value}'")
+    try:
+        result = process_value(value)
+        if not result:
+            raise ProcessingError(f"Processing returned empty result for value: '{value}'")
+        return result
+    except ProcessingError:
+        raise  # Re-raise SDK errors
+    except Exception as e:
+        raise ProcessingError(f"Unexpected error processing '{value}': {e}")
+```
+**Code Duplication Anti-Patterns:**
+```python
+# ❌ REJECT: Repeated logic in multiple files
+# Found in client.py:
+def setup_database_connection(host, port, user, password):
+    connection_string = f"postgresql://{user}:{password}@{host}:{port}"
+    return create_connection(connection_string)
+# Found in activities.py:
+def create_db_connection(host, port, user, password):
+    conn_str = f"postgresql://{user}:{password}@{host}:{port}"
+    return establish_connection(conn_str)
+# ✅ REQUIRE: Extracted shared utility
+# In common/utils.py:
+def build_database_connection_string(
+    host: str,
+    port: int,
+    username: str,
+    password: str,
+    database: Optional[str] = None,
+    ssl_mode: str = "require"
+) -> str:
+    """
+    Build a standardized database connection string.
+    Used consistently across all database clients and activities.
+    """
+    base_url = f"postgresql://{username}:{password}@{host}:{port}"
+    if database:
+        base_url += f"/{database}"
+    params = []
+    if ssl_mode:
+        params.append(f"sslmode={ssl_mode}")
+    if params:
+        base_url += "?" + "&".join(params)
+    return base_url
+# Other modules import and use the shared utility:
+from application_sdk.common.utils import build_database_connection_string
+```
+## Educational Context for Common Code Reviews
+When reviewing common code, emphasize:
+1. **Consistency Impact**: "Centralized constants and utilities ensure consistency across the entire SDK. Scattered constants lead to inconsistencies and make global changes nearly impossible."
+2. **Maintainability Impact**: "Well-designed utilities reduce code duplication and make the codebase easier to maintain. Changes to common functionality only need to be made in one place."
+3. **Error Handling Impact**: "SDK-specific exceptions provide clearer error messages and enable better error handling throughout the application. Generic exceptions hide the root cause and make debugging difficult."
+4. **Reusability Impact**: "Properly designed common utilities can be reused across multiple contexts, reducing development time and ensuring consistent behavior."
+5. **Performance Impact**: "Shared utilities are called frequently throughout the application. Performance issues in common code have amplified impact across the entire system."
+6. **Testing Impact**: "Common utilities require especially thorough testing because they're used in many contexts. Bugs in utilities affect multiple parts of the system simultaneously."

application_sdk/common/aws_utils.py CHANGED Viewed

@@ -1,4 +1,13 @@
+import re
+from typing import Any, Dict, Optional
+import boto3
+from sqlalchemy.engine.url import URL
 from application_sdk.constants import AWS_SESSION_NAME
+from application_sdk.observability.logger_adaptor import get_logger
+logger = get_logger(__name__)
 def get_region_name_from_hostname(hostname: str) -> str:
@@ -12,11 +21,14 @@ def get_region_name_from_hostname(hostname: str) -> str:
     Returns:
         str: AWS region name
     """
-    parts = hostname.split(".")
-    for part in parts:
-        if part.startswith(("us-", "eu-", "ap-", "ca-", "me-", "sa-", "af-")):
-            return part
-    raise ValueError(f"Could not find valid AWS region in hostname: {hostname}")
+    match = re.search(r"\.([a-z]{2}-[a-z]+-\d)\.", hostname)
+    if match:
+        return match.group(1)
+    # Some services may use - instead of . (rare)
+    match = re.search(r"-([a-z]{2}-[a-z]+-\d)\.", hostname)
+    if match:
+        return match.group(1)
+    raise ValueError("Could not find valid AWS region from hostname")
 def generate_aws_rds_token_with_iam_role(
@@ -55,12 +67,10 @@ def generate_aws_rds_token_with_iam_role(
         )
         credentials = assumed_role["Credentials"]
-        aws_client = client(
-            "rds",
-            aws_access_key_id=credentials["AccessKeyId"],
-            aws_secret_access_key=credentials["SecretAccessKey"],
-            aws_session_token=credentials["SessionToken"],
-            region_name=region or get_region_name_from_hostname(host),
+        aws_client = create_aws_client(
+            service="rds",
+            region=region or get_region_name_from_hostname(host),
+            temp_credentials=credentials,
         )
         token: str = aws_client.generate_db_auth_token(
             DBHostname=host, Port=port, DBUsername=user
@@ -107,3 +117,241 @@ def generate_aws_rds_token_with_iam_user(
         return token
     except Exception as e:
         raise Exception(f"Failed to get user credentials: {str(e)}")
+def get_cluster_identifier(aws_client) -> Optional[str]:
+    """
+    Retrieve the identifier of the first Redshift cluster that reports one.
+    Calls ``describe_clusters()`` on the client once and scans the
+    ``Clusters`` list of the response in order.
+    Args:
+        aws_client: Boto3 Redshift client instance
+    Returns:
+        Optional[str]: The first non-empty ``ClusterIdentifier`` found, or
+            ``None`` when no cluster in the response carries one. This
+            function does not raise when nothing is found.
+    """
+    clusters = aws_client.describe_clusters()
+    for cluster in clusters["Clusters"]:
+        cluster_identifier = cluster.get("ClusterIdentifier")
+        if cluster_identifier:
+            # No filtering is applied: the first cluster that has an
+            # identifier is used when the caller did not supply one.
+            return cluster_identifier  # Just return the string
+    return None
+def create_aws_session(credentials: Dict[str, Any]) -> boto3.Session:
+    """
+    Create a boto3 session from an access key pair found in ``credentials``.
+    Args:
+        credentials: Dictionary containing AWS credentials. The access key
+            is read from ``aws_access_key_id`` and falls back to
+            ``username``; the secret key is read from
+            ``aws_secret_access_key`` and falls back to ``password``.
+    Returns:
+        boto3.Session: Session constructed with the resolved key pair
+    """
+    # ``or`` means an empty/falsy primary value also triggers the fallback key.
+    aws_access_key_id = credentials.get("aws_access_key_id") or credentials.get(
+        "username"
+    )
+    aws_secret_access_key = credentials.get("aws_secret_access_key") or credentials.get(
+        "password"
+    )
+    # NOTE(review): if neither key is present, None is passed for that
+    # argument - confirm how boto3.Session should behave in that case.
+    return boto3.Session(
+        aws_access_key_id=aws_access_key_id,
+        aws_secret_access_key=aws_secret_access_key,
+    )
+def get_cluster_credentials(
+    aws_client, credentials: Dict[str, Any], extra: Dict[str, Any]
+) -> Dict[str, str]:
+    """
+    Retrieve cluster credentials using IAM authentication.
+    Args:
+        aws_client: Boto3 Redshift client instance
+        credentials: Dictionary containing connection credentials. The
+            optional ``cluster_id`` key selects the cluster; when it is
+            missing or empty, ``get_cluster_identifier(aws_client)`` is used.
+        extra: Dictionary of additional connection settings. Must contain
+            the ``database`` key (a missing key raises ``KeyError``).
+    Returns:
+        Dict[str, str]: The response of
+            ``get_cluster_credentials_with_iam`` (expected to contain
+            DbUser and DbPassword)
+    """
+    database = extra["database"]
+    # NOTE(review): get_cluster_identifier may return None, in which case
+    # None is forwarded as ClusterIdentifier - confirm this is acceptable.
+    cluster_identifier = credentials.get("cluster_id") or get_cluster_identifier(
+        aws_client
+    )
+    return aws_client.get_cluster_credentials_with_iam(
+        DbName=database,
+        ClusterIdentifier=cluster_identifier,
+    )
+def create_aws_client(
+    service: str,
+    region: str,
+    session: Optional[boto3.Session] = None,
+    temp_credentials: Optional[Dict[str, str]] = None,
+    use_default_credentials: bool = False,
+) -> Any:
+    """
+    Create an AWS client from exactly one of three credential sources.
+    Args:
+        service: AWS service name (e.g., 'redshift', 'redshift-serverless', 'sts', 'rds')
+        region: AWS region name
+        session: Optional boto3 session instance. If provided, uses session credentials
+        temp_credentials: Optional dictionary containing temporary credentials from assume_role.
+                         Must contain 'AccessKeyId', 'SecretAccessKey', and 'SessionToken'
+        use_default_credentials: If True, uses default AWS credentials (environment, IAM role, etc.).
+                                Must be requested explicitly; it is not applied
+                                automatically when no other source is given
+    Returns:
+        AWS client instance
+    Raises:
+        ValueError: If no credential source, or more than one, is provided.
+            Raised before client creation, so it is not wrapped.
+        Exception: If client creation fails; the original error is logged
+            and its message is embedded in the new exception
+    Examples:
+        Using temporary credentials::
+            client = create_aws_client(
+                service="redshift",
+                region="us-east-1",
+                temp_credentials={
+                    "AccessKeyId": "AKIA...",
+                    "SecretAccessKey": "...",
+                    "SessionToken": "..."
+                }
+            )
+        Using a session::
+            session = boto3.Session(profile_name="my-profile")
+            client = create_aws_client(
+                service="rds",
+                region="us-west-2",
+                session=session
+            )
+        Using default credentials::
+            client = create_aws_client(
+                service="sts",
+                region="us-east-1",
+                use_default_credentials=True
+            )
+    """
+    # Count how many credential sources were supplied; exactly one is allowed
+    credential_sources = sum(
+        [session is not None, temp_credentials is not None, use_default_credentials]
+    )
+    if credential_sources == 0:
+        raise ValueError("At least one credential source must be provided")
+    if credential_sources > 1:
+        raise ValueError("Only one credential source should be provided at a time")
+    try:
+        # Source: caller-supplied session
+        if session is not None:
+            logger.debug(
+                f"Creating {service} client using provided session in region {region}"
+            )
+            return session.client(service, region_name=region)  # type: ignore
+        # Source: temporary credentials (a missing key raises KeyError,
+        # which is wrapped by the handler below)
+        if temp_credentials is not None:
+            logger.debug(
+                f"Creating {service} client using temporary credentials in region {region}"
+            )
+            return boto3.client(  # type: ignore
+                service,
+                aws_access_key_id=temp_credentials["AccessKeyId"],
+                aws_secret_access_key=temp_credentials["SecretAccessKey"],
+                aws_session_token=temp_credentials["SessionToken"],
+                region_name=region,
+            )
+        # Source: default credentials resolved by boto3
+        if use_default_credentials:
+            logger.debug(
+                f"Creating {service} client using default credentials in region {region}"
+            )
+            return boto3.client(service, region_name=region)  # type: ignore
+    except Exception as e:
+        # Log the underlying failure, then surface a generic Exception
+        logger.error(f"Failed to create {service} client in region {region}: {e}")
+        raise Exception(f"Failed to create {service} client: {str(e)}")
+def create_engine_url(
+    drivername: str,
+    credentials: Dict[str, Any],
+    cluster_credentials: Dict[str, str],
+    extra: Dict[str, Any],
+) -> URL:
+    """
+    Create SQLAlchemy engine URL for Redshift connection.
+    Args:
+        drivername: Driver name passed through to ``URL.create``
+        credentials: Dictionary containing connection credentials. ``host``
+            is required; ``port`` is optional and is passed as ``None``
+            when absent.
+        cluster_credentials: Dictionary containing DbUser and DbPassword
+            (both keys are required)
+        extra: Dictionary of additional connection settings. Must contain
+            the ``database`` key.
+    Returns:
+        URL: SQLAlchemy engine URL
+    Raises:
+        KeyError: If ``host``, ``database``, ``DbUser`` or ``DbPassword``
+            is missing from its dictionary
+    """
+    host = credentials["host"]
+    port = credentials.get("port")
+    database = extra["database"]
+    return URL.create(
+        drivername=drivername,
+        username=cluster_credentials["DbUser"],
+        password=cluster_credentials["DbPassword"],
+        host=host,
+        port=port,
+        database=database,
+    )
+def get_all_aws_regions() -> list[str]:
+    """
+    Get AWS region names from the EC2 describe_regions API, with a fallback.
+    Returns:
+        list[str]: Region names from ``describe_regions`` in sorted order,
+            or a hardcoded fallback list if the lookup fails for any reason.
+            Failures are logged as a warning and are not propagated to the
+            caller.
+    """
+    try:
+        # Use us-east-1 as the default region for the EC2 client since it's always available
+        ec2_client = boto3.client("ec2", region_name="us-east-1")
+        response = ec2_client.describe_regions()
+        regions = [region["RegionName"] for region in response["Regions"]]
+        return sorted(regions)  # Sort for consistent ordering
+    except Exception as e:
+        # Fallback to a hardcoded list if the API call fails
+        logger.warning(
+            f"Failed to retrieve AWS regions dynamically: {e}. Using fallback list."
+        )
+        # NOTE(review): "aws-global" does not look like a regular region
+        # name - confirm it is intended in this list.
+        return [
+            "ap-northeast-1",
+            "ap-south-1",
+            "ap-southeast-1",
+            "ap-southeast-2",
+            "aws-global",
+            "ca-central-1",
+            "eu-central-1",
+            "eu-north-1",
+            "eu-west-1",
+            "eu-west-2",
+            "eu-west-3",
+            "sa-east-1",
+            "us-east-1",
+            "us-east-2",
+            "us-west-1",
+            "us-west-2",
+        ]

atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl

atlan-application-sdk 0.1.1rc39__py3-none-any.whl → 0.1.1rc41__py3-none-any.whl