atlan-application-sdk 0.1.1rc40__py3-none-any.whl → 0.1.1rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/common/utils.py +78 -4
- application_sdk/activities/metadata_extraction/sql.py +400 -27
- application_sdk/application/__init__.py +2 -0
- application_sdk/application/metadata_extraction/sql.py +3 -0
- application_sdk/clients/models.py +42 -0
- application_sdk/clients/sql.py +17 -13
- application_sdk/common/aws_utils.py +259 -11
- application_sdk/common/utils.py +145 -9
- application_sdk/handlers/__init__.py +8 -1
- application_sdk/handlers/sql.py +63 -22
- application_sdk/inputs/__init__.py +98 -2
- application_sdk/inputs/json.py +59 -87
- application_sdk/inputs/parquet.py +173 -94
- application_sdk/observability/decorators/observability_decorator.py +36 -22
- application_sdk/server/fastapi/__init__.py +59 -3
- application_sdk/server/fastapi/models.py +27 -0
- application_sdk/test_utils/hypothesis/strategies/inputs/json_input.py +10 -5
- application_sdk/test_utils/hypothesis/strategies/inputs/parquet_input.py +9 -4
- application_sdk/version.py +1 -1
- {atlan_application_sdk-0.1.1rc40.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc40.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/RECORD +24 -23
- {atlan_application_sdk-0.1.1rc40.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc40.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc40.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/licenses/NOTICE +0 -0
@@ -1,11 +1,7 @@
-import glob
-import os
 from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional, Union
 
-from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.inputs import Input
 from application_sdk.observability.logger_adaptor import get_logger
-from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 
@@ -20,107 +16,139 @@ class ParquetInput(Input):
     Supports reading both single files and directories containing multiple parquet files.
     """
 
+    _EXTENSION = ".parquet"
+
     def __init__(
         self,
-        path:
-        chunk_size:
-        input_prefix: Optional[str] = None,
+        path: str,
+        chunk_size: int = 100000,
         file_names: Optional[List[str]] = None,
     ):
         """Initialize the Parquet input class.
 
         Args:
             path (str): Path to parquet file or directory containing parquet files.
-
-
-
-
-            file_names (Optional[List[str]]
-
+                It accepts both types of paths:
+                local path or object store path
+                Wildcards are not supported.
+            chunk_size (int): Number of rows per batch. Defaults to 100000.
+            file_names (Optional[List[str]]): List of file names to read. Defaults to None.
+
+        Raises:
+            ValueError: When path is not provided or when single file path is combined with file_names
         """
+
+        # Validate that single file path and file_names are not both specified
+        if path.endswith(self._EXTENSION) and file_names:
+            raise ValueError(
+                f"Cannot specify both a single file path ('{path}') and file_names filter. "
+                f"Either provide a directory path with file_names, or specify the exact file path without file_names."
+            )
+
         self.path = path
         self.chunk_size = chunk_size
-        self.input_prefix = input_prefix
         self.file_names = file_names
 
-    async def
-        """Read
-
-        Args:
-            local_path (str): Path to the local data in the temp directory.
+    async def get_dataframe(self) -> "pd.DataFrame":
+        """Read data from parquet file(s) and return as pandas DataFrame.
 
         Returns:
-
-        """
-        # if the path is a directory, then check if the directory has any parquet files
-        parquet_files = []
-        if os.path.isdir(local_path):
-            parquet_files = glob.glob(os.path.join(local_path, "*.parquet"))
-        else:
-            parquet_files = glob.glob(local_path)
-        if not parquet_files:
-            if self.input_prefix:
-                logger.info(
-                    f"Reading file from object store: {local_path} from {self.input_prefix}"
-                )
-                if os.path.isdir(local_path):
-                    await ObjectStore.download_prefix(
-                        source=get_object_store_prefix(local_path),
-                        destination=local_path,
-                    )
-                else:
-                    await ObjectStore.download_file(
-                        source=get_object_store_prefix(local_path),
-                        destination=local_path,
-                    )
-            else:
-                raise ValueError(
-                    f"No parquet files found in {local_path} and no input prefix provided"
-                )
+            pd.DataFrame: Combined dataframe from specified parquet files
 
-
-
-
-        and return as a single combined pandas dataframe.
+        Raises:
+            ValueError: When no valid path can be determined or no matching files found
+            Exception: When reading parquet files fails
 
-
-
+        Example transformation:
+            Input files:
+            +------------------+
+            | file1.parquet    |
+            | file2.parquet    |
+            | file3.parquet    |
+            +------------------+
+
+            With file_names=["file1.parquet", "file3.parquet"]:
+            +-------+-------+-------+
+            | col1  | col2  | col3  |
+            +-------+-------+-------+
+            | val1  | val2  | val3  |  # from file1.parquet
+            | val7  | val8  | val9  |  # from file3.parquet
+            +-------+-------+-------+
+
+            Transformations:
+            - Only specified files are read and combined
+            - Column schemas must be compatible across files
+            - Only reads files in the specified directory
         """
         try:
             import pandas as pd
 
-
-
-
-
-            return pd.
+            # Ensure files are available (local or downloaded)
+            parquet_files = await self.download_files()
+            logger.info(f"Reading {len(parquet_files)} parquet files")
+
+            return pd.concat(
+                (pd.read_parquet(parquet_file) for parquet_file in parquet_files),
+                ignore_index=True,
+            )
         except Exception as e:
             logger.error(f"Error reading data from parquet file(s): {str(e)}")
-            # Re-raise to match IcebergInput behavior
             raise
 
     async def get_batched_dataframe(
         self,
     ) -> Union[AsyncIterator["pd.DataFrame"], Iterator["pd.DataFrame"]]:
-        """
-        Method to read the data from the parquet file(s) in batches
-        and return as an async iterator of pandas dataframes.
+        """Read data from parquet file(s) in batches as pandas DataFrames.
 
         Returns:
-            AsyncIterator[
+            AsyncIterator[pd.DataFrame]: Async iterator of pandas dataframes
+
+        Raises:
+            ValueError: When no parquet files found locally or in object store
+            Exception: When reading parquet files fails
+
+        Example transformation:
+            Input files:
+            +------------------+
+            | file1.parquet    |
+            | file2.parquet    |
+            | file3.parquet    |
+            +------------------+
+
+            With file_names=["file1.parquet", "file2.parquet"] and chunk_size=2:
+            Batch 1:
+            +-------+-------+
+            | col1  | col2  |
+            +-------+-------+
+            | val1  | val2  |  # from file1.parquet
+            | val3  | val4  |  # from file1.parquet
+            +-------+-------+
+
+            Batch 2:
+            +-------+-------+
+            | col1  | col2  |
+            +-------+-------+
+            | val5  | val6  |  # from file2.parquet
+            | val7  | val8  |  # from file2.parquet
+            +-------+-------+
+
+            Transformations:
+            - Only specified files are combined then split into chunks
+            - Each batch is a separate DataFrame
+            - Only reads files in the specified directory
         """
         try:
             import pandas as pd
 
-
-
-
-
-
+            # Ensure files are available (local or downloaded)
+            parquet_files = await self.download_files()
+            logger.info(f"Reading {len(parquet_files)} parquet files in batches")
+
+            # Process each file individually to maintain memory efficiency
+            for parquet_file in parquet_files:
+                df = pd.read_parquet(parquet_file)
                 for i in range(0, len(df), self.chunk_size):
                     yield df.iloc[i : i + self.chunk_size]
-            else:
-                yield df
         except Exception as e:
             logger.error(
                 f"Error reading data from parquet file(s) in batches: {str(e)}"
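The pandas read path in this hunk reduces to two small patterns: concatenate every discovered file into one frame, or read one file at a time and slice it into fixed-size chunks. Below is a minimal standalone sketch of both patterns using plain pandas, independent of the SDK's `download_files()` helper; the file names and chunk size are hypothetical.

```python
from typing import Iterator, List

import pandas as pd


def read_combined(parquet_files: List[str]) -> pd.DataFrame:
    # Mirrors get_dataframe: one concatenated frame, index reset across files.
    return pd.concat(
        (pd.read_parquet(f) for f in parquet_files),
        ignore_index=True,
    )


def read_chunked(parquet_files: List[str], chunk_size: int = 100_000) -> Iterator[pd.DataFrame]:
    # Mirrors get_batched_dataframe: one file in memory at a time,
    # sliced into chunk_size-row DataFrames.
    for f in parquet_files:
        df = pd.read_parquet(f)
        for start in range(0, len(df), chunk_size):
            yield df.iloc[start : start + chunk_size]


if __name__ == "__main__":
    files = ["file1.parquet", "file3.parquet"]  # hypothetical paths
    print(read_combined(files).shape)
    for batch in read_chunked(files, chunk_size=2):
        print(batch.shape)
```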
@@ -128,51 +156,102 @@ class ParquetInput(Input):
             raise
 
     async def get_daft_dataframe(self) -> "daft.DataFrame":  # noqa: F821
-        """
-        Method to read the data from the parquet file(s)
-        and return as a single combined daft dataframe.
+        """Read data from parquet file(s) and return as daft DataFrame.
 
         Returns:
-            daft.DataFrame: Combined daft dataframe from
+            daft.DataFrame: Combined daft dataframe from specified parquet files
+
+        Raises:
+            ValueError: When no parquet files found locally or in object store
+            Exception: When reading parquet files fails
+
+        Example transformation:
+            Input files:
+            +------------------+
+            | file1.parquet    |
+            | file2.parquet    |
+            | file3.parquet    |
+            +------------------+
+
+            With file_names=["file1.parquet", "file3.parquet"]:
+            +-------+-------+-------+
+            | col1  | col2  | col3  |
+            +-------+-------+-------+
+            | val1  | val2  | val3  |  # from file1.parquet
+            | val7  | val8  | val9  |  # from file3.parquet
+            +-------+-------+-------+
+
+            Transformations:
+            - Only specified parquet files combined into single daft DataFrame
+            - Lazy evaluation for better performance
+            - Column schemas must be compatible across files
         """
         try:
             import daft  # type: ignore
 
-
-
-
-
-
-
-            return daft.read_parquet(f"{path}/*.parquet")
+            # Ensure files are available (local or downloaded)
+            parquet_files = await self.download_files()
+            logger.info(f"Reading {len(parquet_files)} parquet files with daft")
+
+            # Use the discovered/downloaded files directly
+            return daft.read_parquet(parquet_files)
         except Exception as e:
             logger.error(
                 f"Error reading data from parquet file(s) using daft: {str(e)}"
             )
-            # Re-raise to match IcebergInput behavior
             raise
 
     async def get_batched_daft_dataframe(self) -> AsyncIterator["daft.DataFrame"]:  # type: ignore
-        """
-        Get batched daft dataframe from parquet file(s)
+        """Get batched daft dataframe from parquet file(s).
 
         Returns:
             AsyncIterator[daft.DataFrame]: An async iterator of daft DataFrames, each containing
-                a batch of data from
+                a batch of data from individual parquet files
+
+        Raises:
+            ValueError: When no parquet files found locally or in object store
+            Exception: When reading parquet files fails
+
+        Example transformation:
+            Input files:
+            +------------------+
+            | file1.parquet    |
+            | file2.parquet    |
+            | file3.parquet    |
+            +------------------+
+
+            With file_names=["file1.parquet", "file3.parquet"]:
+            Batch 1 (file1.parquet):
+            +-------+-------+
+            | col1  | col2  |
+            +-------+-------+
+            | val1  | val2  |
+            | val3  | val4  |
+            +-------+-------+
+
+            Batch 2 (file3.parquet):
+            +-------+-------+
+            | col1  | col2  |
+            +-------+-------+
+            | val7  | val8  |
+            | val9  | val10 |
+            +-------+-------+
+
+            Transformations:
+            - Each specified file becomes a separate daft DataFrame batch
+            - Lazy evaluation for better performance
+            - Files processed individually for memory efficiency
         """
         try:
             import daft  # type: ignore
 
-
-
-
-
-
-
-
-            if self.path and self.input_prefix:
-                await self.download_files(self.path)
-            yield daft.read_parquet(f"{self.path}/*.parquet")
+            # Ensure files are available (local or downloaded)
+            parquet_files = await self.download_files()
+            logger.info(f"Reading {len(parquet_files)} parquet files as daft batches")
+
+            # Yield each discovered file as separate batch
+            for parquet_file in parquet_files:
+                yield daft.read_parquet(parquet_file)
 
         except Exception as error:
             logger.error(
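The daft variants follow the same shape: `daft.read_parquet` accepts either a single path or a list of paths, and evaluation stays lazy until the frame is collected. A rough standalone sketch of the combined and per-file-batch reads (hypothetical file names; assumes the `daft` package is installed):

```python
from typing import Iterator, List

import daft


def read_combined_daft(parquet_files: List[str]) -> daft.DataFrame:
    # Mirrors get_daft_dataframe: one lazy frame over all listed files.
    return daft.read_parquet(parquet_files)


def read_batched_daft(parquet_files: List[str]) -> Iterator[daft.DataFrame]:
    # Mirrors get_batched_daft_dataframe: one lazy frame per file.
    for f in parquet_files:
        yield daft.read_parquet(f)


if __name__ == "__main__":
    files = ["file1.parquet", "file3.parquet"]  # hypothetical paths
    print(read_combined_daft(files).collect())
    for frame in read_batched_daft(files):
        print(frame.count_rows())
```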
@@ -4,7 +4,9 @@ import time
 import uuid
 from typing import Any, Callable, TypeVar, cast
 
-from application_sdk.observability.
+from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
+from application_sdk.observability.traces_adaptor import get_traces
 
 T = TypeVar("T")
 
@@ -136,9 +138,9 @@ def _record_error_observability(
 
 
 def observability(
-    logger: Any,
-    metrics: Any,
-    traces: Any,
+    logger: Any = None,
+    metrics: Any = None,
+    traces: Any = None,
 ) -> Callable[[Callable[..., T]], Callable[..., T]]:
     """Decorator for adding observability to functions.
 
@@ -146,16 +148,23 @@ def observability(
     It handles both synchronous and asynchronous functions.
 
     Args:
-        logger: Logger instance for operation logging
-        metrics: Metrics adapter for recording operation metrics
-        traces: Traces adapter for recording operation traces
+        logger: Logger instance for operation logging. If None, auto-initializes using get_logger()
+        metrics: Metrics adapter for recording operation metrics. If None, auto-initializes using get_metrics()
+        traces: Traces adapter for recording operation traces. If None, auto-initializes using get_traces()
 
     Returns:
         Callable: Decorated function with observability
 
     Example:
         ```python
+        # With explicit observability components
         @observability(logger, metrics, traces)
+        async def my_function():
+            # Function implementation
+            pass
+
+        # With auto-initialization (recommended)
+        @observability()
         async def my_function():
             # Function implementation
             pass
@@ -163,6 +172,11 @@ def observability(
     """
 
     def decorator(func: Callable[..., T]) -> Callable[..., T]:
+        # Auto-initialize observability components if not provided
+        actual_logger = logger or get_logger(func.__module__)
+        actual_metrics = metrics or get_metrics()
+        actual_traces = traces or get_traces()
+
         # Get function metadata
         func_name = func.__name__
         func_doc = func.__doc__ or f"Executing {func_name}"
@@ -170,7 +184,7 @@
         is_async = inspect.iscoroutinefunction(func)
 
         # Debug logging for function decoration
-        logger.debug(f"Decorating function {func_name} (async={is_async})")
+        actual_logger.debug(f"Decorating function {func_name} (async={is_async})")
 
         @functools.wraps(func)
         async def async_wrapper(*args: Any, **kwargs: Any) -> T:
@@ -181,16 +195,16 @@
 
             try:
                 # Log start of operation
-                logger.debug(f"Starting async function {func_name}")
+                actual_logger.debug(f"Starting async function {func_name}")
 
                 # Execute the function
                 result = await func(*args, **kwargs)
 
                 # Record success observability
                 _record_success_observability(
-                    logger,
-                    metrics,
-                    traces,
+                    actual_logger,
+                    actual_metrics,
+                    actual_traces,
                     func_name,
                     func_doc,
                     func_module,
@@ -204,9 +218,9 @@
             except Exception as e:
                 # Record error observability
                 _record_error_observability(
-                    logger,
-                    metrics,
-                    traces,
+                    actual_logger,
+                    actual_metrics,
+                    actual_traces,
                     func_name,
                     func_doc,
                     func_module,
@@ -226,16 +240,16 @@
 
             try:
                 # Log start of operation
-                logger.debug(f"Starting sync function {func_name}")
+                actual_logger.debug(f"Starting sync function {func_name}")
 
                 # Execute the function
                 result = func(*args, **kwargs)
 
                 # Record success observability
                 _record_success_observability(
-                    logger,
-                    metrics,
-                    traces,
+                    actual_logger,
+                    actual_metrics,
+                    actual_traces,
                     func_name,
                     func_doc,
                     func_module,
@@ -249,9 +263,9 @@
             except Exception as e:
                 # Record error observability
                 _record_error_observability(
-                    logger,
-                    metrics,
-                    traces,
+                    actual_logger,
+                    actual_metrics,
+                    actual_traces,
                     func_name,
                     func_doc,
                     func_module,
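The core change across the decorator hunks is that `logger`, `metrics`, and `traces` now default to None and are resolved once per decorated function, at decoration time, via the `provided or get_x()` idiom. A self-contained sketch of that auto-initialization pattern using only the standard library (the decorator name and behavior here are stand-ins, not the SDK implementation itself):

```python
import functools
import logging
from typing import Any, Callable, Optional, TypeVar

T = TypeVar("T")


def observed(logger: Optional[logging.Logger] = None) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator that logs entry/exit, auto-creating a logger when none is given."""

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Resolved once at decoration time, like actual_logger above.
        actual_logger = logger or logging.getLogger(func.__module__)

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            actual_logger.debug("starting %s", func.__name__)
            try:
                result = func(*args, **kwargs)
                actual_logger.debug("finished %s", func.__name__)
                return result
            except Exception:
                actual_logger.exception("error in %s", func.__name__)
                raise

        return wrapper

    return decorator


@observed()  # no logger passed: one is derived from the module name
def add(a: int, b: int) -> int:
    return a + b
```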
@@ -1,3 +1,4 @@
+import os
 import time
 from typing import Any, Callable, List, Optional, Type
 
@@ -32,6 +33,7 @@ from application_sdk.server import ServerInterface
 from application_sdk.server.fastapi.middleware.logmiddleware import LogMiddleware
 from application_sdk.server.fastapi.middleware.metrics import MetricsMiddleware
 from application_sdk.server.fastapi.models import (
+    ConfigMapResponse,
     EventWorkflowRequest,
     EventWorkflowResponse,
     EventWorkflowTrigger,
@@ -95,6 +97,8 @@ class APIServer(ServerInterface):
     docs_directory_path: str = "docs"
     docs_export_path: str = "dist"
 
+    frontend_assets_path: str = "frontend/static"
+
     workflows: List[WorkflowInterface] = []
     event_triggers: List[EventWorkflowTrigger] = []
 
@@ -107,6 +111,7 @@ class APIServer(ServerInterface):
         workflow_client: Optional[WorkflowClient] = None,
         frontend_templates_path: str = "frontend/templates",
         ui_enabled: bool = True,
+        has_configmap: bool = False,
     ):
         """Initialize the FastAPI application.
 
@@ -121,6 +126,7 @@ class APIServer(ServerInterface):
         self.templates = Jinja2Templates(directory=frontend_templates_path)
         self.duckdb_ui = DuckDBUI()
         self.ui_enabled = ui_enabled
+        self.has_configmap = has_configmap
 
         # Create the FastAPI app using the renamed import
         if isinstance(lifespan, Callable):
@@ -177,6 +183,20 @@ class APIServer(ServerInterface):
         except Exception as e:
             logger.warning(str(e))
 
+    def frontend_home(self, request: Request) -> HTMLResponse:
+        frontend_html_path = os.path.join(
+            self.frontend_assets_path,
+            "index.html",
+        )
+
+        if not os.path.exists(frontend_html_path) or not self.has_configmap:
+            return self.fallback_home(request)
+
+        with open(frontend_html_path, "r", encoding="utf-8") as file:
+            contents = file.read()
+
+        return HTMLResponse(content=contents)
+
     def register_routers(self):
         """Register all routers with the FastAPI application.
 
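`frontend_home` only serves the pre-built `frontend/static/index.html` when that file exists and the server was constructed with `has_configmap=True`; otherwise it falls back to the Jinja2 template route. A stripped-down sketch of that serve-static-or-fall-back pattern as a standalone FastAPI app (paths and the fallback markup are hypothetical, and this is not the SDK's `APIServer` class):

```python
import os

from fastapi import FastAPI
from fastapi.responses import HTMLResponse

app = FastAPI()

FRONTEND_ASSETS_PATH = "frontend/static"  # assumption: pre-built assets live here
HAS_CONFIGMAP = True                      # would come from APIServer(has_configmap=...)


@app.get("/", response_class=HTMLResponse)
def home() -> HTMLResponse:
    index_path = os.path.join(FRONTEND_ASSETS_PATH, "index.html")

    # Serve the built frontend only when it exists and a configmap is wired in.
    if not os.path.exists(index_path) or not HAS_CONFIGMAP:
        return HTMLResponse(content="<h1>Fallback home page</h1>")

    with open(index_path, "r", encoding="utf-8") as file:
        return HTMLResponse(content=file.read())
```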
@@ -195,7 +215,7 @@ class APIServer(ServerInterface):
         self.app.include_router(self.dapr_router, prefix="/dapr")
         self.app.include_router(self.events_router, prefix="/events/v1")
 
-
+    def fallback_home(self, request: Request) -> HTMLResponse:
         return self.templates.TemplateResponse(
             "index.html",
             {
@@ -328,7 +348,6 @@
             methods=["GET"],
             response_class=RedirectResponse,
         )
-
         self.workflow_router.add_api_route(
             "/auth",
             self.test_auth,
@@ -374,6 +393,13 @@
             methods=["POST"],
         )
 
+        self.workflow_router.add_api_route(
+            "/configmap/{config_map_id}",
+            self.get_configmap,
+            methods=["GET"],
+            response_model=ConfigMapResponse,
+        )
+
         self.dapr_router.add_api_route(
             "/subscribe",
             self.get_dapr_subscriptions,
@@ -390,7 +416,8 @@
 
     def register_ui_routes(self):
         """Register the UI routes for the FastAPI application."""
-        self.app.get("/")(self.
+        self.app.get("/")(self.frontend_home)
+
         # Mount static files
         self.app.mount("/", StaticFiles(directory="frontend/static"), name="static")
 
@@ -587,6 +614,35 @@
         )
         raise e
 
+    async def get_configmap(self, config_map_id: str) -> ConfigMapResponse:
+        """Get a configuration map by its ID.
+
+        Args:
+            config_map_id (str): The ID of the configuration map to retrieve.
+
+        Returns:
+            ConfigMapResponse: Response containing the configuration map.
+        """
+        try:
+            if not self.handler:
+                raise Exception("Handler not initialized")
+
+            # Call the getConfigmap method on the workflow class
+            config_map_data = await self.handler.get_configmap(config_map_id)
+
+            return ConfigMapResponse(
+                success=True,
+                message="Configuration map fetched successfully",
+                data=config_map_data,
+            )
+        except Exception as e:
+            logger.error(f"Error fetching configuration map: {e}")
+            return ConfigMapResponse(
+                success=False,
+                message=f"Failed to fetch configuration map: {str(e)}",
+                data={},
+            )
+
     async def get_workflow_config(
         self, config_id: str, type: str = "workflows"
     ) -> WorkflowConfigResponse:
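The new endpoint simply delegates to the application handler and wraps whatever dict comes back in a `ConfigMapResponse`; any exception is converted into a `success=False` payload. A sketch of a handler exposing `get_configmap` the way this route awaits it (the `MyAppHandler` class and its in-memory config source are hypothetical; the real handler contract lives in `application_sdk/handlers/__init__.py`, which also changed in this release but is not shown in this hunk):

```python
from typing import Any, Dict


class MyAppHandler:
    """Hypothetical handler exposing the coroutine the /configmap/{id} route awaits."""

    def __init__(self) -> None:
        # Stand-in for wherever configuration maps actually live.
        self._configmaps: Dict[str, Dict[str, Any]] = {
            "pikachu-config-001": {
                "name": "Pikachu Configuration",
                "settings": {"electric_type": True, "level": 25},
            }
        }

    async def get_configmap(self, config_map_id: str) -> Dict[str, Any]:
        # Unknown IDs raise KeyError, so the route responds with success=False
        # and the error message instead of a configuration map.
        return self._configmaps[config_map_id]
```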
@@ -195,6 +195,33 @@ class WorkflowConfigResponse(BaseModel):
     }
 
 
+class ConfigMapResponse(BaseModel):
+    success: bool = Field(
+        ..., description="Indicates whether the operation was successful"
+    )
+    message: str = Field(
+        ..., description="Message describing the result of the operation"
+    )
+    data: Dict[str, Any] = Field(..., description="Configuration map object")
+
+    class Config:
+        schema_extra = {
+            "example": {
+                "success": True,
+                "message": "Configuration map fetched successfully",
+                "data": {
+                    "config_map_id": "pikachu-config-001",
+                    "name": "Pikachu Configuration",
+                    "settings": {
+                        "electric_type": True,
+                        "level": 25,
+                        "moves": ["Thunderbolt", "Quick Attack"],
+                    },
+                },
+            }
+        }
+
+
 class WorkflowTrigger(BaseModel):
     workflow_class: Optional[Type[WorkflowInterface]] = None
     model_config = {"arbitrary_types_allowed": True}
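`ConfigMapResponse` is a plain Pydantic envelope: a success flag, a human-readable message, and the configuration map itself as a free-form dict. A small usage sketch (assumes this SDK version is installed; the field values are illustrative):

```python
from application_sdk.server.fastapi.models import ConfigMapResponse

response = ConfigMapResponse(
    success=True,
    message="Configuration map fetched successfully",
    data={
        "config_map_id": "pikachu-config-001",
        "settings": {"electric_type": True, "level": 25},
    },
)

# All three fields are required; `data` carries the raw configuration map.
print(response.success, response.message)
print(response.data["settings"]["level"])
```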
@@ -2,11 +2,17 @@ from hypothesis import strategies as st
 
 # Strategy for generating safe file path components
 safe_path_strategy = st.text(
-    alphabet=
-
+    alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-",
+    min_size=1,
+    max_size=20,
+).map(lambda x: f"/data/{x}")
 
 # Strategy for generating file names
-file_name_strategy = st.
+file_name_strategy = st.text(
+    alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-",
+    min_size=1,
+    max_size=10,
+).map(lambda x: f"{x}.json")
 
 # Strategy for generating lists of file names
 file_names_strategy = st.lists(file_name_strategy, unique=True)
@@ -18,7 +24,6 @@ download_prefix_strategy = safe_path_strategy
 json_input_config_strategy = st.fixed_dictionaries(
     {
         "path": safe_path_strategy,
-        "
-        "file_names": file_names_strategy,
+        "file_names": st.one_of(st.none(), file_names_strategy),
     }
 )
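These strategies compose in the usual Hypothesis way: text constrained to a filesystem-safe alphabet, mapped into a path or a `.json` file name, then bundled with `st.fixed_dictionaries` so each generated config either omits `file_names` (None) or supplies a unique list. A self-contained sketch of the same pattern, redefined locally rather than imported from the SDK test utilities:

```python
from hypothesis import given, strategies as st

safe_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-"

path_strategy = st.text(alphabet=safe_chars, min_size=1, max_size=20).map(lambda x: f"/data/{x}")
file_name_strategy = st.text(alphabet=safe_chars, min_size=1, max_size=10).map(lambda x: f"{x}.json")

config_strategy = st.fixed_dictionaries(
    {
        "path": path_strategy,
        "file_names": st.one_of(st.none(), st.lists(file_name_strategy, unique=True)),
    }
)


@given(config_strategy)
def test_config_shape(config):
    # Every generated config has a /data/... path and either no file list or .json names.
    assert config["path"].startswith("/data/")
    assert config["file_names"] is None or all(n.endswith(".json") for n in config["file_names"])


if __name__ == "__main__":
    test_config_shape()  # Hypothesis runs the property with generated examples
```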
|