atlan-application-sdk 2.0.0__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
@@ -15,6 +15,7 @@ Example:
 
 import os
 from abc import ABC
+from datetime import datetime, timedelta
 from typing import Any, Dict, Generic, Optional, TypeVar
 
 from pydantic import BaseModel
@@ -62,6 +63,7 @@ class ActivitiesState(BaseModel, Generic[HandlerType]):
     model_config = {"arbitrary_types_allowed": True}
     handler: Optional[HandlerType] = None
     workflow_args: Optional[Dict[str, Any]] = None
+    last_updated_timestamp: Optional[datetime] = None
 
 
 ActivitiesStateType = TypeVar("ActivitiesStateType", bound=ActivitiesState)
@@ -113,12 +115,15 @@ class ActivitiesInterface(ABC, Generic[ActivitiesStateType]):
         Note:
             The workflow ID is automatically retrieved from the current activity context.
             If no state exists for the current workflow, a new one will be created.
+            This method also updates the last_updated_timestamp to enable time-based
+            state refresh functionality.
         """
         workflow_id = get_workflow_id()
         if not self._state.get(workflow_id):
             self._state[workflow_id] = ActivitiesState()
 
         self._state[workflow_id].workflow_args = workflow_args
+        self._state[workflow_id].last_updated_timestamp = datetime.now()
 
     async def _get_state(self, workflow_args: Dict[str, Any]) -> ActivitiesStateType:
         """Retrieve the state for the current workflow.
@@ -142,6 +147,15 @@ class ActivitiesInterface(ABC, Generic[ActivitiesStateType]):
             workflow_id = get_workflow_id()
             if workflow_id not in self._state:
                 await self._set_state(workflow_args)
+
+            else:
+                current_timestamp = datetime.now()
+                # if difference of current_timestamp and last_updated_timestamp is greater than 15 minutes, then again _set_state
+                last_updated = self._state[workflow_id].last_updated_timestamp
+                if last_updated and current_timestamp - last_updated > timedelta(
+                    minutes=15
+                ):
+                    await self._set_state(workflow_args)
             return self._state[workflow_id]
         except OrchestratorError as e:
             logger.error(
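
A minimal, self-contained sketch (not SDK code) of the 15-minute, time-based refresh these hunks introduce: state is rebuilt when it is missing or when its last_updated_timestamp is older than the TTL. The class and method names below are illustrative only.

    from datetime import datetime, timedelta
    from typing import Any, Dict, Optional

    STATE_TTL = timedelta(minutes=15)

    class CachedState:
        def __init__(self) -> None:
            self.value: Optional[Dict[str, Any]] = None
            self.last_updated_timestamp: Optional[datetime] = None

        def set(self, value: Dict[str, Any]) -> None:
            self.value = value
            self.last_updated_timestamp = datetime.now()

        def get(self, fresh_value: Dict[str, Any]) -> Dict[str, Any]:
            # Rebuild when the state is missing or older than the 15-minute TTL
            is_stale = (
                self.last_updated_timestamp is not None
                and datetime.now() - self.last_updated_timestamp > STATE_TTL
            )
            if self.value is None or is_stale:
                self.set(fresh_value)
            return self.value
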
@@ -131,7 +131,9 @@ async def finalize_multidb_results(
     dataframe_list: List[
         Union[AsyncIterator["pd.DataFrame"], Iterator["pd.DataFrame"]]
     ],
-    setup_parquet_output_func: Callable[[str, bool], Optional[ParquetFileWriter]],
+    setup_parquet_output_func: Callable[
+        [str, bool, Optional[str]], Optional[ParquetFileWriter]
+    ],
     output_path: str,
     typename: str,
 ) -> Optional[Union[ActivityStatistics, "pd.DataFrame"]]:
@@ -189,7 +191,9 @@ async def finalize_multidb_results(
         return concatenated
 
     # Create new parquet output for concatenated data
-    concatenated_parquet_output = setup_parquet_output_func(output_path, True)
+    concatenated_parquet_output = setup_parquet_output_func(
+        output_path, True, typename
+    )
     if concatenated_parquet_output:
         await concatenated_parquet_output.write(concatenated)  # type: ignore
         return await concatenated_parquet_output.close()
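
A hedged sketch of a callable compatible with the widened setup_parquet_output_func type. Only the (str, bool, Optional[str]) shape comes from the hunk above; the ParquetFileWriter class here is a stand-in, and the parameter names and per-typename layout are assumptions made for illustration.

    from typing import Optional

    class ParquetFileWriter:  # stand-in for the SDK's writer type
        def __init__(self, path: str) -> None:
            self.path = path

    def setup_parquet_output(
        output_path: str, consolidate: bool, typename: Optional[str] = None
    ) -> Optional[ParquetFileWriter]:
        # The third argument is the new part: route output per typename when one is given
        target = f"{output_path}/{typename}" if typename else output_path
        return ParquetFileWriter(target) if consolidate else None

    writer = setup_parquet_output("/tmp/output", True, "TABLE")
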
@@ -1,4 +1,5 @@
 import os
+from datetime import datetime
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -60,6 +61,7 @@ class BaseSQLMetadataExtractionActivitiesState(ActivitiesState):
     sql_client: Optional[BaseSQLClient] = None
     handler: Optional[BaseSQLHandler] = None
     transformer: Optional[TransformerInterface] = None
+    last_updated_timestamp: Optional[datetime] = None
 
 
 class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
@@ -149,13 +151,30 @@ class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
 
         Args:
             workflow_args (Dict[str, Any]): Arguments passed to the workflow.
+
+        Note:
+            This method creates and configures the new SQL client before closing
+            the old one to ensure state is never left with a closed client if
+            initialization fails. The timestamp is only updated after the new
+            client is successfully created and assigned.
         """
         workflow_id = get_workflow_id()
         if not self._state.get(workflow_id):
             self._state[workflow_id] = BaseSQLMetadataExtractionActivitiesState()
 
-        await super()._set_state(workflow_args)
+        existing_state = self._state[workflow_id]
+
+        # Update workflow_args early, but preserve old timestamp until new client is ready
+        # This ensures that if initialization fails, the state can still be refreshed
+        existing_state.workflow_args = workflow_args
+
+        # Store reference to old client for cleanup after new client is ready
+        old_sql_client = None
+        if existing_state and existing_state.sql_client is not None:
+            old_sql_client = existing_state.sql_client
 
+        # Create and configure new client BEFORE closing old one
+        # This ensures state is never left with a closed client if initialization fails
         sql_client = self.sql_client_class()
 
         # Load credentials BEFORE creating handler to avoid race condition
@@ -165,10 +184,29 @@ class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
         )
         await sql_client.load(credentials)
 
-        # Assign sql_client and handler to state AFTER credentials are loaded
+        # Only after new client is successfully created and configured,
+        # close old client and assign new one to state
+        if old_sql_client is not None:
+            try:
+                await old_sql_client.close()
+                logger.debug(
+                    f"Closed existing SQL client for workflow {workflow_id} during state refresh"
+                )
+            except Exception as e:
+                logger.warning(
+                    f"Failed to close existing SQL client for workflow {workflow_id}: {e}",
+                    exc_info=True,
+                )
+                # Continue even if close fails - new client is already ready
+
+        # Assign sql_client and handler to state AFTER new client is ready
         self._state[workflow_id].sql_client = sql_client
         handler = self.handler_class(sql_client)
         self._state[workflow_id].handler = handler
+        # Update timestamp only after successful client creation and assignment
+        # This ensures that if initialization fails, the old timestamp remains
+        # and the state can be refreshed again immediately
+        self._state[workflow_id].last_updated_timestamp = datetime.now()
 
         # Create transformer with required parameters from ApplicationConstants
         transformer_params = {
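
A compact sketch of the swap-before-close ordering the Note above describes, using a hypothetical client class rather than the SDK's BaseSQLClient: the replacement client is built and loaded first, the old client is closed best-effort, and the timestamp is set only after the swap succeeds.

    import asyncio
    from datetime import datetime
    from typing import Optional

    class FakeSQLClient:  # hypothetical stand-in for the SDK's SQL client
        async def load(self, credentials: dict) -> None: ...
        async def close(self) -> None: ...

    class State:
        sql_client: Optional[FakeSQLClient] = None
        last_updated_timestamp: Optional[datetime] = None

    async def refresh(state: State, credentials: dict) -> None:
        old_client = state.sql_client           # kept only for later cleanup
        new_client = FakeSQLClient()
        await new_client.load(credentials)      # may raise; state is untouched so far
        if old_client is not None:
            try:
                await old_client.close()        # best effort; failure is non-fatal
            except Exception:
                pass
        state.sql_client = new_client
        state.last_updated_timestamp = datetime.now()  # stamped only after success

    asyncio.run(refresh(State(), {"user": "example"}))
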
@@ -49,9 +49,104 @@ if TYPE_CHECKING:
 
 
 class Reader(ABC):
+    """Abstract base class for reader data sources.
+
+    This class defines the interface for reader handlers that can read data
+    from various sources in different formats. Follows Python's file I/O
+    pattern with read/close semantics and supports context managers.
+
+    Attributes:
+        path (str): Path where the reader will read from.
+        _is_closed (bool): Whether the reader has been closed.
+        _downloaded_files (List[str]): List of downloaded temporary files to clean up.
+        cleanup_on_close (bool): Whether to clean up downloaded temp files on close.
+
+    Example:
+        Using close() explicitly::
+
+            reader = ParquetFileReader(path="/data/input")
+            df = await reader.read()
+            await reader.close()  # Cleans up any downloaded temp files
+
+        Using context manager (recommended)::
+
+            async with ParquetFileReader(path="/data/input") as reader:
+                df = await reader.read()
+            # close() called automatically
+
+        Reading in batches with context manager::
+
+            async with JsonFileReader(path="/data/input") as reader:
+                async for batch in reader.read_batches():
+                    process(batch)
+            # close() called automatically
     """
-    Abstract base class for reader data sources.
-    """
+
+    path: str
+    _is_closed: bool = False
+    _downloaded_files: List[str] = []
+    cleanup_on_close: bool = True
+
+    async def __aenter__(self) -> "Reader":
+        """Enter the async context manager.
+
+        Returns:
+            Reader: The reader instance.
+        """
+        return self
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Exit the async context manager, closing the reader.
+
+        Args:
+            exc_type: Exception type if an exception was raised.
+            exc_val: Exception value if an exception was raised.
+            exc_tb: Exception traceback if an exception was raised.
+        """
+        await self.close()
+
+    async def close(self) -> None:
+        """Close the reader and clean up any downloaded temporary files.
+
+        This method cleans up any temporary files that were downloaded from
+        the object store during read operations. Calling close() multiple
+        times is safe (subsequent calls are no-ops).
+
+        Note:
+            Set ``cleanup_on_close=False`` during initialization to retain
+            downloaded files after closing.
+
+        Example::
+
+            reader = ParquetFileReader(path="/data/input")
+            df = await reader.read()
+            await reader.close()  # Cleans up temp files
+        """
+        if self._is_closed:
+            return
+
+        if self.cleanup_on_close and self._downloaded_files:
+            await self._cleanup_downloaded_files()
+
+        self._is_closed = True
+
+    async def _cleanup_downloaded_files(self) -> None:
+        """Clean up downloaded temporary files.
+
+        Override this method in subclasses for custom cleanup behavior.
+        """
+        import shutil
+
+        for file_path in self._downloaded_files:
+            try:
+                if os.path.isfile(file_path):
+                    os.remove(file_path)
+                elif os.path.isdir(file_path):
+                    shutil.rmtree(file_path, ignore_errors=True)
+            except Exception as e:
+                logger.warning(f"Failed to clean up temporary file {file_path}: {e}")
+
+        self._downloaded_files.clear()
 
     @abstractmethod
     def read_batches(
@@ -62,27 +157,27 @@ class Reader(ABC):
         Iterator["daft.DataFrame"],
         AsyncIterator["daft.DataFrame"],
     ]:
-        """
-        Get an iterator of batched pandas DataFrames.
+        """Get an iterator of batched pandas DataFrames.
 
         Returns:
            Iterator["pd.DataFrame"]: An iterator of batched pandas DataFrames.
 
         Raises:
            NotImplementedError: If the method is not implemented.
+            ValueError: If the reader has been closed.
        """
        raise NotImplementedError
 
     @abstractmethod
     async def read(self) -> Union["pd.DataFrame", "daft.DataFrame"]:
-        """
-        Get a single pandas or daft DataFrame.
+        """Get a single pandas or daft DataFrame.
 
         Returns:
            Union["pd.DataFrame", "daft.DataFrame"]: A pandas or daft DataFrame.
 
         Raises:
            NotImplementedError: If the method is not implemented.
+            ValueError: If the reader has been closed.
        """
        raise NotImplementedError
 
@@ -27,9 +27,36 @@ activity.logger = logger
 
 
 class JsonFileReader(Reader):
-    """
-    JSON File Reader class to read data from JSON files using daft and pandas.
+    """JSON File Reader class to read data from JSON files using daft and pandas.
+
     Supports reading both single files and directories containing multiple JSON files.
+    Follows Python's file I/O pattern with read/close semantics and supports context managers.
+
+    Attributes:
+        path (str): Path to JSON file or directory containing JSON files.
+        chunk_size (int): Number of rows per batch.
+        file_names (Optional[List[str]]): List of specific file names to read.
+        dataframe_type (DataframeType): Type of dataframe to return (pandas or daft).
+        cleanup_on_close (bool): Whether to clean up downloaded temp files on close.
+
+    Example:
+        Using context manager (recommended)::
+
+            async with JsonFileReader(path="/data/input") as reader:
+                df = await reader.read()
+            # close() called automatically, temp files cleaned up
+
+        Reading in batches::
+
+            async with JsonFileReader(path="/data/input", chunk_size=50000) as reader:
+                async for batch in reader.read_batches():
+                    process(batch)
+
+        Using close() explicitly::
+
+            reader = JsonFileReader(path="/data/input")
+            df = await reader.read()
+            await reader.close()  # Clean up downloaded temp files
     """
 
     def __init__(
@@ -38,6 +65,7 @@ class JsonFileReader(Reader):
         file_names: Optional[List[str]] = None,
         chunk_size: Optional[int] = 100000,
         dataframe_type: DataframeType = DataframeType.pandas,
+        cleanup_on_close: bool = True,
     ):
         """Initialize the JsonInput class.
 
@@ -48,6 +76,8 @@ class JsonFileReader(Reader):
                 Wildcards are not supported.
             file_names (Optional[List[str]]): List of specific file names to read. Defaults to None.
             chunk_size (int): Number of rows per batch. Defaults to 100000.
+            dataframe_type (DataframeType): Type of dataframe to read. Defaults to DataframeType.pandas.
+            cleanup_on_close (bool): Whether to clean up downloaded temp files on close. Defaults to True.
 
         Raises:
             ValueError: When path is not provided or when single file path is combined with file_names
@@ -65,12 +95,22 @@ class JsonFileReader(Reader):
         self.chunk_size = chunk_size
         self.file_names = file_names
         self.dataframe_type = dataframe_type
+        self.cleanup_on_close = cleanup_on_close
+        self._is_closed = False
+        self._downloaded_files: List[str] = []
 
     async def read(self) -> Union["pd.DataFrame", "daft.DataFrame"]:
+        """Read the data from the JSON files and return as a single DataFrame.
+
+        Returns:
+            Union[pd.DataFrame, daft.DataFrame]: Combined dataframe from JSON files.
+
+        Raises:
+            ValueError: If the reader has been closed or dataframe_type is unsupported.
         """
-        Method to read the data from the json files in the path
-        and return as a single combined pandas dataframe
-        """
+        if self._is_closed:
+            raise ValueError("Cannot read from a closed reader")
+
 
         if self.dataframe_type == DataframeType.pandas:
             return await self._get_dataframe()
@@ -84,10 +124,18 @@ class JsonFileReader(Reader):
         AsyncIterator["pd.DataFrame"],
         AsyncIterator["daft.DataFrame"],
     ]:
+        """Read the data from the JSON files and return as batched DataFrames.
+
+        Returns:
+            Union[AsyncIterator[pd.DataFrame], AsyncIterator[daft.DataFrame]]:
+                Async iterator of DataFrames.
+
+        Raises:
+            ValueError: If the reader has been closed or dataframe_type is unsupported.
         """
-        Method to read the data from the json files in the path
-        and return as a batched pandas dataframe
-        """
+        if self._is_closed:
+            raise ValueError("Cannot read from a closed reader")
+
         if self.dataframe_type == DataframeType.pandas:
             return self._get_batched_dataframe()
         elif self.dataframe_type == DataframeType.daft:
@@ -98,10 +146,7 @@ class JsonFileReader(Reader):
     async def _get_batched_dataframe(
         self,
     ) -> AsyncIterator["pd.DataFrame"]:
-        """
-        Method to read the data from the json files in the path
-        and return as a batched pandas dataframe
-        """
+        """Read the data from the JSON files and return as a batched pandas dataframe."""
         try:
             import pandas as pd
 
@@ -109,6 +154,8 @@ class JsonFileReader(Reader):
             json_files = await download_files(
                 self.path, self.extension, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(json_files)
             logger.info(f"Reading {len(json_files)} JSON files in batches")
 
             for json_file in json_files:
@@ -124,10 +171,7 @@ class JsonFileReader(Reader):
             raise
 
     async def _get_dataframe(self) -> "pd.DataFrame":
-        """
-        Method to read the data from the json files in the path
-        and return as a single combined pandas dataframe
-        """
+        """Read the data from the JSON files and return as a single pandas dataframe."""
         try:
             import pandas as pd
 
@@ -135,6 +179,8 @@ class JsonFileReader(Reader):
             json_files = await download_files(
                 self.path, self.extension, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(json_files)
             logger.info(f"Reading {len(json_files)} JSON files as pandas dataframe")
 
             return pd.concat(
@@ -149,10 +195,7 @@ class JsonFileReader(Reader):
     async def _get_batched_daft_dataframe(
         self,
     ) -> AsyncIterator["daft.DataFrame"]:  # noqa: F821
-        """
-        Method to read the data from the json files in the path
-        and return as a batched daft dataframe
-        """
+        """Read the data from the JSON files and return as a batched daft dataframe."""
         try:
             import daft
 
@@ -160,6 +203,8 @@ class JsonFileReader(Reader):
             json_files = await download_files(
                 self.path, self.extension, self.file_names
            )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(json_files)
             logger.info(f"Reading {len(json_files)} JSON files as daft batches")
 
             # Yield each discovered file as separate batch with chunking
@@ -170,10 +215,7 @@ class JsonFileReader(Reader):
             raise
 
     async def _get_daft_dataframe(self) -> "daft.DataFrame":  # noqa: F821
-        """
-        Method to read the data from the json files in the path
-        and return as a single combined daft dataframe
-        """
+        """Read the data from the JSON files and return as a single daft dataframe."""
         try:
             import daft
 
@@ -181,6 +223,8 @@ class JsonFileReader(Reader):
             json_files = await download_files(
                 self.path, self.extension, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(json_files)
             logger.info(f"Reading {len(json_files)} JSON files with daft")
 
             # Use the discovered/downloaded files directly
@@ -40,9 +40,37 @@ if TYPE_CHECKING:
 
 
 class ParquetFileReader(Reader):
-    """
-    Parquet File Reader class to read data from Parquet files using daft and pandas.
+    """Parquet File Reader class to read data from Parquet files using daft and pandas.
+
     Supports reading both single files and directories containing multiple parquet files.
+    Follows Python's file I/O pattern with read/close semantics and supports context managers.
+
+    Attributes:
+        path (str): Path to parquet file or directory containing parquet files.
+        chunk_size (int): Number of rows per batch.
+        buffer_size (int): Number of rows per batch for daft.
+        file_names (Optional[List[str]]): List of specific file names to read.
+        dataframe_type (DataframeType): Type of dataframe to return (pandas or daft).
+        cleanup_on_close (bool): Whether to clean up downloaded temp files on close.
+
+    Example:
+        Using context manager (recommended)::
+
+            async with ParquetFileReader(path="/data/input") as reader:
+                df = await reader.read()
+            # close() called automatically, temp files cleaned up
+
+        Reading in batches::
+
+            async with ParquetFileReader(path="/data/input", chunk_size=50000) as reader:
+                async for batch in reader.read_batches():
+                    process(batch)
+
+        Using close() explicitly::
+
+            reader = ParquetFileReader(path="/data/input")
+            df = await reader.read()
+            await reader.close()  # Clean up downloaded temp files
     """
 
     def __init__(
@@ -52,6 +80,7 @@ class ParquetFileReader(Reader):
         chunk_size: Optional[int] = 100000,
         buffer_size: Optional[int] = 5000,
         file_names: Optional[List[str]] = None,
         dataframe_type: DataframeType = DataframeType.pandas,
+        cleanup_on_close: bool = True,
     ):
         """Initialize the Parquet input class.
@@ -64,6 +93,7 @@ class ParquetFileReader(Reader):
             buffer_size (int): Number of rows per batch. Defaults to 5000.
             file_names (Optional[List[str]]): List of file names to read. Defaults to None.
             dataframe_type (DataframeType): Type of dataframe to read. Defaults to DataframeType.pandas.
+            cleanup_on_close (bool): Whether to clean up downloaded temp files on close. Defaults to True.
 
         Raises:
             ValueError: When path is not provided or when single file path is combined with file_names
@@ -81,12 +111,22 @@ class ParquetFileReader(Reader):
         self.buffer_size = buffer_size
         self.file_names = file_names
         self.dataframe_type = dataframe_type
+        self.cleanup_on_close = cleanup_on_close
+        self._is_closed = False
+        self._downloaded_files: List[str] = []
 
     async def read(self) -> Union["pd.DataFrame", "daft.DataFrame"]:
+        """Read the data from the parquet files and return as a single DataFrame.
+
+        Returns:
+            Union[pd.DataFrame, daft.DataFrame]: Combined dataframe from parquet files.
+
+        Raises:
+            ValueError: If the reader has been closed or dataframe_type is unsupported.
         """
-        Method to read the data from the parquet files in the path
-        and return as a single combined pandas dataframe
-        """
+        if self._is_closed:
+            raise ValueError("Cannot read from a closed reader")
+
         if self.dataframe_type == DataframeType.pandas:
             return await self._get_dataframe()
         elif self.dataframe_type == DataframeType.daft:
@@ -100,10 +140,18 @@ class ParquetFileReader(Reader):
         AsyncIterator["pd.DataFrame"],
         AsyncIterator["daft.DataFrame"],
     ]:
+        """Read the data from the parquet files and return as batched DataFrames.
+
+        Returns:
+            Union[AsyncIterator[pd.DataFrame], AsyncIterator[daft.DataFrame]]:
+                Async iterator of DataFrames.
+
+        Raises:
+            ValueError: If the reader has been closed or dataframe_type is unsupported.
         """
-        Method to read the data from the parquet files in the path
-        and return as a batched pandas dataframe
-        """
+        if self._is_closed:
+            raise ValueError("Cannot read from a closed reader")
+
         if self.dataframe_type == DataframeType.pandas:
             return self._get_batched_dataframe()
         elif self.dataframe_type == DataframeType.daft:
@@ -149,6 +197,8 @@ class ParquetFileReader(Reader):
             parquet_files = await download_files(
                 self.path, PARQUET_FILE_EXTENSION, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(parquet_files)
             logger.info(f"Reading {len(parquet_files)} parquet files")
 
             return pd.concat(
@@ -208,6 +258,8 @@ class ParquetFileReader(Reader):
             parquet_files = await download_files(
                 self.path, PARQUET_FILE_EXTENSION, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(parquet_files)
             logger.info(f"Reading {len(parquet_files)} parquet files in batches")
 
             # Process each file individually to maintain memory efficiency
@@ -259,6 +311,8 @@ class ParquetFileReader(Reader):
             parquet_files = await download_files(
                 self.path, PARQUET_FILE_EXTENSION, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(parquet_files)
             logger.info(f"Reading {len(parquet_files)} parquet files with daft")
 
             # Use the discovered/downloaded files directly
@@ -317,6 +371,8 @@ class ParquetFileReader(Reader):
             parquet_files = await download_files(
                 self.path, PARQUET_FILE_EXTENSION, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(parquet_files)
             logger.info(f"Reading {len(parquet_files)} parquet files as daft batches")
 
             # Create a lazy dataframe without loading data into memory
@@ -426,16 +426,9 @@ class AtlanObservability(Generic[T], ABC):
                     chunk_start=0,
                     chunk_part=int(time()),
                 )
-                logging.info(
-                    f"Successfully instantiated ParquetFileWriter for partition: {partition_path}"
-                )
 
                 await parquet_writer._write_dataframe(dataframe=df)
 
-                logging.info(
-                    f"Successfully wrote {len(df)} records to partition: {partition_path}"
-                )
-
             except Exception as partition_error:
                 logging.error(
                     f"Error processing partition {partition_path}: {str(partition_error)}"
@@ -459,9 +459,22 @@ class ObjectStore:
 
         logger.info(f"Found {len(file_list)} files to download from: {source}")
 
+        # Normalize source prefix to use forward slashes for comparison
+        normalized_source = cls._normalize_object_store_key(source)
+
         # Download each file
         for file_path in file_list:
-            local_file_path = os.path.join(destination, file_path)
+            normalized_file_path = cls._normalize_object_store_key(file_path)
+            if normalized_file_path.startswith(normalized_source):
+                # Extract relative path after the prefix
+                relative_path = normalized_file_path[
+                    len(normalized_source) :
+                ].lstrip("/")
+            else:
+                # Fallback to just the filename
+                relative_path = os.path.basename(normalized_file_path)
+
+            local_file_path = os.path.join(destination, relative_path)
             await cls.download_file(file_path, local_file_path, store_name)
 
         logger.info(f"Successfully downloaded all files from: {source}")
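
A standalone sketch of the relative-path handling added above. The SDK's cls._normalize_object_store_key is not shown in this diff, so a simple backslash-to-slash normalization stands in for it here; treat that helper as an assumption.

    import os

    def normalize_key(key: str) -> str:
        # Stand-in for ObjectStore._normalize_object_store_key (assumed behaviour)
        return key.replace("\\", "/")

    def local_path_for(source: str, file_path: str, destination: str) -> str:
        normalized_source = normalize_key(source)
        normalized_file_path = normalize_key(file_path)
        if normalized_file_path.startswith(normalized_source):
            # Preserve the directory structure below the source prefix
            relative_path = normalized_file_path[len(normalized_source):].lstrip("/")
        else:
            # Fallback: flatten to the bare file name
            relative_path = os.path.basename(normalized_file_path)
        return os.path.join(destination, relative_path)

    print(local_path_for("bucket/raw", "bucket/raw/db/table.parquet", "/tmp/out"))
    # prints /tmp/out/db/table.parquet on POSIX systems
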
@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type
 
 import daft
 import yaml
+from daft.functions import to_struct, when
 from pyatlan.model.enums import AtlanConnectorType
 
 from application_sdk.observability.logger_adaptor import get_logger
@@ -227,7 +228,7 @@ class QueryBasedTransformer(TransformerInterface):
         # Only create a struct if we have fields
         if struct_fields:
             # Create the struct first
-            struct = daft.struct(*struct_fields)
+            struct = to_struct(*struct_fields)
 
             # If we have non-null checks, apply them
             if non_null_fields:
@@ -236,8 +237,8 @@ class QueryBasedTransformer(TransformerInterface):
                 for check in non_null_fields[1:]:
                     any_non_null = any_non_null | check
 
-                # Use if_else on the any_non_null Expression
-                return any_non_null.if_else(struct, None).alias(prefix)
+                # Use when().otherwise() for conditional expression (replaces if_else in daft 0.7+)
+                return when(any_non_null, struct).otherwise(None).alias(prefix)
 
             return struct.alias(prefix)
 
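Since the transformer now targets daft's newer expression API (see the daft>=0.7.1 pin further down), a minimal sketch of the when()/otherwise() conditional-struct pattern may help. It assumes daft >= 0.7; the column names and the not_null() checks are illustrative, not taken from the SDK's transformer.

    import daft
    from daft.functions import to_struct, when

    df = daft.from_pydict({"table_name": ["orders", None], "table_schema": ["public", None]})

    struct_fields = [daft.col("table_name"), daft.col("table_schema")]
    non_null_checks = [field.not_null() for field in struct_fields]

    any_non_null = non_null_checks[0]
    for check in non_null_checks[1:]:
        any_non_null = any_non_null | check

    # when(condition, value).otherwise(fallback) replaces Expression.if_else(value, fallback)
    df = df.select(
        when(any_non_null, to_struct(*struct_fields)).otherwise(None).alias("attributes")
    )
    df.show()
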
@@ -2,4 +2,4 @@
 Version information for the application_sdk package.
 """
 
-__version__ = "2.0.0"
+__version__ = "2.1.1"
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: atlan-application-sdk
-Version: 2.0.0
+Version: 2.1.1
 Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
 Project-URL: Repository, https://github.com/atlanhq/application-sdk
 Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
@@ -31,7 +31,7 @@ Requires-Dist: pydantic<2.13.0,>=2.10.6
 Requires-Dist: python-dotenv<1.3.0,>=1.1.0
 Requires-Dist: uvloop<0.23.0,>=0.21.0; sys_platform != 'win32'
 Provides-Extra: daft
-Requires-Dist: daft<0.8.0,>=0.4.12; extra == 'daft'
+Requires-Dist: daft<0.8.0,>=0.7.1; extra == 'daft'
 Provides-Extra: distributed-lock
 Requires-Dist: redis[hiredis]<7.2.0,>=5.2.0; extra == 'distributed-lock'
 Provides-Extra: iam-auth
@@ -1,18 +1,18 @@
 application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
 application_sdk/constants.py,sha256=TvdmKQShVWBNQZdVF2y-fxuE31FmeraTnqQ9jT_n5XY,11567
-application_sdk/version.py,sha256=TY0SZFUH9N2qGF6tlbmJIww-CY7x-myMGotNEhFJko4,84
+application_sdk/version.py,sha256=sNbvXviG7NgxM58lOHKhbZfERat5qAJNr3UZy_toVQs,84
 application_sdk/worker.py,sha256=DLMocpHvvwpdAopyXhxwM7ftaNlKvZMQfkgy1MFyiik,7561
-application_sdk/activities/__init__.py,sha256=6SiefuOPUDGfN3z6oPY4RkQLiUmkHpoDy5xadzpDzAw,11588
+application_sdk/activities/__init__.py,sha256=i7iY6aL1VFg185n2rLLvD_sI2BA9zJ33jL5rD_sY__U,12350
 application_sdk/activities/lock_management.py,sha256=6Wdf3jMKitoarHQP91PIJOoGFz4aaOLS_40c7n1yAOA,3902
 application_sdk/activities/.cursor/BUGBOT.md,sha256=FNykX5aMkdOhzgpiGqstOnSp9JN63iR2XP3onU4AGh8,15843
 application_sdk/activities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/common/models.py,sha256=43MF_w0EzEQiJvGIqF_FNet4X6MEmwqYd3YAsHdQn08,1362
-application_sdk/activities/common/sql_utils.py,sha256=csLM8H9L5NY5_rhxHBFo-jkMoOKjxhB9xaFbnLbkBGg,10177
+application_sdk/activities/common/sql_utils.py,sha256=QD4qOGkgJmlAGZKaSxqfC0AkjZVdTqdr6Q_Tw2CjIsM,10246
 application_sdk/activities/common/utils.py,sha256=ngyFmiZnMCAQtyu6vGeAlkzwNkM29MD_gBU5pWqOxJ4,8392
 application_sdk/activities/metadata_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/metadata_extraction/base.py,sha256=ENFojpxqKdN_eVSL4iet3cGfylPOfcl1jnflfo4zhs8,3920
 application_sdk/activities/metadata_extraction/rest.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-application_sdk/activities/metadata_extraction/sql.py,sha256=IkI1ZhOKyoSwosRT-g8c8IDBuFBq7mwyHLpDvwYO_B4,25451
+application_sdk/activities/metadata_extraction/sql.py,sha256=CmE77EsgbOuDL5AKaRCnq1jApJnDWNVxx-RZ49cJwus,27415
 application_sdk/activities/query_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/query_extraction/sql.py,sha256=Gsa79R8CYY0uyt3rA2nLMfQs8-C4_zg1pJ_yYSF2cZw,21193
 application_sdk/application/__init__.py,sha256=vcrQsqlfmGvKcCZuOtHHaNRqHSGdXlEDftkb8Tv_shI,9867
@@ -64,15 +64,15 @@ application_sdk/interceptors/events.py,sha256=e0O6uK9_aCTmOORaTGN9RbcTg9_KNaakq-
 application_sdk/interceptors/lock.py,sha256=5ETm20zrTaH2b9fepN4Ckp1tGJV-uINqDrno_5RW3aw,6169
 application_sdk/interceptors/models.py,sha256=kEzJKvb-G1M7aKrLPgAmsukJXLXeh8hIJKwEkOiaY28,6115
 application_sdk/interceptors/.cursor/BUGBOT.md,sha256=pxmUF2c7dtaXAX8yAa1-LBa6FCrj_uw7aQcHrppjf1A,14570
-application_sdk/io/__init__.py,sha256=i5EbFW0IInErum4DTRAhWBjwT2EwiUHTpdYBHNmI-I4,24317
-application_sdk/io/json.py,sha256=yjgCMzbz4Ltyt0PCylVRrUqGl2FhQL_wfWW1KtSi-II,17435
-application_sdk/io/parquet.py,sha256=1J_C5c_LwDfuugIpiQqYgmU2uVcGearF5Kkrx-_34_I,31740
+application_sdk/io/__init__.py,sha256=Fse-fEyrpMlLUxwyFkH8vWWSXz8rdWGlAjZy5ulAZCU,27767
+application_sdk/io/json.py,sha256=sNSyHZCM_ZeaiJHUrolYVHKreBQqSCBsfsjD3JSkoD0,19729
+application_sdk/io/parquet.py,sha256=zy9H_TvWI5CkktJ582NH7Ut_5rUH_S0Jy7ZbTD0JxeI,34227
 application_sdk/io/utils.py,sha256=sn_8Q6HgjeC8uyZp2XGMAfqdJ8XzkIllOEVYXIH54DY,10724
 application_sdk/observability/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/observability/context.py,sha256=lJjpfxEjMY_hrdSDqq519YaWcztgc_1nM4d-mGV5shs,634
 application_sdk/observability/logger_adaptor.py,sha256=Fq5OE579ozr0EzsNYEh2H0q3POVAxtlWfJ-PSwWDGLM,30194
 application_sdk/observability/metrics_adaptor.py,sha256=5Oz02lUED60duryoVDF9mbD11fpxhbXi7P1609n_15Y,16446
-application_sdk/observability/observability.py,sha256=m5OgD_akc3YrkF5mCme2HcRaY7ysjLbVSaEs2C-K2Fs,24062
+application_sdk/observability/observability.py,sha256=O2rBal_0pmFRen7Yx4c4dSH1NyiT937b4bY2w63q-4U,23751
 application_sdk/observability/traces_adaptor.py,sha256=0eQJPN-tYA_dV8D3uEa5ZiX9g12NDuLnPaFuQMVDdL0,18242
 application_sdk/observability/utils.py,sha256=-02GAFom8Bg4SNyCTNYySmen2dzvLfTu43bqsNq1AH0,3096
 application_sdk/observability/decorators/observability_decorator.py,sha256=yd6qfrg1MmH5KcZ5Ydzb0RaBzmxx5FrmiI9qwvZx3EU,8963
@@ -92,7 +92,7 @@ application_sdk/services/__init__.py,sha256=H-5HZEPdr53MUfAggyHqHhRXDRLZFZsxvJgW
 application_sdk/services/_utils.py,sha256=0yHqDP6qNb1OT-bX2XRYQPZ5xkGkV13nyRw6GkPlHs8,1136
 application_sdk/services/atlan_storage.py,sha256=TKzXxu0yXeUcmZehwp8PcnQTC4A9w9RlZ0Fl-Xp1bLE,8509
 application_sdk/services/eventstore.py,sha256=wCT921KRzUe3fAWKC-bbM6_OtIJTKpSQrOutPQzMEgs,6745
-application_sdk/services/objectstore.py,sha256=JgQrL9z_6aG5micVd7I2N6l3RA4EUJh4T2BCuC_ATwQ,20161
+application_sdk/services/objectstore.py,sha256=dLljCsPPSr24bPKh0l3-xRblofzKVQ4LDfqDrMp8JGc,20819
 application_sdk/services/secretstore.py,sha256=Pmn1WlmHmgaDhWz5OXBB5_rKXQQMyLMzadwZSNKwc6Q,19070
 application_sdk/services/statestore.py,sha256=3-afiM3Vsoe1XDYRokdGTB5I5CwOKyieuX5RwIZf77o,9413
 application_sdk/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -138,7 +138,7 @@ application_sdk/transformers/atlas/__init__.py,sha256=fw3D8bBtt61SseAfYut3JZddpX
 application_sdk/transformers/atlas/sql.py,sha256=rkQXNZ7oebts5oF5E_Bw8NpcHHKScU0TmKciH_1l_k4,50419
 application_sdk/transformers/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/transformers/common/utils.py,sha256=4ISMIQ0Gzghmi31p51FOFm5KLF7XF-fmH9PVT7i0DFE,4899
-application_sdk/transformers/query/__init__.py,sha256=yG1dGP3NhUizwkCgyFAzsr9SV9uWYZKjXoCWPrsIxVw,17358
+application_sdk/transformers/query/__init__.py,sha256=4uVCU-NfDe08PlffjWQ5p4smQa7c518IL2rDgIk6694,17446
 application_sdk/transformers/query/templates/column.yaml,sha256=EXLYwGXN7LKT-v51n2EZnY99o6vHucyFaVSpM-sUSXw,7679
 application_sdk/transformers/query/templates/database.yaml,sha256=SD1hJg5LI7gsBHQL5mW341sa51EkhcsIDDFlIOi9zdk,1374
 application_sdk/transformers/query/templates/extras-procedure.yaml,sha256=XhAfVY4zm99K8fcgkYA1XPLv4ks-SA6SzMO3SMtQ60s,2298
@@ -152,8 +152,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
 application_sdk/workflows/metadata_extraction/sql.py,sha256=6ZaVt84n-8U2ZvR9GR7uIJKv5v8CuyQjhlnoRJvDszc,12435
 application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
 application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
-atlan_application_sdk-2.0.0.dist-info/METADATA,sha256=B9-UVeLlDGDuxko6Nvb6Y8zrLNueaATYaJZMESikYZU,5806
-atlan_application_sdk-2.0.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-atlan_application_sdk-2.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-atlan_application_sdk-2.0.0.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
-atlan_application_sdk-2.0.0.dist-info/RECORD,,
+atlan_application_sdk-2.1.1.dist-info/METADATA,sha256=Vc2uG2FMhuNXyZFXmGMmvc_LRpCBaNTcQEHpSV8NpOE,5805
+atlan_application_sdk-2.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+atlan_application_sdk-2.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+atlan_application_sdk-2.1.1.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
+atlan_application_sdk-2.1.1.dist-info/RECORD,,