atlan-application-sdk 0.1.1rc36__py3-none-any.whl → 0.1.1rc38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/common/models.py +2 -1
- application_sdk/clients/atlan.py +56 -0
- application_sdk/inputs/parquet.py +4 -4
- application_sdk/outputs/__init__.py +24 -1
- application_sdk/outputs/json.py +57 -38
- application_sdk/outputs/parquet.py +116 -18
- application_sdk/version.py +1 -1
- application_sdk/workflows/metadata_extraction/sql.py +18 -5
- {atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/RECORD +13 -14
- application_sdk/clients/async_atlan.py +0 -70
- {atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/licenses/NOTICE +0 -0

application_sdk/activities/common/models.py
CHANGED

@@ -4,7 +4,7 @@ This module contains Pydantic models used to represent various data structures
 needed by activities, such as statistics and configuration.
 """
 
-from typing import Optional
+from typing import List, Optional
 
 from pydantic import BaseModel
 
@@ -34,4 +34,5 @@ class ActivityStatistics(BaseModel):
 
     total_record_count: int = 0
     chunk_count: int = 0
+    partitions: Optional[List[int]] = []
    typename: Optional[str] = None
application_sdk/clients/atlan.py
CHANGED

@@ -1,5 +1,6 @@
 from typing import Optional
 
+from pyatlan.client.aio import AsyncAtlanClient
 from pyatlan.client.atlan import AtlanClient
 
 from application_sdk.common.error_codes import ClientError
@@ -68,3 +69,58 @@ def _get_client_from_token(api_token_guid: str):
             f"{ClientError.AUTH_CONFIG_ERROR}: Environment variable CLIENT_SECRET is required when API_TOKEN_GUID is set."
         )
     return AtlanClient.from_token_guid(guid=api_token_guid)
+
+
+async def get_async_client(
+    base_url: Optional[str] = None,
+    api_key: Optional[str] = None,
+    api_token_guid: Optional[str] = None,
+) -> AsyncAtlanClient:
+    """
+    Returns an authenticated AsyncAtlanClient instance using provided parameters or environment variables.
+
+    Selects authentication method based on the presence of parameters or environment variables and validates the required configuration.
+    In general, the use of environment variables is recommended. Any parameters specified will override the environment variables.
+
+    Args:
+        base_url: Atlan base URL (overrides ATLAN_BASE_URL)
+        api_key: Atlan API key (overrides ATLAN_API_KEY)
+        api_token_guid: API token GUID (overrides API_TOKEN_GUID)
+    """
+    # Resolve final values (parameters override env vars)
+    final_token_guid = api_token_guid or ATLAN_API_TOKEN_GUID
+    final_base_url = base_url or ATLAN_BASE_URL
+    final_api_key = api_key or ATLAN_API_KEY
+
+    # Priority 1: Token-based auth (recommended for production)
+    if final_token_guid:
+        if final_base_url or final_api_key:
+            logger.warning(
+                "Token auth takes precedence - ignoring base_url/api_key parameters as well as ATLAN_BASE_URL and ATLAN_API_KEY environment variables."
+            )
+        return await _get_async_client_from_token(final_token_guid)
+
+    # Priority 2: API key + base URL auth
+    if not final_base_url:
+        raise ClientError(
+            "ATLAN_BASE_URL is required (via parameter or environment variable)"
+        )
+    if not final_api_key:
+        raise ClientError(
+            "ATLAN_API_KEY is required (via parameter or environment variable)"
+        )
+
+    logger.info("Using API key-based authentication")
+    return AsyncAtlanClient(base_url=final_base_url, api_key=final_api_key)
+
+
+async def _get_async_client_from_token(api_token_guid: str):
+    if not ATLAN_CLIENT_ID:
+        raise ClientError(
+            f"{ClientError.AUTH_CONFIG_ERROR}: Environment variable CLIENT_ID is required when API_TOKEN_GUID is set."
+        )
+    if not ATLAN_CLIENT_SECRET:
+        raise ClientError(
+            f"{ClientError.AUTH_CONFIG_ERROR}: Environment variable CLIENT_SECRET is required when API_TOKEN_GUID is set."
+        )
+    return await AsyncAtlanClient.from_token_guid(guid=api_token_guid)
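
The new `get_async_client` mirrors the existing synchronous helper: token-based auth wins when an API token GUID is configured, otherwise a base URL plus API key are required. A hedged usage sketch, assuming the environment variables described in the docstring above are already set:

```python
# Usage sketch only; assumes ATLAN_BASE_URL and ATLAN_API_KEY (or API_TOKEN_GUID
# together with CLIENT_ID/CLIENT_SECRET) are configured in the environment.
import asyncio

from application_sdk.clients.atlan import get_async_client


async def main() -> None:
    client = await get_async_client()  # parameters, if passed, override env vars
    print(type(client).__name__)  # AsyncAtlanClient


asyncio.run(main())
```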

application_sdk/inputs/parquet.py
CHANGED

@@ -10,7 +10,7 @@ from application_sdk.services.objectstore import ObjectStore
 logger = get_logger(__name__)
 
 if TYPE_CHECKING:
-    import daft
+    import daft  # type: ignore
     import pandas as pd
 
 
@@ -136,7 +136,7 @@ class ParquetInput(Input):
             daft.DataFrame: Combined daft dataframe from all parquet files.
         """
         try:
-            import daft
+            import daft  # type: ignore
 
             if self.file_names:
                 path = f"{self.path}/{self.file_names[0].split('/')[0]}"
@@ -161,11 +161,11 @@ class ParquetInput(Input):
             a batch of data from the parquet file(s).
         """
         try:
-            import daft
+            import daft  # type: ignore
 
             if self.file_names:
                 for file_name in self.file_names:
-                    path = f"{self.path}/{file_name
+                    path = f"{self.path}/{file_name}"
                     if self.input_prefix and path:
                         await self.download_files(path)
                     yield daft.read_parquet(path)
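
Beyond the `# type: ignore` hints, the per-file loop now closes the path f-string correctly before downloading and reading each file. A hedged sketch of that read pattern outside the SDK; the path is a made-up example and `daft` must be installed:

```python
# Illustrative only: read one downloaded chunk file with daft, as the fixed loop
# above does per file_name. The path below is a hypothetical example.
import daft

path = "/tmp/artifacts/raw/table/chunk-0-part1.parquet"
df = daft.read_parquet(path)
df.show()
```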

application_sdk/outputs/__init__.py
CHANGED

@@ -13,6 +13,7 @@ from typing import (
     Dict,
     Generator,
     List,
+    Literal,
     Optional,
     Union,
     cast,
@@ -31,7 +32,7 @@ logger = get_logger(__name__)
 activity.logger = logger
 
 if TYPE_CHECKING:
-    import daft
+    import daft  # type: ignore
     import pandas as pd
 
 
@@ -52,6 +53,27 @@ class Output(ABC):
     output_prefix: str
     total_record_count: int
     chunk_count: int
+    statistics: List[int] = []
+
+    def estimate_dataframe_file_size(
+        self, dataframe: "pd.DataFrame", file_type: Literal["json", "parquet"]
+    ) -> int:
+        """Estimate File size of a DataFrame by sampling a few records."""
+        if len(dataframe) == 0:
+            return 0
+
+        # Sample up to 10 records to estimate average size
+        sample_size = min(10, len(dataframe))
+        sample = dataframe.head(sample_size)
+        if file_type == "json":
+            sample_file = sample.to_json(orient="records", lines=True)
+        else:
+            sample_file = sample.to_parquet(index=False, compression="snappy")
+        if sample_file is not None:
+            avg_record_size = len(sample_file) / sample_size
+            return int(avg_record_size * len(dataframe))
+
+        return 0
 
     def process_null_fields(
         self,
@@ -217,6 +239,7 @@ class Output(ABC):
         statistics = {
             "total_record_count": self.total_record_count,
             "chunk_count": self.chunk_count,
+            "partitions": self.statistics,
         }
 
         # Write the statistics to a json file
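
The new `estimate_dataframe_file_size` helper serializes a sample of up to 10 rows and scales the average serialized row size by the full row count, so outputs can decide when a buffered chunk would push a file past the Dapr message limit. A standalone sketch of the same idea using plain pandas; the function name and data below are illustrative, not SDK code:

```python
# Standalone re-implementation of the sampling-based size estimate, for illustration.
import pandas as pd


def estimate_json_lines_size(df: pd.DataFrame, sample_rows: int = 10) -> int:
    """Extrapolate the JSON-lines size of a DataFrame from a small sample."""
    if len(df) == 0:
        return 0
    sample = df.head(min(sample_rows, len(df)))
    sample_bytes = len(sample.to_json(orient="records", lines=True))
    return int(sample_bytes / len(sample) * len(df))


df = pd.DataFrame({"id": range(1_000), "name": [f"row-{i}" for i in range(1_000)]})
print(estimate_json_lines_size(df))  # rough byte estimate used to cap file sizes
```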
application_sdk/outputs/json.py
CHANGED

@@ -6,6 +6,7 @@ import orjson
 from temporalio import activity
 
 from application_sdk.activities.common.utils import get_object_store_prefix
+from application_sdk.constants import DAPR_MAX_GRPC_MESSAGE_LENGTH
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
@@ -15,7 +16,7 @@ logger = get_logger(__name__)
 activity.logger = logger
 
 if TYPE_CHECKING:
-    import daft
+    import daft  # type: ignore
     import pandas as pd
 
 
@@ -32,7 +33,7 @@ def path_gen(chunk_start: int | None, chunk_count: int) -> str:
     if chunk_start is None:
         return f"{str(chunk_count)}.json"
     else:
-        return f"{
+        return f"chunk-{chunk_start}-part{chunk_count}.json"
 
 
 def convert_datetime_to_epoch(data: Any) -> Any:
@@ -124,6 +125,10 @@ class JsonOutput(Output):
         self.chunk_size = chunk_size or 100000
         self.buffer: List[Union["pd.DataFrame", "daft.DataFrame"]] = []  # noqa: F821
         self.current_buffer_size = 0
+        self.current_buffer_size_bytes = 0  # Track estimated buffer size in bytes
+        self.max_file_size_bytes = int(
+            DAPR_MAX_GRPC_MESSAGE_LENGTH * 0.9
+        )  # 90% of DAPR limit as safety buffer
         self.path_gen = path_gen
         self.start_marker = start_marker
         self.end_marker = end_marker
@@ -172,8 +177,21 @@ class JsonOutput(Output):
             ]
 
             for chunk in chunks:
+                # Estimate size of this chunk
+                chunk_size_bytes = self.estimate_dataframe_file_size(chunk, "json")
+
+                # Check if adding this chunk would exceed size limit
+                if (
+                    self.current_buffer_size_bytes + chunk_size_bytes
+                    > self.max_file_size_bytes
+                    and self.current_buffer_size > 0
+                ):
+                    # Flush current buffer before adding this chunk
+                    await self._flush_buffer()
+
                 self.buffer.append(chunk)
                 self.current_buffer_size += len(chunk)
+                self.current_buffer_size_bytes += chunk_size_bytes
 
                 if self.current_buffer_size >= partition:
                     await self._flush_buffer()
@@ -237,45 +255,19 @@ class JsonOutput(Output):
                     row, preserve_fields, null_to_empty_dict_fields
                 )
                 # Serialize the row and add it to the buffer
-
-
-
-
-                )
-
-
-
-
-                output_file_name = f"{self.output_path}/{self.path_gen(self.chunk_start, self.chunk_count)}"
-                with open(output_file_name, "w") as f:
-                    f.writelines(buffer)
-                buffer.clear()  # Clear the buffer
-
-                # Record chunk metrics
-                self.metrics.record_metric(
-                    name="json_chunks_written",
-                    value=1,
-                    metric_type=MetricType.COUNTER,
-                    labels={"type": "daft"},
-                    description="Number of chunks written to JSON files",
-                )
+                serialized_row = orjson.dumps(
+                    cleaned_row, option=orjson.OPT_APPEND_NEWLINE
+                ).decode("utf-8")
+                buffer.append(serialized_row)
+                self.current_buffer_size_bytes += len(serialized_row)
+                if (self.chunk_size and len(buffer) >= self.chunk_size) or (
+                    self.current_buffer_size_bytes > self.max_file_size_bytes
+                ):
+                    await self.flush_daft_buffer(buffer)
 
             # Write any remaining rows in the buffer
             if buffer:
-                self.
-                output_file_name = f"{self.output_path}/{self.path_gen(self.chunk_start, self.chunk_count)}"
-                with open(output_file_name, "w") as f:
-                    f.writelines(buffer)
-                buffer.clear()
-
-                # Record chunk metrics
-                self.metrics.record_metric(
-                    name="json_chunks_written",
-                    value=1,
-                    metric_type=MetricType.COUNTER,
-                    labels={"type": "daft"},
-                    description="Number of chunks written to JSON files",
-                )
+                await self.flush_daft_buffer(buffer)
 
             # Record metrics for successful write
             self.metrics.record_metric(
@@ -303,6 +295,32 @@ class JsonOutput(Output):
             )
             logger.error(f"Error writing daft dataframe to json: {str(e)}")
 
+    async def flush_daft_buffer(self, buffer: List[str]):
+        """Flush the current buffer to a JSON file.
+
+        This method combines all DataFrames in the buffer, writes them to a JSON file,
+        and uploads the file to the object store.
+        """
+        self.chunk_count += 1
+        output_file_name = (
+            f"{self.output_path}/{self.path_gen(self.chunk_start, self.chunk_count)}"
+        )
+        with open(output_file_name, "w") as f:
+            f.writelines(buffer)
+        buffer.clear()  # Clear the buffer
+
+        self.current_buffer_size = 0
+        self.current_buffer_size_bytes = 0
+
+        # Record chunk metrics
+        self.metrics.record_metric(
+            name="json_chunks_written",
+            value=1,
+            metric_type=MetricType.COUNTER,
+            labels={"type": "daft"},
+            description="Number of chunks written to JSON files",
+        )
+
     async def _flush_buffer(self):
         """Flush the current buffer to a JSON file.
 
@@ -353,6 +371,7 @@ class JsonOutput(Output):
 
             self.buffer.clear()
             self.current_buffer_size = 0
+            self.current_buffer_size_bytes = 0
 
         except Exception as e:
             # Record metrics for failed write
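
Both JSON write paths now flush whenever the estimated buffer size would cross `max_file_size_bytes`, set to 90% of the Dapr gRPC message limit, and chunked files are named `chunk-{chunk_start}-part{chunk_count}.json`. A small sketch of that arithmetic and naming; the 4 MiB limit below is an assumed example value, not the SDK constant:

```python
# Illustrative threshold math and file naming; DAPR_MAX_GRPC_MESSAGE_LENGTH here is
# an assumed example value (4 MiB), not imported from application_sdk.constants.
DAPR_MAX_GRPC_MESSAGE_LENGTH = 4 * 1024 * 1024
max_file_size_bytes = int(DAPR_MAX_GRPC_MESSAGE_LENGTH * 0.9)  # 90% safety buffer

buffered_bytes, next_chunk_bytes = 3_500_000, 600_000
if buffered_bytes + next_chunk_bytes > max_file_size_bytes:
    print("flush current buffer before appending the next chunk")

chunk_start, chunk_count = 0, 2
print(f"chunk-{chunk_start}-part{chunk_count}.json")  # -> chunk-0-part2.json
```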

application_sdk/outputs/parquet.py
CHANGED

@@ -1,9 +1,10 @@
 import os
-from typing import TYPE_CHECKING, Literal, Optional
+from typing import TYPE_CHECKING, List, Literal, Optional, Union
 
 from temporalio import activity
 
 from application_sdk.activities.common.utils import get_object_store_prefix
+from application_sdk.constants import DAPR_MAX_GRPC_MESSAGE_LENGTH
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
@@ -13,7 +14,7 @@ logger = get_logger(__name__)
 activity.logger = logger
 
 if TYPE_CHECKING:
-    import daft
+    import daft  # type: ignore
     import pandas as pd
 
 
@@ -46,6 +47,7 @@ class ParquetOutput(Output):
         typename: Optional[str] = None,
         write_mode: Literal["append", "overwrite", "overwrite-partitions"] = "append",
         chunk_size: Optional[int] = 100000,
+        buffer_size: Optional[int] = 100000,
         total_record_count: int = 0,
         chunk_count: int = 0,
         chunk_start: Optional[int] = None,
@@ -78,11 +80,19 @@ class ParquetOutput(Output):
         self.typename = typename
         self.write_mode = write_mode
         self.chunk_size = chunk_size
+        self.buffer_size = buffer_size
+        self.buffer: List[Union["pd.DataFrame", "daft.DataFrame"]] = []  # noqa: F821
         self.total_record_count = total_record_count
         self.chunk_count = chunk_count
+        self.current_buffer_size = 0
+        self.current_buffer_size_bytes = 0  # Track estimated buffer size in bytes
+        self.max_file_size_bytes = int(
+            DAPR_MAX_GRPC_MESSAGE_LENGTH * 0.9
+        )  # 90% of DAPR limit as safety buffer
         self.chunk_start = chunk_start
         self.start_marker = start_marker
         self.end_marker = end_marker
+        self.statistics = []
         self.metrics = get_metrics()
 
         # Create output directory
@@ -117,7 +127,7 @@ class ParquetOutput(Output):
         if chunk_start is None:
             return f"{str(chunk_count)}.parquet"
         else:
-            return f"{str(chunk_start
+            return f"chunk-{str(chunk_start)}-part{str(chunk_count)}.parquet"
 
     async def write_dataframe(self, dataframe: "pd.DataFrame"):
         """Write a pandas DataFrame to Parquet files and upload to object store.
@@ -126,20 +136,46 @@ class ParquetOutput(Output):
             dataframe (pd.DataFrame): The DataFrame to write.
         """
         try:
+            chunk_part = 0
             if len(dataframe) == 0:
                 return
 
-            #
-
-
-
-
-            # Write the dataframe to parquet using pandas native method
-            dataframe.to_parquet(
-                file_path,
-                index=False,
-                compression="snappy",  # Using snappy compression by default
+            # Split the DataFrame into chunks
+            partition = (
+                self.chunk_size
+                if self.chunk_start is None
+                else min(self.chunk_size, self.buffer_size)
             )
+            chunks = [
+                dataframe[i : i + partition]  # type: ignore
+                for i in range(0, len(dataframe), partition)
+            ]
+
+            for chunk in chunks:
+                # Estimate size of this chunk
+                chunk_size_bytes = self.estimate_dataframe_file_size(chunk, "parquet")
+
+                # Check if adding this chunk would exceed size limit
+                if (
+                    self.current_buffer_size_bytes + chunk_size_bytes
+                    > self.max_file_size_bytes
+                    and self.current_buffer_size > 0
+                ):
+                    # Flush current buffer before adding this chunk
+                    chunk_part += 1
+                    await self._flush_buffer(chunk_part)
+
+                self.buffer.append(chunk)
+                self.current_buffer_size += len(chunk)
+                self.current_buffer_size_bytes += chunk_size_bytes
+
+                if self.current_buffer_size >= partition:  # type: ignore
+                    chunk_part += 1
+                    await self._flush_buffer(chunk_part)
+
+            if self.buffer and self.current_buffer_size > 0:
+                chunk_part += 1
+                await self._flush_buffer(chunk_part)
 
             # Record metrics for successful write
             self.metrics.record_metric(
@@ -159,11 +195,8 @@ class ParquetOutput(Output):
                 description="Number of chunks written to Parquet files",
             )
 
-
-
-                source=file_path,
-                destination=get_object_store_prefix(file_path),
-            )
+            self.chunk_count += 1
+            self.statistics.append(chunk_part)
         except Exception as e:
             # Record metrics for failed write
             self.metrics.record_metric(
@@ -245,3 +278,68 @@ class ParquetOutput(Output):
             str: The full path of the output file.
         """
         return self.output_path
+
+    async def _flush_buffer(self, chunk_part):
+        """Flush the current buffer to a Parquet file.
+
+        This method combines all DataFrames in the buffer, writes them to a Parquet file,
+        and uploads the file to the object store.
+
+        Note:
+            If the buffer is empty or has no records, the method returns without writing.
+        """
+        import pandas as pd
+
+        if not self.buffer or not self.current_buffer_size:
+            return
+
+        if not all(isinstance(df, pd.DataFrame) for df in self.buffer):
+            raise TypeError(
+                "_flush_buffer encountered non-DataFrame elements in buffer. This should not happen."
+            )
+
+        try:
+            # Now it's safe to cast for pd.concat
+            pd_buffer: List[pd.DataFrame] = self.buffer  # type: ignore
+            combined_dataframe = pd.concat(pd_buffer)
+
+            # Write DataFrame to Parquet file
+            if not combined_dataframe.empty:
+                self.total_record_count += len(combined_dataframe)
+                output_file_name = (
+                    f"{self.output_path}/{self.path_gen(self.chunk_count, chunk_part)}"
+                )
+                combined_dataframe.to_parquet(
+                    output_file_name, index=False, compression="snappy"
+                )
+
+                # Record chunk metrics
+                self.metrics.record_metric(
+                    name="parquet_chunks_written",
+                    value=1,
+                    metric_type=MetricType.COUNTER,
+                    labels={"type": "pandas"},
+                    description="Number of chunks written to Parquet files",
+                )
+
+                # Push the file to the object store
+                await ObjectStore.upload_file(
+                    source=output_file_name,
+                    destination=get_object_store_prefix(output_file_name),
+                )
+
+            self.buffer.clear()
+            self.current_buffer_size = 0
+            self.current_buffer_size_bytes = 0
+
+        except Exception as e:
+            # Record metrics for failed write
+            self.metrics.record_metric(
+                name="parquet_write_errors",
+                value=1,
+                metric_type=MetricType.COUNTER,
+                labels={"type": "pandas", "error": str(e)},
+                description="Number of errors while writing to Parquet files",
+            )
+            logger.error(f"Error flushing buffer to parquet: {str(e)}")
+            raise e
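
With these changes each `write_dataframe` call may flush several part files for the same logical chunk, named by the new `path_gen`, and the number of parts is appended to `self.statistics` (surfaced as `partitions` in the activity statistics). A small sketch of the naming, re-implementing `path_gen` locally for illustration only:

```python
# Local re-implementation of the new naming scheme, for illustration only.
from typing import Optional


def path_gen(chunk_start: Optional[int], chunk_count: int) -> str:
    if chunk_start is None:
        return f"{chunk_count}.parquet"
    return f"chunk-{chunk_start}-part{chunk_count}.parquet"


# A chunk with index 0 that was flushed three times within one write_dataframe call:
print([path_gen(0, part) for part in (1, 2, 3)])
# ['chunk-0-part1.parquet', 'chunk-0-part2.parquet', 'chunk-0-part3.parquet']
# statistics would then gain the entry 3, i.e. partitions == [3] for this chunk.
```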
application_sdk/version.py
CHANGED

application_sdk/workflows/metadata_extraction/sql.py
CHANGED

@@ -107,7 +107,11 @@ class BaseSQLMetadataExtractionWorkflow(MetadataExtractionWorkflow):
         activity_statistics = ActivityStatistics.model_validate(raw_statistics)
         transform_activities: List[Any] = []
 
-        if
+        if (
+            activity_statistics is None
+            or activity_statistics.chunk_count == 0
+            or not activity_statistics.partitions
+        ):
             # to handle the case where the fetch_fn returns None or no chunks
             return
 
@@ -115,7 +119,9 @@ class BaseSQLMetadataExtractionWorkflow(MetadataExtractionWorkflow):
             raise ValueError("Invalid typename")
 
         batches, chunk_starts = self.get_transform_batches(
-            activity_statistics.chunk_count,
+            activity_statistics.chunk_count,
+            activity_statistics.typename,
+            activity_statistics.partitions,
         )
 
         for i in range(len(batches)):
@@ -144,7 +150,9 @@ class BaseSQLMetadataExtractionWorkflow(MetadataExtractionWorkflow):
             total_record_count += metadata_model.total_record_count
             chunk_count += metadata_model.chunk_count
 
-    def get_transform_batches(
+    def get_transform_batches(
+        self, chunk_count: int, typename: str, partitions: List[int]
+    ):
         """Get batches for parallel transformation processing.
 
         Args:
@@ -159,12 +167,17 @@ class BaseSQLMetadataExtractionWorkflow(MetadataExtractionWorkflow):
         batches: List[List[str]] = []
         chunk_start_numbers: List[int] = []
 
-        for i in
+        for i, partition in enumerate(partitions):
            # Track starting chunk number (which is just i)
            chunk_start_numbers.append(i)
 
            # Each batch contains exactly one chunk
-            batches.append(
+            batches.append(
+                [
+                    f"{typename}/chunk-{i}-part{file+1}.parquet"
+                    for file in range(partition)
+                ]
+            )
 
         return batches, chunk_start_numbers
 
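
`get_transform_batches` now expands the `partitions` counts into explicit per-chunk lists of parquet part files. A standalone sketch of that expansion (dropping `self` and the unused `chunk_count` argument to stay self-contained; the `TABLE` typename and counts are illustrative):

```python
# Standalone sketch of the batch expansion; mirrors the loop in the diff above.
from typing import List, Tuple


def get_transform_batches(
    typename: str, partitions: List[int]
) -> Tuple[List[List[str]], List[int]]:
    batches: List[List[str]] = []
    chunk_start_numbers: List[int] = []
    for i, partition in enumerate(partitions):
        chunk_start_numbers.append(i)  # starting chunk number is just i
        batches.append(
            [f"{typename}/chunk-{i}-part{part + 1}.parquet" for part in range(partition)]
        )
    return batches, chunk_start_numbers


batches, starts = get_transform_batches("TABLE", [2, 1])
print(batches)  # [['TABLE/chunk-0-part1.parquet', 'TABLE/chunk-0-part2.parquet'],
                #  ['TABLE/chunk-1-part1.parquet']]
print(starts)   # [0, 1]
```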
{atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: atlan-application-sdk
-Version: 0.1.
+Version: 0.1.1rc38
 Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
 Project-URL: Repository, https://github.com/atlanhq/application-sdk
 Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
{atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/RECORD
RENAMED

@@ -1,11 +1,11 @@
 application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
 application_sdk/constants.py,sha256=GzwZO0pa9M-FgibmfIs1lh-Fwo06K9Tk6WzGqMyJgpI,10362
-application_sdk/version.py,sha256=
+application_sdk/version.py,sha256=4PQKkm_QOy4mEJgwY5eIcUeeEIpIWPjzc_pKiJQmpXw,88
 application_sdk/worker.py,sha256=i5f0AeKI39IfsLO05QkwC6uMz0zDPSJqP7B2byri1VI,7489
 application_sdk/activities/__init__.py,sha256=QaXLOBYbb0zPOY5kfDQh56qbXQFaYNXOjJ5PCvatiZ4,9530
 application_sdk/activities/lock_management.py,sha256=L__GZ9BsArwU1ntYwAgCKsSjCqN6QBeOfT-OT4WyD4Y,3983
 application_sdk/activities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-application_sdk/activities/common/models.py,sha256=
+application_sdk/activities/common/models.py,sha256=LIZfWvTtgtbAUvvn-rwrPQgD7fP2J0Gxdxr_ITgw-jM,1243
 application_sdk/activities/common/utils.py,sha256=F4Fq9Gl_gvUQj_fSdwzTU7obqUnemYL1dgb_yS34vTM,6967
 application_sdk/activities/metadata_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/metadata_extraction/base.py,sha256=ENFojpxqKdN_eVSL4iet3cGfylPOfcl1jnflfo4zhs8,3920
@@ -16,8 +16,7 @@ application_sdk/activities/query_extraction/sql.py,sha256=mesGP_kiWzrJ8wboWFVt2j
 application_sdk/application/__init__.py,sha256=WDWDWP-IQ-ny7okqsrdTwH60cXKgXBRcnlJ1XVYfiNU,7957
 application_sdk/application/metadata_extraction/sql.py,sha256=ohpV4qZ92uKRlH7I_8G67ocnWkZJAZCU_7XdvqYPiN4,7966
 application_sdk/clients/__init__.py,sha256=C9T84J7V6ZumcoWJPAxdd3tqSmbyciaGBJn-CaCCny0,1341
-application_sdk/clients/
-application_sdk/clients/atlan.py,sha256=f2-Uk5KiPIDJEhGkfYctA_f3CwoVB_mWNBMVvxeLuY4,2684
+application_sdk/clients/atlan.py,sha256=l6yV39fr1006SJFwkOTNDQlbSFlHCZQaUPfdUlzdVEg,5053
 application_sdk/clients/atlan_auth.py,sha256=D7FuNqv81ohNXLJtdx1AFw_jU6a3g0Pw6149ia4ucFY,8930
 application_sdk/clients/base.py,sha256=TIn3pG89eXUc1XSYf4jk66m1vajWp0WxcCQOOltdazA,14021
 application_sdk/clients/redis.py,sha256=IfAD32vLp88BCvsDTaQtxFHxzHlEx4V7TK7h1HwDDBg,15917
@@ -56,7 +55,7 @@ application_sdk/handlers/sql.py,sha256=oeB-sgWwPYo31xaD87TyMc0h51Sary1F-CmhExt9_
 application_sdk/inputs/__init__.py,sha256=_d-cUhcDyoJTJR3PdQkC831go6VDw9AM6Bg7-qm3NHI,1900
 application_sdk/inputs/iceberg.py,sha256=xiv1kNtVx1k0h3ZJbJeXjZwdfBGSy9j9orYP_AyCYlI,2756
 application_sdk/inputs/json.py,sha256=Yv70Y9YuutN2trqK5-z2UNtBL0895ZbdEiBDt9cYM9s,6216
-application_sdk/inputs/parquet.py,sha256=
+application_sdk/inputs/parquet.py,sha256=GnyB0r4-7GNLBl3ooVFUzsxunZsrHStKK2h7XRc7AIY,6723
 application_sdk/inputs/sql_query.py,sha256=1EREgea6kKNaMIyX2HLJgbJ07rtAgLasd9NyvDcdZok,10636
 application_sdk/interceptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/interceptors/events.py,sha256=Kh0dEsc6q7YtlN9cxatiL_ZrmBxriv55r9lxvIKGg3A,6548
@@ -68,10 +67,10 @@ application_sdk/observability/observability.py,sha256=DP0I4bHyg3TA4hxCqDFy2IiRmB
 application_sdk/observability/traces_adaptor.py,sha256=0eQJPN-tYA_dV8D3uEa5ZiX9g12NDuLnPaFuQMVDdL0,18242
 application_sdk/observability/utils.py,sha256=MKEpT0WYtpATUgLgJDkGQaAP_t-jpDYMUKDfEvr8Phg,2448
 application_sdk/observability/decorators/observability_decorator.py,sha256=JNrWNXT5W4klmlAc5b8C3_VBjDu0PI64W2ptr7LMzk4,8110
-application_sdk/outputs/__init__.py,sha256
+application_sdk/outputs/__init__.py,sha256=HIENr2w9gu6u3sF_nvraj45yk53NDAddtaXSUHIVBjs,9469
 application_sdk/outputs/iceberg.py,sha256=IGtj5WDgqLu6vzDEvw5DLsKsjm29Krto3AHvWpemr0A,5311
-application_sdk/outputs/json.py,sha256=
-application_sdk/outputs/parquet.py,sha256=
+application_sdk/outputs/json.py,sha256=zyYQjGj5tb7bJhNt3ObwsuHT6Gakj8qNey-siUlWdP4,15065
+application_sdk/outputs/parquet.py,sha256=pJkOw-CV-JXr0Q4nJXCu0SScbv3I0usONZ4CT1KYxYI,13973
 application_sdk/server/__init__.py,sha256=KTqE1YPw_3WDVMWatJUuf9OOiobLM2K5SMaBrI62sCo,1568
 application_sdk/server/fastapi/__init__.py,sha256=YOdWNE-qqiXfo-exvxPg8T0PSuOxTdeSetUn6-BXxZg,27704
 application_sdk/server/fastapi/models.py,sha256=K6eNl3XXiTXKUvRTpq3oqdGH3jY1-ApobXma04J86fE,6665
@@ -139,11 +138,11 @@ application_sdk/transformers/query/templates/table.yaml,sha256=QQAGLD1UFjbpSA5wv
 application_sdk/transformers/query/templates/tag_attachment.yaml,sha256=dWNDGwRU4_P-t7ibv5XelMP36aGLG29U6MEXOA8zYt0,2884
 application_sdk/workflows/__init__.py,sha256=byluvgzTovr4L1co7YGb4--ktMBqt2pXBjYoxz4dIeU,3869
 application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bgyduPuECo2RdmJtQsQAKlakADEQbc,120
-application_sdk/workflows/metadata_extraction/sql.py,sha256=
+application_sdk/workflows/metadata_extraction/sql.py,sha256=BhaZavEL8H3Jvf28FGcHtZwqdsUT_EHZ4VTqiaieWek,12278
 application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
 application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
+atlan_application_sdk-0.1.1rc38.dist-info/METADATA,sha256=dQKtTnaVMPF0DU96vA850Gl6gzpRSmuL48ovbq1wD5o,5567
+atlan_application_sdk-0.1.1rc38.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+atlan_application_sdk-0.1.1rc38.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+atlan_application_sdk-0.1.1rc38.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
+atlan_application_sdk-0.1.1rc38.dist-info/RECORD,,

application_sdk/clients/async_atlan.py
DELETED

@@ -1,70 +0,0 @@
-from typing import Optional
-
-from pyatlan.client.aio.client import AsyncAtlanClient
-
-from application_sdk.common.error_codes import ClientError
-from application_sdk.constants import (
-    ATLAN_API_KEY,
-    ATLAN_API_TOKEN_GUID,
-    ATLAN_BASE_URL,
-    ATLAN_CLIENT_ID,
-    ATLAN_CLIENT_SECRET,
-)
-from application_sdk.observability.logger_adaptor import get_logger
-
-logger = get_logger(__name__)
-
-
-async def get_client(
-    base_url: Optional[str] = None,
-    api_key: Optional[str] = None,
-    api_token_guid: Optional[str] = None,
-) -> AsyncAtlanClient:
-    """
-    Returns an authenticated AsyncAtlanClient instance using provided parameters or environment variables.
-
-    Selects authentication method based on the presence of parameters or environment variables and validates the required configuration.
-    In general, the use of environment variables is recommended. Any parameters specified will override the environment variables.
-
-    Args:
-        base_url: Atlan base URL (overrides ATLAN_BASE_URL)
-        api_key: Atlan API key (overrides ATLAN_API_KEY)
-        api_token_guid: API token GUID (overrides API_TOKEN_GUID)
-    """
-    # Resolve final values (parameters override env vars)
-    final_token_guid = api_token_guid or ATLAN_API_TOKEN_GUID
-    final_base_url = base_url or ATLAN_BASE_URL
-    final_api_key = api_key or ATLAN_API_KEY
-
-    # Priority 1: Token-based auth (recommended for production)
-    if final_token_guid:
-        if final_base_url or final_api_key:
-            logger.warning(
-                "Token auth takes precedence - ignoring base_url/api_key parameters as well as ATLAN_BASE_URL and ATLAN_API_KEY environment variables."
-            )
-        return await _get_client_from_token(final_token_guid)
-
-    # Priority 2: API key + base URL auth
-    if not final_base_url:
-        raise ClientError(
-            "ATLAN_BASE_URL is required (via parameter or environment variable)"
-        )
-    if not final_api_key:
-        raise ClientError(
-            "ATLAN_API_KEY is required (via parameter or environment variable)"
-        )
-
-    logger.info("Using API key-based authentication")
-    return AsyncAtlanClient(base_url=final_base_url, api_key=final_api_key)
-
-
-async def _get_client_from_token(api_token_guid: str):
-    if not ATLAN_CLIENT_ID:
-        raise ClientError(
-            f"{ClientError.AUTH_CONFIG_ERROR}: Environment variable CLIENT_ID is required when API_TOKEN_GUID is set."
-        )
-    if not ATLAN_CLIENT_SECRET:
-        raise ClientError(
-            f"{ClientError.AUTH_CONFIG_ERROR}: Environment variable CLIENT_SECRET is required when API_TOKEN_GUID is set."
-        )
-    return await AsyncAtlanClient.from_token_guid(guid=api_token_guid)

{atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/WHEEL
RENAMED
File without changes

{atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/licenses/LICENSE
RENAMED
File without changes

{atlan_application_sdk-0.1.1rc36.dist-info → atlan_application_sdk-0.1.1rc38.dist-info}/licenses/NOTICE
RENAMED
File without changes