atlan-application-sdk 2.0.0__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/__init__.py +14 -0
- application_sdk/activities/common/sql_utils.py +6 -2
- application_sdk/activities/metadata_extraction/sql.py +40 -2
- application_sdk/io/__init__.py +101 -6
- application_sdk/io/json.py +68 -24
- application_sdk/io/parquet.py +64 -8
- application_sdk/observability/observability.py +0 -7
- application_sdk/services/objectstore.py +14 -1
- application_sdk/transformers/query/__init__.py +4 -3
- application_sdk/version.py +1 -1
- {atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/METADATA +2 -2
- {atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/RECORD +15 -15
- {atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/licenses/NOTICE +0 -0
application_sdk/activities/__init__.py CHANGED

@@ -15,6 +15,7 @@ Example:
 
 import os
 from abc import ABC
+from datetime import datetime, timedelta
 from typing import Any, Dict, Generic, Optional, TypeVar
 
 from pydantic import BaseModel
@@ -62,6 +63,7 @@ class ActivitiesState(BaseModel, Generic[HandlerType]):
     model_config = {"arbitrary_types_allowed": True}
     handler: Optional[HandlerType] = None
     workflow_args: Optional[Dict[str, Any]] = None
+    last_updated_timestamp: Optional[datetime] = None
 
 
 ActivitiesStateType = TypeVar("ActivitiesStateType", bound=ActivitiesState)
@@ -113,12 +115,15 @@ class ActivitiesInterface(ABC, Generic[ActivitiesStateType]):
         Note:
             The workflow ID is automatically retrieved from the current activity context.
             If no state exists for the current workflow, a new one will be created.
+            This method also updates the last_updated_timestamp to enable time-based
+            state refresh functionality.
         """
         workflow_id = get_workflow_id()
         if not self._state.get(workflow_id):
             self._state[workflow_id] = ActivitiesState()
 
         self._state[workflow_id].workflow_args = workflow_args
+        self._state[workflow_id].last_updated_timestamp = datetime.now()
 
     async def _get_state(self, workflow_args: Dict[str, Any]) -> ActivitiesStateType:
         """Retrieve the state for the current workflow.
@@ -142,6 +147,15 @@ class ActivitiesInterface(ABC, Generic[ActivitiesStateType]):
             workflow_id = get_workflow_id()
             if workflow_id not in self._state:
                 await self._set_state(workflow_args)
+
+            else:
+                current_timestamp = datetime.now()
+                # if difference of current_timestamp and last_updated_timestamp is greater than 15 minutes, then again _set_state
+                last_updated = self._state[workflow_id].last_updated_timestamp
+                if last_updated and current_timestamp - last_updated > timedelta(
+                    minutes=15
+                ):
+                    await self._set_state(workflow_args)
             return self._state[workflow_id]
         except OrchestratorError as e:
             logger.error(
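The refresh rule added here is easiest to see in isolation: state is rebuilt when more than 15 minutes have passed since last_updated_timestamp was set. Below is a minimal sketch of that check, using a hypothetical helper that is not part of the SDK::

    from datetime import datetime, timedelta
    from typing import Optional

    # Hypothetical helper mirroring the check in _get_state: state counts as
    # stale once more than 15 minutes have passed since the last _set_state.
    def is_state_stale(
        last_updated: Optional[datetime],
        ttl: timedelta = timedelta(minutes=15),
    ) -> bool:
        if last_updated is None:
            # No timestamp recorded yet; the caller creates fresh state instead.
            return False
        return datetime.now() - last_updated > ttl

    assert is_state_stale(datetime.now() - timedelta(minutes=20))      # stale
    assert not is_state_stale(datetime.now() - timedelta(minutes=5))   # still fresh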
application_sdk/activities/common/sql_utils.py CHANGED

@@ -131,7 +131,9 @@ async def finalize_multidb_results(
     dataframe_list: List[
         Union[AsyncIterator["pd.DataFrame"], Iterator["pd.DataFrame"]]
     ],
-    setup_parquet_output_func: Callable[
+    setup_parquet_output_func: Callable[
+        [str, bool, Optional[str]], Optional[ParquetFileWriter]
+    ],
     output_path: str,
     typename: str,
 ) -> Optional[Union[ActivityStatistics, "pd.DataFrame"]]:
@@ -189,7 +191,9 @@ async def finalize_multidb_results(
         return concatenated
 
     # Create new parquet output for concatenated data
-    concatenated_parquet_output = setup_parquet_output_func(
+    concatenated_parquet_output = setup_parquet_output_func(
+        output_path, True, typename
+    )
     if concatenated_parquet_output:
         await concatenated_parquet_output.write(concatenated)  # type: ignore
         return await concatenated_parquet_output.close()
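The first hunk above replaces a loosely typed callback parameter with an explicit signature: the callback takes an output path, a flag, and an optional typename, and may return a writer or None. Here is a rough sketch of a conforming callback under that annotation; the writer class and parameter names are stand-ins inferred from the call site, not the SDK's actual ParquetFileWriter::

    from typing import Callable, Optional

    class StubParquetWriter:
        """Stand-in writer used only to illustrate the callback signature."""
        def __init__(self, path: str) -> None:
            self.path = path

    # Conforms to Callable[[str, bool, Optional[str]], Optional[ParquetFileWriter]];
    # the call site passes (output_path, True, typename).
    def setup_parquet_output(
        output_path: str, concatenated: bool, typename: Optional[str]
    ) -> Optional[StubParquetWriter]:
        if typename is None:
            return None
        suffix = "concatenated" if concatenated else "raw"
        return StubParquetWriter(f"{output_path}/{typename}/{suffix}")

    factory: Callable[[str, bool, Optional[str]], Optional[StubParquetWriter]] = (
        setup_parquet_output
    )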
application_sdk/activities/metadata_extraction/sql.py CHANGED

@@ -1,4 +1,5 @@
 import os
+from datetime import datetime
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -60,6 +61,7 @@ class BaseSQLMetadataExtractionActivitiesState(ActivitiesState):
     sql_client: Optional[BaseSQLClient] = None
     handler: Optional[BaseSQLHandler] = None
     transformer: Optional[TransformerInterface] = None
+    last_updated_timestamp: Optional[datetime] = None
 
 
 class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
@@ -149,13 +151,30 @@ class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
 
         Args:
            workflow_args (Dict[str, Any]): Arguments passed to the workflow.
+
+        Note:
+            This method creates and configures the new SQL client before closing
+            the old one to ensure state is never left with a closed client if
+            initialization fails. The timestamp is only updated after the new
+            client is successfully created and assigned.
         """
         workflow_id = get_workflow_id()
         if not self._state.get(workflow_id):
             self._state[workflow_id] = BaseSQLMetadataExtractionActivitiesState()
 
-
+        existing_state = self._state[workflow_id]
+
+        # Update workflow_args early, but preserve old timestamp until new client is ready
+        # This ensures that if initialization fails, the state can still be refreshed
+        existing_state.workflow_args = workflow_args
+
+        # Store reference to old client for cleanup after new client is ready
+        old_sql_client = None
+        if existing_state and existing_state.sql_client is not None:
+            old_sql_client = existing_state.sql_client
 
+        # Create and configure new client BEFORE closing old one
+        # This ensures state is never left with a closed client if initialization fails
         sql_client = self.sql_client_class()
 
         # Load credentials BEFORE creating handler to avoid race condition
@@ -165,10 +184,29 @@ class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
         )
         await sql_client.load(credentials)
 
-        #
+        # Only after new client is successfully created and configured,
+        # close old client and assign new one to state
+        if old_sql_client is not None:
+            try:
+                await old_sql_client.close()
+                logger.debug(
+                    f"Closed existing SQL client for workflow {workflow_id} during state refresh"
+                )
+            except Exception as e:
+                logger.warning(
+                    f"Failed to close existing SQL client for workflow {workflow_id}: {e}",
+                    exc_info=True,
+                )
+                # Continue even if close fails - new client is already ready
+
+        # Assign sql_client and handler to state AFTER new client is ready
         self._state[workflow_id].sql_client = sql_client
         handler = self.handler_class(sql_client)
         self._state[workflow_id].handler = handler
+        # Update timestamp only after successful client creation and assignment
+        # This ensures that if initialization fails, the old timestamp remains
+        # and the state can be refreshed again immediately
+        self._state[workflow_id].last_updated_timestamp = datetime.now()
 
         # Create transformer with required parameters from ApplicationConstants
         transformer_params = {
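The Note added to the docstring describes a swap order: build and configure the replacement client first, close the old one second, and update state and timestamp last, so a failed initialization never leaves the state holding a closed client. A minimal sketch of that ordering with a hypothetical client class (not the SDK's BaseSQLClient)::

    import asyncio
    from datetime import datetime
    from typing import Optional

    class FakeClient:
        """Hypothetical async client used only to illustrate the swap order."""
        def __init__(self) -> None:
            self.closed = False
        async def load(self, credentials: dict) -> None:
            await asyncio.sleep(0)  # pretend to connect
        async def close(self) -> None:
            self.closed = True

    async def refresh_client(state: dict, credentials: dict) -> None:
        old_client: Optional[FakeClient] = state.get("client")

        # 1. Build and configure the replacement first; if this raises,
        #    the state still holds the old, working client and timestamp.
        new_client = FakeClient()
        await new_client.load(credentials)

        # 2. Only then close the old client; a failed close is tolerable
        #    because the new client is already usable.
        if old_client is not None:
            try:
                await old_client.close()
            except Exception:
                pass

        # 3. Assign the new client and bump the timestamp last.
        state["client"] = new_client
        state["last_updated_timestamp"] = datetime.now()

    asyncio.run(refresh_client({"client": FakeClient()}, {"user": "example"}))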
application_sdk/io/__init__.py CHANGED

@@ -49,9 +49,104 @@ if TYPE_CHECKING:
 
 
 class Reader(ABC):
+    """Abstract base class for reader data sources.
+
+    This class defines the interface for reader handlers that can read data
+    from various sources in different formats. Follows Python's file I/O
+    pattern with read/close semantics and supports context managers.
+
+    Attributes:
+        path (str): Path where the reader will read from.
+        _is_closed (bool): Whether the reader has been closed.
+        _downloaded_files (List[str]): List of downloaded temporary files to clean up.
+        cleanup_on_close (bool): Whether to clean up downloaded temp files on close.
+
+    Example:
+        Using close() explicitly::
+
+            reader = ParquetFileReader(path="/data/input")
+            df = await reader.read()
+            await reader.close()  # Cleans up any downloaded temp files
+
+        Using context manager (recommended)::
+
+            async with ParquetFileReader(path="/data/input") as reader:
+                df = await reader.read()
+            # close() called automatically
+
+        Reading in batches with context manager::
+
+            async with JsonFileReader(path="/data/input") as reader:
+                async for batch in reader.read_batches():
+                    process(batch)
+            # close() called automatically
     """
-
-
+
+    path: str
+    _is_closed: bool = False
+    _downloaded_files: List[str] = []
+    cleanup_on_close: bool = True
+
+    async def __aenter__(self) -> "Reader":
+        """Enter the async context manager.
+
+        Returns:
+            Reader: The reader instance.
+        """
+        return self
+
+    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
+        """Exit the async context manager, closing the reader.
+
+        Args:
+            exc_type: Exception type if an exception was raised.
+            exc_val: Exception value if an exception was raised.
+            exc_tb: Exception traceback if an exception was raised.
+        """
+        await self.close()
+
+    async def close(self) -> None:
+        """Close the reader and clean up any downloaded temporary files.
+
+        This method cleans up any temporary files that were downloaded from
+        the object store during read operations. Calling close() multiple
+        times is safe (subsequent calls are no-ops).
+
+        Note:
+            Set ``cleanup_on_close=False`` during initialization to retain
+            downloaded files after closing.
+
+        Example::
+
+            reader = ParquetFileReader(path="/data/input")
+            df = await reader.read()
+            await reader.close()  # Cleans up temp files
+        """
+        if self._is_closed:
+            return
+
+        if self.cleanup_on_close and self._downloaded_files:
+            await self._cleanup_downloaded_files()
+
+        self._is_closed = True
+
+    async def _cleanup_downloaded_files(self) -> None:
+        """Clean up downloaded temporary files.
+
+        Override this method in subclasses for custom cleanup behavior.
+        """
+        import shutil
+
+        for file_path in self._downloaded_files:
+            try:
+                if os.path.isfile(file_path):
+                    os.remove(file_path)
+                elif os.path.isdir(file_path):
+                    shutil.rmtree(file_path, ignore_errors=True)
+            except Exception as e:
+                logger.warning(f"Failed to clean up temporary file {file_path}: {e}")
+
+        self._downloaded_files.clear()
 
     @abstractmethod
     def read_batches(
@@ -62,27 +157,27 @@ class Reader(ABC):
         Iterator["daft.DataFrame"],
         AsyncIterator["daft.DataFrame"],
     ]:
-        """
-        Get an iterator of batched pandas DataFrames.
+        """Get an iterator of batched pandas DataFrames.
 
         Returns:
             Iterator["pd.DataFrame"]: An iterator of batched pandas DataFrames.
 
         Raises:
             NotImplementedError: If the method is not implemented.
+            ValueError: If the reader has been closed.
         """
         raise NotImplementedError
 
     @abstractmethod
     async def read(self) -> Union["pd.DataFrame", "daft.DataFrame"]:
-        """
-        Get a single pandas or daft DataFrame.
+        """Get a single pandas or daft DataFrame.
 
         Returns:
             Union["pd.DataFrame", "daft.DataFrame"]: A pandas or daft DataFrame.
 
         Raises:
             NotImplementedError: If the method is not implemented.
+            ValueError: If the reader has been closed.
         """
         raise NotImplementedError
 
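The new Reader base class gives every reader file-like lifecycle semantics: downloads are tracked in _downloaded_files, close() is idempotent, and the async context manager guarantees cleanup. A standalone model of that contract (a toy class, not the SDK's Reader, so it runs without the SDK installed)::

    import asyncio
    import os
    import tempfile
    from typing import List

    class TempFileReader:
        """Toy reader with the same __aenter__/__aexit__/close() shape."""

        def __init__(self, cleanup_on_close: bool = True) -> None:
            self.cleanup_on_close = cleanup_on_close
            self._is_closed = False
            self._downloaded_files: List[str] = []

        async def __aenter__(self) -> "TempFileReader":
            return self

        async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
            await self.close()

        async def read(self) -> str:
            # Simulate "downloading" a temp file that must be cleaned up later.
            fd, path = tempfile.mkstemp(suffix=".json")
            os.close(fd)
            self._downloaded_files.append(path)
            return path

        async def close(self) -> None:
            if self._is_closed:
                return  # close() is a no-op after the first call
            if self.cleanup_on_close:
                for path in self._downloaded_files:
                    if os.path.isfile(path):
                        os.remove(path)
                self._downloaded_files.clear()
            self._is_closed = True

    async def main() -> None:
        async with TempFileReader() as reader:
            path = await reader.read()
            assert os.path.exists(path)
        # Leaving the async with block calls close(), which removes the temp file.
        assert not os.path.exists(path)

    asyncio.run(main())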
application_sdk/io/json.py CHANGED

@@ -27,9 +27,36 @@ activity.logger = logger
 
 
 class JsonFileReader(Reader):
-    """
-
+    """JSON File Reader class to read data from JSON files using daft and pandas.
+
     Supports reading both single files and directories containing multiple JSON files.
+    Follows Python's file I/O pattern with read/close semantics and supports context managers.
+
+    Attributes:
+        path (str): Path to JSON file or directory containing JSON files.
+        chunk_size (int): Number of rows per batch.
+        file_names (Optional[List[str]]): List of specific file names to read.
+        dataframe_type (DataframeType): Type of dataframe to return (pandas or daft).
+        cleanup_on_close (bool): Whether to clean up downloaded temp files on close.
+
+    Example:
+        Using context manager (recommended)::
+
+            async with JsonFileReader(path="/data/input") as reader:
+                df = await reader.read()
+            # close() called automatically, temp files cleaned up
+
+        Reading in batches::
+
+            async with JsonFileReader(path="/data/input", chunk_size=50000) as reader:
+                async for batch in reader.read_batches():
+                    process(batch)
+
+        Using close() explicitly::
+
+            reader = JsonFileReader(path="/data/input")
+            df = await reader.read()
+            await reader.close()  # Clean up downloaded temp files
     """
 
     def __init__(
@@ -38,6 +65,7 @@ class JsonFileReader(Reader):
         file_names: Optional[List[str]] = None,
         chunk_size: Optional[int] = 100000,
         dataframe_type: DataframeType = DataframeType.pandas,
+        cleanup_on_close: bool = True,
     ):
         """Initialize the JsonInput class.
 
@@ -48,6 +76,8 @@ class JsonFileReader(Reader):
                 Wildcards are not supported.
             file_names (Optional[List[str]]): List of specific file names to read. Defaults to None.
             chunk_size (int): Number of rows per batch. Defaults to 100000.
+            dataframe_type (DataframeType): Type of dataframe to read. Defaults to DataframeType.pandas.
+            cleanup_on_close (bool): Whether to clean up downloaded temp files on close. Defaults to True.
 
         Raises:
             ValueError: When path is not provided or when single file path is combined with file_names
@@ -65,12 +95,22 @@ class JsonFileReader(Reader):
         self.chunk_size = chunk_size
         self.file_names = file_names
         self.dataframe_type = dataframe_type
+        self.cleanup_on_close = cleanup_on_close
+        self._is_closed = False
+        self._downloaded_files: List[str] = []
 
     async def read(self) -> Union["pd.DataFrame", "daft.DataFrame"]:
+        """Read the data from the JSON files and return as a single DataFrame.
+
+        Returns:
+            Union[pd.DataFrame, daft.DataFrame]: Combined dataframe from JSON files.
+
+        Raises:
+            ValueError: If the reader has been closed or dataframe_type is unsupported.
         """
-
-
-
+        if self._is_closed:
+            raise ValueError("Cannot read from a closed reader")
+
         if self.dataframe_type == DataframeType.pandas:
             return await self._get_dataframe()
         elif self.dataframe_type == DataframeType.daft:
@@ -84,10 +124,18 @@ class JsonFileReader(Reader):
         AsyncIterator["pd.DataFrame"],
         AsyncIterator["daft.DataFrame"],
     ]:
+        """Read the data from the JSON files and return as batched DataFrames.
+
+        Returns:
+            Union[AsyncIterator[pd.DataFrame], AsyncIterator[daft.DataFrame]]:
+                Async iterator of DataFrames.
+
+        Raises:
+            ValueError: If the reader has been closed or dataframe_type is unsupported.
         """
-
-
-
+        if self._is_closed:
+            raise ValueError("Cannot read from a closed reader")
+
         if self.dataframe_type == DataframeType.pandas:
             return self._get_batched_dataframe()
         elif self.dataframe_type == DataframeType.daft:
@@ -98,10 +146,7 @@ class JsonFileReader(Reader):
     async def _get_batched_dataframe(
         self,
     ) -> AsyncIterator["pd.DataFrame"]:
-        """
-        Method to read the data from the json files in the path
-        and return as a batched pandas dataframe
-        """
+        """Read the data from the JSON files and return as a batched pandas dataframe."""
         try:
             import pandas as pd
 
@@ -109,6 +154,8 @@ class JsonFileReader(Reader):
             json_files = await download_files(
                 self.path, self.extension, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(json_files)
             logger.info(f"Reading {len(json_files)} JSON files in batches")
 
             for json_file in json_files:
@@ -124,10 +171,7 @@ class JsonFileReader(Reader):
             raise
 
     async def _get_dataframe(self) -> "pd.DataFrame":
-        """
-        Method to read the data from the json files in the path
-        and return as a single combined pandas dataframe
-        """
+        """Read the data from the JSON files and return as a single pandas dataframe."""
         try:
             import pandas as pd
 
@@ -135,6 +179,8 @@ class JsonFileReader(Reader):
             json_files = await download_files(
                 self.path, self.extension, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(json_files)
             logger.info(f"Reading {len(json_files)} JSON files as pandas dataframe")
 
             return pd.concat(
@@ -149,10 +195,7 @@ class JsonFileReader(Reader):
     async def _get_batched_daft_dataframe(
         self,
     ) -> AsyncIterator["daft.DataFrame"]:  # noqa: F821
-        """
-        Method to read the data from the json files in the path
-        and return as a batched daft dataframe
-        """
+        """Read the data from the JSON files and return as a batched daft dataframe."""
         try:
             import daft
 
@@ -160,6 +203,8 @@ class JsonFileReader(Reader):
             json_files = await download_files(
                 self.path, self.extension, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(json_files)
             logger.info(f"Reading {len(json_files)} JSON files as daft batches")
 
             # Yield each discovered file as separate batch with chunking
@@ -170,10 +215,7 @@ class JsonFileReader(Reader):
             raise
 
     async def _get_daft_dataframe(self) -> "daft.DataFrame":  # noqa: F821
-        """
-        Method to read the data from the json files in the path
-        and return as a single combined daft dataframe
-        """
+        """Read the data from the JSON files and return as a single daft dataframe."""
        try:
             import daft
 
@@ -181,6 +223,8 @@ class JsonFileReader(Reader):
             json_files = await download_files(
                 self.path, self.extension, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(json_files)
             logger.info(f"Reading {len(json_files)} JSON files with daft")
 
             # Use the discovered/downloaded files directly
application_sdk/io/parquet.py CHANGED

@@ -40,9 +40,37 @@ if TYPE_CHECKING:
 
 
 class ParquetFileReader(Reader):
-    """
-
+    """Parquet File Reader class to read data from Parquet files using daft and pandas.
+
     Supports reading both single files and directories containing multiple parquet files.
+    Follows Python's file I/O pattern with read/close semantics and supports context managers.
+
+    Attributes:
+        path (str): Path to parquet file or directory containing parquet files.
+        chunk_size (int): Number of rows per batch.
+        buffer_size (int): Number of rows per batch for daft.
+        file_names (Optional[List[str]]): List of specific file names to read.
+        dataframe_type (DataframeType): Type of dataframe to return (pandas or daft).
+        cleanup_on_close (bool): Whether to clean up downloaded temp files on close.
+
+    Example:
+        Using context manager (recommended)::
+
+            async with ParquetFileReader(path="/data/input") as reader:
+                df = await reader.read()
+            # close() called automatically, temp files cleaned up
+
+        Reading in batches::
+
+            async with ParquetFileReader(path="/data/input", chunk_size=50000) as reader:
+                async for batch in reader.read_batches():
+                    process(batch)
+
+        Using close() explicitly::
+
+            reader = ParquetFileReader(path="/data/input")
+            df = await reader.read()
+            await reader.close()  # Clean up downloaded temp files
     """
 
     def __init__(
@@ -52,6 +80,7 @@ class ParquetFileReader(Reader):
         buffer_size: Optional[int] = 5000,
         file_names: Optional[List[str]] = None,
         dataframe_type: DataframeType = DataframeType.pandas,
+        cleanup_on_close: bool = True,
     ):
         """Initialize the Parquet input class.
 
@@ -64,6 +93,7 @@ class ParquetFileReader(Reader):
             buffer_size (int): Number of rows per batch. Defaults to 5000.
             file_names (Optional[List[str]]): List of file names to read. Defaults to None.
             dataframe_type (DataframeType): Type of dataframe to read. Defaults to DataframeType.pandas.
+            cleanup_on_close (bool): Whether to clean up downloaded temp files on close. Defaults to True.
 
         Raises:
             ValueError: When path is not provided or when single file path is combined with file_names
@@ -81,12 +111,22 @@ class ParquetFileReader(Reader):
         self.buffer_size = buffer_size
         self.file_names = file_names
         self.dataframe_type = dataframe_type
+        self.cleanup_on_close = cleanup_on_close
+        self._is_closed = False
+        self._downloaded_files: List[str] = []
 
     async def read(self) -> Union["pd.DataFrame", "daft.DataFrame"]:
+        """Read the data from the parquet files and return as a single DataFrame.
+
+        Returns:
+            Union[pd.DataFrame, daft.DataFrame]: Combined dataframe from parquet files.
+
+        Raises:
+            ValueError: If the reader has been closed or dataframe_type is unsupported.
         """
-
-
-
+        if self._is_closed:
+            raise ValueError("Cannot read from a closed reader")
+
         if self.dataframe_type == DataframeType.pandas:
             return await self._get_dataframe()
         elif self.dataframe_type == DataframeType.daft:
@@ -100,10 +140,18 @@ class ParquetFileReader(Reader):
         AsyncIterator["pd.DataFrame"],
         AsyncIterator["daft.DataFrame"],
     ]:
+        """Read the data from the parquet files and return as batched DataFrames.
+
+        Returns:
+            Union[AsyncIterator[pd.DataFrame], AsyncIterator[daft.DataFrame]]:
+                Async iterator of DataFrames.
+
+        Raises:
+            ValueError: If the reader has been closed or dataframe_type is unsupported.
         """
-
-
-
+        if self._is_closed:
+            raise ValueError("Cannot read from a closed reader")
+
         if self.dataframe_type == DataframeType.pandas:
             return self._get_batched_dataframe()
         elif self.dataframe_type == DataframeType.daft:
@@ -149,6 +197,8 @@ class ParquetFileReader(Reader):
             parquet_files = await download_files(
                 self.path, PARQUET_FILE_EXTENSION, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(parquet_files)
             logger.info(f"Reading {len(parquet_files)} parquet files")
 
             return pd.concat(
@@ -208,6 +258,8 @@ class ParquetFileReader(Reader):
             parquet_files = await download_files(
                 self.path, PARQUET_FILE_EXTENSION, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(parquet_files)
             logger.info(f"Reading {len(parquet_files)} parquet files in batches")
 
             # Process each file individually to maintain memory efficiency
@@ -259,6 +311,8 @@ class ParquetFileReader(Reader):
             parquet_files = await download_files(
                 self.path, PARQUET_FILE_EXTENSION, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(parquet_files)
             logger.info(f"Reading {len(parquet_files)} parquet files with daft")
 
             # Use the discovered/downloaded files directly
@@ -317,6 +371,8 @@ class ParquetFileReader(Reader):
             parquet_files = await download_files(
                 self.path, PARQUET_FILE_EXTENSION, self.file_names
             )
+            # Track downloaded files for cleanup on close
+            self._downloaded_files.extend(parquet_files)
             logger.info(f"Reading {len(parquet_files)} parquet files as daft batches")
 
             # Create a lazy dataframe without loading data into memory
application_sdk/observability/observability.py CHANGED

@@ -426,16 +426,9 @@ class AtlanObservability(Generic[T], ABC):
                     chunk_start=0,
                     chunk_part=int(time()),
                 )
-                logging.info(
-                    f"Successfully instantiated ParquetFileWriter for partition: {partition_path}"
-                )
 
                 await parquet_writer._write_dataframe(dataframe=df)
 
-                logging.info(
-                    f"Successfully wrote {len(df)} records to partition: {partition_path}"
-                )
-
             except Exception as partition_error:
                 logging.error(
                     f"Error processing partition {partition_path}: {str(partition_error)}"
application_sdk/services/objectstore.py CHANGED

@@ -459,9 +459,22 @@ class ObjectStore:
 
         logger.info(f"Found {len(file_list)} files to download from: {source}")
 
+        # Normalize source prefix to use forward slashes for comparison
+        normalized_source = cls._normalize_object_store_key(source)
+
         # Download each file
         for file_path in file_list:
-
+            normalized_file_path = cls._normalize_object_store_key(file_path)
+            if normalized_file_path.startswith(normalized_source):
+                # Extract relative path after the prefix
+                relative_path = normalized_file_path[
+                    len(normalized_source) :
+                ].lstrip("/")
+            else:
+                # Fallback to just the filename
+                relative_path = os.path.basename(normalized_file_path)
+
+            local_file_path = os.path.join(destination, relative_path)
             await cls.download_file(file_path, local_file_path, store_name)
 
         logger.info(f"Successfully downloaded all files from: {source}")
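This hunk changes how downloaded files are placed under the destination: the object-store key is normalized, the source prefix is stripped, and the remaining relative path is preserved, so nested files no longer collapse onto a single filename. A sketch of the path mapping; _normalize_object_store_key is not shown in this diff, so the backslash-to-slash normalization here is an assumption::

    import os

    def normalize_key(key: str) -> str:
        # Assumed behavior of cls._normalize_object_store_key: forward slashes only.
        return key.replace("\\", "/")

    def local_path_for(file_path: str, source: str, destination: str) -> str:
        normalized_source = normalize_key(source)
        normalized_file_path = normalize_key(file_path)
        if normalized_file_path.startswith(normalized_source):
            # Keep the directory structure below the source prefix.
            relative_path = normalized_file_path[len(normalized_source):].lstrip("/")
        else:
            # Fallback: flatten to the bare filename.
            relative_path = os.path.basename(normalized_file_path)
        return os.path.join(destination, relative_path)

    # "prefix/a/b.parquet" downloaded with source "prefix" lands at
    # "/tmp/out/a/b.parquet" rather than "/tmp/out/b.parquet".
    print(local_path_for("prefix/a/b.parquet", "prefix", "/tmp/out"))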
application_sdk/transformers/query/__init__.py CHANGED

@@ -4,6 +4,7 @@ from typing import Any, Dict, List, Optional, Tuple, Type
 
 import daft
 import yaml
+from daft.functions import to_struct, when
 from pyatlan.model.enums import AtlanConnectorType
 
 from application_sdk.observability.logger_adaptor import get_logger
@@ -227,7 +228,7 @@ class QueryBasedTransformer(TransformerInterface):
            # Only create a struct if we have fields
            if struct_fields:
                # Create the struct first
-               struct =
+               struct = to_struct(*struct_fields)
 
                # If we have non-null checks, apply them
                if non_null_fields:
@@ -236,8 +237,8 @@ class QueryBasedTransformer(TransformerInterface):
                    for check in non_null_fields[1:]:
                        any_non_null = any_non_null | check
 
-                   # Use if_else
-                   return any_non_null.
+                   # Use when().otherwise() for conditional expression (replaces if_else in daft 0.7+)
+                   return when(any_non_null, struct).otherwise(None).alias(prefix)
 
                return struct.alias(prefix)
 
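The second and third hunks route the struct construction through daft.functions: the struct is built with to_struct(*struct_fields) and the conditional null-out uses when(...).otherwise(...), which the new comment notes replaces if_else in daft 0.7+. A minimal sketch mirroring those calls; the column names are invented and this assumes daft >= 0.7.1 (the floor pinned in the METADATA change below)::

    import daft
    from daft.functions import to_struct, when

    df = daft.from_pydict({"name": ["a", None], "size": [1, None]})

    # Build a struct column from individual field expressions,
    # as the hunk does with to_struct(*struct_fields).
    struct = to_struct(daft.col("name"), daft.col("size"))

    # Null out the struct when every field is null, using when().otherwise()
    # in place of the removed if_else-style expression.
    any_non_null = daft.col("name").not_null() | daft.col("size").not_null()
    df = df.with_column("attributes", when(any_non_null, struct).otherwise(None))
    print(df.collect())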
application_sdk/version.py CHANGED

{atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/METADATA CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: atlan-application-sdk
-Version: 2.0.0
+Version: 2.1.1
 Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
 Project-URL: Repository, https://github.com/atlanhq/application-sdk
 Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
@@ -31,7 +31,7 @@ Requires-Dist: pydantic<2.13.0,>=2.10.6
 Requires-Dist: python-dotenv<1.3.0,>=1.1.0
 Requires-Dist: uvloop<0.23.0,>=0.21.0; sys_platform != 'win32'
 Provides-Extra: daft
-Requires-Dist: daft<0.8.0,>=0.
+Requires-Dist: daft<0.8.0,>=0.7.1; extra == 'daft'
 Provides-Extra: distributed-lock
 Requires-Dist: redis[hiredis]<7.2.0,>=5.2.0; extra == 'distributed-lock'
 Provides-Extra: iam-auth
{atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/RECORD CHANGED

@@ -1,18 +1,18 @@
 application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
 application_sdk/constants.py,sha256=TvdmKQShVWBNQZdVF2y-fxuE31FmeraTnqQ9jT_n5XY,11567
-application_sdk/version.py,sha256=
+application_sdk/version.py,sha256=sNbvXviG7NgxM58lOHKhbZfERat5qAJNr3UZy_toVQs,84
 application_sdk/worker.py,sha256=DLMocpHvvwpdAopyXhxwM7ftaNlKvZMQfkgy1MFyiik,7561
-application_sdk/activities/__init__.py,sha256=
+application_sdk/activities/__init__.py,sha256=i7iY6aL1VFg185n2rLLvD_sI2BA9zJ33jL5rD_sY__U,12350
 application_sdk/activities/lock_management.py,sha256=6Wdf3jMKitoarHQP91PIJOoGFz4aaOLS_40c7n1yAOA,3902
 application_sdk/activities/.cursor/BUGBOT.md,sha256=FNykX5aMkdOhzgpiGqstOnSp9JN63iR2XP3onU4AGh8,15843
 application_sdk/activities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/common/models.py,sha256=43MF_w0EzEQiJvGIqF_FNet4X6MEmwqYd3YAsHdQn08,1362
-application_sdk/activities/common/sql_utils.py,sha256=
+application_sdk/activities/common/sql_utils.py,sha256=QD4qOGkgJmlAGZKaSxqfC0AkjZVdTqdr6Q_Tw2CjIsM,10246
 application_sdk/activities/common/utils.py,sha256=ngyFmiZnMCAQtyu6vGeAlkzwNkM29MD_gBU5pWqOxJ4,8392
 application_sdk/activities/metadata_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/metadata_extraction/base.py,sha256=ENFojpxqKdN_eVSL4iet3cGfylPOfcl1jnflfo4zhs8,3920
 application_sdk/activities/metadata_extraction/rest.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-application_sdk/activities/metadata_extraction/sql.py,sha256=
+application_sdk/activities/metadata_extraction/sql.py,sha256=CmE77EsgbOuDL5AKaRCnq1jApJnDWNVxx-RZ49cJwus,27415
 application_sdk/activities/query_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/query_extraction/sql.py,sha256=Gsa79R8CYY0uyt3rA2nLMfQs8-C4_zg1pJ_yYSF2cZw,21193
 application_sdk/application/__init__.py,sha256=vcrQsqlfmGvKcCZuOtHHaNRqHSGdXlEDftkb8Tv_shI,9867
@@ -64,15 +64,15 @@ application_sdk/interceptors/events.py,sha256=e0O6uK9_aCTmOORaTGN9RbcTg9_KNaakq-
 application_sdk/interceptors/lock.py,sha256=5ETm20zrTaH2b9fepN4Ckp1tGJV-uINqDrno_5RW3aw,6169
 application_sdk/interceptors/models.py,sha256=kEzJKvb-G1M7aKrLPgAmsukJXLXeh8hIJKwEkOiaY28,6115
 application_sdk/interceptors/.cursor/BUGBOT.md,sha256=pxmUF2c7dtaXAX8yAa1-LBa6FCrj_uw7aQcHrppjf1A,14570
-application_sdk/io/__init__.py,sha256=
-application_sdk/io/json.py,sha256=
-application_sdk/io/parquet.py,sha256=
+application_sdk/io/__init__.py,sha256=Fse-fEyrpMlLUxwyFkH8vWWSXz8rdWGlAjZy5ulAZCU,27767
+application_sdk/io/json.py,sha256=sNSyHZCM_ZeaiJHUrolYVHKreBQqSCBsfsjD3JSkoD0,19729
+application_sdk/io/parquet.py,sha256=zy9H_TvWI5CkktJ582NH7Ut_5rUH_S0Jy7ZbTD0JxeI,34227
 application_sdk/io/utils.py,sha256=sn_8Q6HgjeC8uyZp2XGMAfqdJ8XzkIllOEVYXIH54DY,10724
 application_sdk/observability/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/observability/context.py,sha256=lJjpfxEjMY_hrdSDqq519YaWcztgc_1nM4d-mGV5shs,634
 application_sdk/observability/logger_adaptor.py,sha256=Fq5OE579ozr0EzsNYEh2H0q3POVAxtlWfJ-PSwWDGLM,30194
 application_sdk/observability/metrics_adaptor.py,sha256=5Oz02lUED60duryoVDF9mbD11fpxhbXi7P1609n_15Y,16446
-application_sdk/observability/observability.py,sha256=
+application_sdk/observability/observability.py,sha256=O2rBal_0pmFRen7Yx4c4dSH1NyiT937b4bY2w63q-4U,23751
 application_sdk/observability/traces_adaptor.py,sha256=0eQJPN-tYA_dV8D3uEa5ZiX9g12NDuLnPaFuQMVDdL0,18242
 application_sdk/observability/utils.py,sha256=-02GAFom8Bg4SNyCTNYySmen2dzvLfTu43bqsNq1AH0,3096
 application_sdk/observability/decorators/observability_decorator.py,sha256=yd6qfrg1MmH5KcZ5Ydzb0RaBzmxx5FrmiI9qwvZx3EU,8963
@@ -92,7 +92,7 @@ application_sdk/services/__init__.py,sha256=H-5HZEPdr53MUfAggyHqHhRXDRLZFZsxvJgW
 application_sdk/services/_utils.py,sha256=0yHqDP6qNb1OT-bX2XRYQPZ5xkGkV13nyRw6GkPlHs8,1136
 application_sdk/services/atlan_storage.py,sha256=TKzXxu0yXeUcmZehwp8PcnQTC4A9w9RlZ0Fl-Xp1bLE,8509
 application_sdk/services/eventstore.py,sha256=wCT921KRzUe3fAWKC-bbM6_OtIJTKpSQrOutPQzMEgs,6745
-application_sdk/services/objectstore.py,sha256=
+application_sdk/services/objectstore.py,sha256=dLljCsPPSr24bPKh0l3-xRblofzKVQ4LDfqDrMp8JGc,20819
 application_sdk/services/secretstore.py,sha256=Pmn1WlmHmgaDhWz5OXBB5_rKXQQMyLMzadwZSNKwc6Q,19070
 application_sdk/services/statestore.py,sha256=3-afiM3Vsoe1XDYRokdGTB5I5CwOKyieuX5RwIZf77o,9413
 application_sdk/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -138,7 +138,7 @@ application_sdk/transformers/atlas/__init__.py,sha256=fw3D8bBtt61SseAfYut3JZddpX
 application_sdk/transformers/atlas/sql.py,sha256=rkQXNZ7oebts5oF5E_Bw8NpcHHKScU0TmKciH_1l_k4,50419
 application_sdk/transformers/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/transformers/common/utils.py,sha256=4ISMIQ0Gzghmi31p51FOFm5KLF7XF-fmH9PVT7i0DFE,4899
-application_sdk/transformers/query/__init__.py,sha256=
+application_sdk/transformers/query/__init__.py,sha256=4uVCU-NfDe08PlffjWQ5p4smQa7c518IL2rDgIk6694,17446
 application_sdk/transformers/query/templates/column.yaml,sha256=EXLYwGXN7LKT-v51n2EZnY99o6vHucyFaVSpM-sUSXw,7679
 application_sdk/transformers/query/templates/database.yaml,sha256=SD1hJg5LI7gsBHQL5mW341sa51EkhcsIDDFlIOi9zdk,1374
 application_sdk/transformers/query/templates/extras-procedure.yaml,sha256=XhAfVY4zm99K8fcgkYA1XPLv4ks-SA6SzMO3SMtQ60s,2298
@@ -152,8 +152,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
 application_sdk/workflows/metadata_extraction/sql.py,sha256=6ZaVt84n-8U2ZvR9GR7uIJKv5v8CuyQjhlnoRJvDszc,12435
 application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
 application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
-atlan_application_sdk-2.
-atlan_application_sdk-2.
-atlan_application_sdk-2.
-atlan_application_sdk-2.
-atlan_application_sdk-2.
+atlan_application_sdk-2.1.1.dist-info/METADATA,sha256=Vc2uG2FMhuNXyZFXmGMmvc_LRpCBaNTcQEHpSV8NpOE,5805
+atlan_application_sdk-2.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+atlan_application_sdk-2.1.1.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+atlan_application_sdk-2.1.1.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
+atlan_application_sdk-2.1.1.dist-info/RECORD,,
{atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/WHEEL RENAMED
File without changes

{atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/licenses/LICENSE RENAMED
File without changes

{atlan_application_sdk-2.0.0.dist-info → atlan_application_sdk-2.1.1.dist-info}/licenses/NOTICE RENAMED
File without changes