atlan-application-sdk 0.1.1rc41__py3-none-any.whl → 0.1.1rc42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/common/utils.py +78 -4
- application_sdk/activities/metadata_extraction/sql.py +0 -2
- application_sdk/inputs/__init__.py +98 -2
- application_sdk/inputs/json.py +59 -87
- application_sdk/inputs/parquet.py +173 -94
- application_sdk/test_utils/hypothesis/strategies/inputs/json_input.py +10 -5
- application_sdk/test_utils/hypothesis/strategies/inputs/parquet_input.py +9 -4
- application_sdk/version.py +1 -1
- {atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/RECORD +13 -13
- {atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/licenses/NOTICE +0 -0
application_sdk/activities/common/utils.py
CHANGED

@@ -5,10 +5,11 @@ including workflow ID retrieval, automatic heartbeating, and periodic heartbeat
 """
 
 import asyncio
+import glob
 import os
 from datetime import timedelta
 from functools import wraps
-from typing import Any, Awaitable, Callable, Optional, TypeVar, cast
+from typing import Any, Awaitable, Callable, List, Optional, TypeVar, cast
 
 from temporalio import activity
 
@@ -79,17 +80,47 @@ def build_output_path() -> str:
 
 def get_object_store_prefix(path: str) -> str:
     """Get the object store prefix for the path.
+
+    This function handles two types of paths:
+    1. Paths under TEMPORARY_PATH - converts them to relative object store paths
+    2. User-provided paths - returns them as-is (already relative object store paths)
+
     Args:
-        path: The path to
+        path: The path to convert to object store prefix.
 
     Returns:
        The object store prefix for the path.
 
-
+    Examples:
+        >>> # Temporary path case
         >>> get_object_store_prefix("./local/tmp/artifacts/apps/appName/workflows/wf-123/run-456")
         "artifacts/apps/appName/workflows/wf-123/run-456"
+
+        >>> # User-provided path case
+        >>> get_object_store_prefix("datasets/sales/2024/")
+        "datasets/sales/2024"
     """
-
+    # Normalize paths for comparison
+    abs_path = os.path.abspath(path)
+    abs_temp_path = os.path.abspath(TEMPORARY_PATH)
+
+    # Check if path is under TEMPORARY_PATH
+    try:
+        # Use os.path.commonpath to properly check if path is under temp directory
+        # This prevents false positives like '/tmp/local123' matching '/tmp/local'
+        common_path = os.path.commonpath([abs_path, abs_temp_path])
+        if common_path == abs_temp_path:
+            # Path is under temp directory, convert to relative object store path
+            relative_path = os.path.relpath(abs_path, abs_temp_path)
+            # Normalize path separators to forward slashes for object store
+            return relative_path.replace(os.path.sep, "/")
+        else:
+            # Path is already a relative object store path, return as-is
+            return path.strip("/")
+    except ValueError:
+        # os.path.commonpath or os.path.relpath can raise ValueError on Windows with different drives
+        # In this case, treat as user-provided path, return as-is
+        return path.strip("/")
 
 
 def auto_heartbeater(fn: F) -> F:
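
A note on the containment check above: a minimal sketch of why os.path.commonpath is used here instead of a naive startswith comparison (illustrative only; the directory names are hypothetical):

import os

temp = os.path.abspath("./local/tmp")          # stand-in for TEMPORARY_PATH
inside = os.path.abspath("./local/tmp/run-1")  # genuinely under the root
lookalike = os.path.abspath("./local/tmp123")  # shares the string prefix only

# startswith would wrongly treat the lookalike as contained
print(lookalike.startswith(temp))                     # True (false positive)
# commonpath compares whole path components, so it does not
print(os.path.commonpath([lookalike, temp]) == temp)  # False
print(os.path.commonpath([inside, temp]) == temp)     # True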
@@ -199,3 +230,46 @@ async def send_periodic_heartbeat(delay: float, *details: Any) -> None:
     while True:
         await asyncio.sleep(delay)
         activity.heartbeat(*details)
+
+
+def find_local_files_by_extension(
+    path: str,
+    extension: str,
+    file_names: Optional[List[str]] = None,
+) -> List[str]:
+    """Find local files at the specified local path, optionally filtering by file names.
+
+    Args:
+        path (str): Local path to search in (file or directory)
+        extension (str): File extension to filter by (e.g., '.parquet', '.json')
+        file_names (Optional[List[str]]): List of file names (basenames) to filter by, paths are not supported
+
+    Returns:
+        List[str]: List of matching file paths
+
+    Example:
+        >>> find_local_files_by_extension("/data", ".parquet", ["file1.parquet", "file2.parquet"])
+        ['file1.parquet', 'file2.parquet']
+
+        >>> find_local_files_by_extension("/data/single.json", ".json")
+        ['single.json']
+    """
+    if os.path.isfile(path) and path.endswith(extension):
+        # Single file - return it directly
+        return [path]
+
+    elif os.path.isdir(path):
+        # Directory - find all files in directory
+        all_files = glob.glob(
+            os.path.join(path, "**", f"*{extension}"),
+            recursive=True,
+        )
+
+        # Filter by file names if specified
+        if file_names:
+            file_names_set = set(file_names)  # Convert to set for O(1) lookup
+            return [f for f in all_files if os.path.basename(f) in file_names_set]
+        else:
+            return all_files
+
+    return []
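
For context, a small sketch of how the recursive glob above behaves (the directory layout is hypothetical, not part of the diff):

import glob
import os

# Given /data/a.parquet and /data/nested/b.parquet, the "**" pattern with
# recursive=True matches files at any depth under the directory.
matches = glob.glob(os.path.join("/data", "**", "*.parquet"), recursive=True)
# -> ['/data/a.parquet', '/data/nested/b.parquet']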
application_sdk/activities/metadata_extraction/sql.py
CHANGED

@@ -860,9 +860,7 @@ class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
 
         raw_input = ParquetInput(
             path=os.path.join(output_path, "raw"),
-            input_prefix=output_prefix,
             file_names=workflow_args.get("file_names"),
-            chunk_size=None,
         )
         raw_input = raw_input.get_batched_daft_dataframe()
         transformed_output = JsonOutput(
application_sdk/inputs/__init__.py
CHANGED

@@ -1,7 +1,15 @@
+import os
 from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, AsyncIterator, Iterator, Union
-
+from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Union
+
+from application_sdk.activities.common.utils import (
+    find_local_files_by_extension,
+    get_object_store_prefix,
+)
+from application_sdk.common.error_codes import IOError
+from application_sdk.constants import TEMPORARY_PATH
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 
@@ -15,6 +23,94 @@ class Input(ABC):
     Abstract base class for input data sources.
     """
 
+    async def download_files(self) -> List[str]:
+        """Download files from object store if not available locally.
+
+        Flow:
+        1. Check if files exist locally at self.path
+        2. If not, try to download from object store
+        3. Filter by self.file_names if provided
+        4. Return list of file paths for logging purposes
+
+        Returns:
+            List[str]: List of file paths
+
+        Raises:
+            AttributeError: When the input class doesn't support file operations or _extension
+            IOError: When no files found locally or in object store
+        """
+        # Step 1: Check if files exist locally
+        local_files = find_local_files_by_extension(
+            self.path, self._EXTENSION, self.file_names
+        )
+        if local_files:
+            logger.info(
+                f"Found {len(local_files)} {self._EXTENSION} files locally at: {self.path}"
+            )
+            return local_files
+
+        # Step 2: Try to download from object store
+        logger.info(
+            f"No local {self._EXTENSION} files found at {self.path}, checking object store..."
+        )
+
+        try:
+            # Determine what to download based on path type and filters
+            downloaded_paths = []
+
+            if self.path.endswith(self._EXTENSION):
+                # Single file case (file_names validation already ensures this is valid)
+                source_path = get_object_store_prefix(self.path)
+                destination_path = os.path.join(TEMPORARY_PATH, source_path)
+                await ObjectStore.download_file(
+                    source=source_path,
+                    destination=destination_path,
+                )
+                downloaded_paths.append(destination_path)
+
+            elif self.file_names:
+                # Directory with specific files - download each file individually
+                for file_name in self.file_names:
+                    file_path = os.path.join(self.path, file_name)
+                    source_path = get_object_store_prefix(file_path)
+                    destination_path = os.path.join(TEMPORARY_PATH, source_path)
+                    await ObjectStore.download_file(
+                        source=source_path,
+                        destination=destination_path,
+                    )
+                    downloaded_paths.append(destination_path)
+            else:
+                # Download entire directory
+                source_path = get_object_store_prefix(self.path)
+                destination_path = os.path.join(TEMPORARY_PATH, source_path)
+                await ObjectStore.download_prefix(
+                    source=source_path,
+                    destination=destination_path,
+                )
+                # Find the actual files in the downloaded directory
+                found_files = find_local_files_by_extension(
+                    destination_path, self._EXTENSION, getattr(self, "file_names", None)
+                )
+                downloaded_paths.extend(found_files)
+
+            # Check results
+            if downloaded_paths:
+                logger.info(
+                    f"Successfully downloaded {len(downloaded_paths)} {self._EXTENSION} files from object store"
+                )
+                return downloaded_paths
+            else:
+                raise IOError(
+                    f"{IOError.OBJECT_STORE_READ_ERROR}: Downloaded from object store but no {self._EXTENSION} files found"
+                )
+
+        except Exception as e:
+            logger.error(f"Failed to download from object store: {str(e)}")
+            raise IOError(
+                f"{IOError.OBJECT_STORE_DOWNLOAD_ERROR}: No {self._EXTENSION} files found locally at '{self.path}' and failed to download from object store. "
+                f"Error: {str(e)}"
+            )
+
     @abstractmethod
     async def get_batched_dataframe(
         self,
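
To illustrate the local-first resolution order this base-class method gives every Input subclass, here is a hedged usage sketch (ParquetInput and its constructor are from this diff; the path and printed result are hypothetical):

import asyncio

from application_sdk.inputs.parquet import ParquetInput

async def main() -> None:
    # Local files win: if matching .parquet files already exist under the
    # path, download_files() returns them without touching the object store.
    # Otherwise it downloads the file/prefix into TEMPORARY_PATH and returns
    # the local copies.
    parquet_input = ParquetInput(path="artifacts/raw", file_names=["chunk-0.parquet"])
    files = await parquet_input.download_files()
    print(files)  # e.g. ['./local/tmp/artifacts/raw/chunk-0.parquet']

asyncio.run(main())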
application_sdk/inputs/json.py
CHANGED

@@ -1,11 +1,7 @@
-import os
 from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional, Union
 
-from application_sdk.activities.common.utils import get_object_store_prefix
-from application_sdk.common.error_codes import IOError
 from application_sdk.inputs import Input
 from application_sdk.observability.logger_adaptor import get_logger
-from application_sdk.services.objectstore import ObjectStore
 
 if TYPE_CHECKING:
     import daft
@@ -15,56 +11,43 @@ logger = get_logger(__name__)
 
 
 class JsonInput(Input):
-
-
-
-
+    """
+    JSON Input class to read data from JSON files using daft and pandas.
+    Supports reading both single files and directories containing multiple JSON files.
+    """
+
+    _EXTENSION = ".json"
 
     def __init__(
         self,
         path: str,
         file_names: Optional[List[str]] = None,
-
-        chunk_size: Optional[int] = None,
+        chunk_size: int = 100000,
     ):
         """Initialize the JsonInput class.
 
         Args:
-            path (str):
-
-
-
+            path (str): Path to JSON file or directory containing JSON files.
+                It accepts both types of paths:
+                local path or object store path
+                Wildcards are not supported.
+            file_names (Optional[List[str]]): List of specific file names to read. Defaults to None.
+            chunk_size (int): Number of rows per batch. Defaults to 100000.
+
+        Raises:
+            ValueError: When path is not provided or when single file path is combined with file_names
         """
+
+        # Validate that single file path and file_names are not both specified
+        if path.endswith(self._EXTENSION) and file_names:
+            raise ValueError(
+                f"Cannot specify both a single file path ('{path}') and file_names filter. "
+                f"Either provide a directory path with file_names, or specify the exact file path without file_names."
+            )
+
         self.path = path
-
-        self.chunk_size = chunk_size if chunk_size is not None else 100000
+        self.chunk_size = chunk_size
         self.file_names = file_names
-        self.download_file_prefix = download_file_prefix
-
-    async def download_files(self):
-        """Download the files from the object store to the local path"""
-        if not self.file_names:
-            logger.debug("No files to download")
-            return
-
-        for file_name in self.file_names or []:
-            try:
-                if self.download_file_prefix is not None and not os.path.exists(
-                    os.path.join(self.path, file_name)
-                ):
-                    destination_file_path = os.path.join(self.path, file_name)
-                    await ObjectStore.download_file(
-                        source=get_object_store_prefix(destination_file_path),
-                        destination=destination_file_path,
-                    )
-            except IOError as e:
-                logger.error(
-                    f"{IOError.OBJECT_STORE_DOWNLOAD_ERROR}: Error downloading file {file_name}: {str(e)}",
-                    error_code=IOError.OBJECT_STORE_DOWNLOAD_ERROR.code,
-                )
-                raise IOError(
-                    f"{IOError.OBJECT_STORE_DOWNLOAD_ERROR}: Error downloading file {file_name}: {str(e)}"
-                )
 
     async def get_batched_dataframe(
         self,
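
The new constructor guard can be exercised directly; a short sketch (the file and directory names are hypothetical):

from application_sdk.inputs.json import JsonInput

# A directory path plus a file_names filter is fine.
ok = JsonInput(path="exports/users", file_names=["part-1.json"])

# A single-file path combined with file_names is rejected up front.
try:
    JsonInput(path="exports/users/part-1.json", file_names=["part-1.json"])
except ValueError as exc:
    print(exc)  # "Cannot specify both a single file path ... and file_names filter."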
@@ -76,22 +59,20 @@ class JsonInput(Input):
         try:
             import pandas as pd
 
-
+            # Ensure files are available (local or downloaded)
+            json_files = await self.download_files()
+            logger.info(f"Reading {len(json_files)} JSON files in batches")
 
-            for
-                file_path = os.path.join(self.path, file_name)
+            for json_file in json_files:
                 json_reader_obj = pd.read_json(
-
+                    json_file,
                     chunksize=self.chunk_size,
                     lines=True,
                 )
                 for chunk in json_reader_obj:
                     yield chunk
-        except
-            logger.error(
-                f"{IOError.OBJECT_STORE_DOWNLOAD_ERROR}: Error reading batched data from JSON: {str(e)}",
-                error_code=IOError.OBJECT_STORE_DOWNLOAD_ERROR.code,
-            )
+        except Exception as e:
+            logger.error(f"Error reading batched data from JSON: {str(e)}")
             raise
 
     async def get_dataframe(self) -> "pd.DataFrame":
@@ -102,21 +83,17 @@ class JsonInput(Input):
         try:
             import pandas as pd
 
-
-            await self.download_files()
-
-
-
-
-
-            )
-            )
-            return pd.concat(dataframes, ignore_index=True)
-        except IOError as e:
-            logger.error(
-                f"{IOError.OBJECT_STORE_DOWNLOAD_ERROR}: Error reading data from JSON: {str(e)}",
-                error_code=IOError.OBJECT_STORE_DOWNLOAD_ERROR.code,
+            # Ensure files are available (local or downloaded)
+            json_files = await self.download_files()
+            logger.info(f"Reading {len(json_files)} JSON files as pandas dataframe")
+
+            return pd.concat(
+                (pd.read_json(json_file, lines=True) for json_file in json_files),
+                ignore_index=True,
             )
+
+        except Exception as e:
+            logger.error(f"Error reading data from JSON: {str(e)}")
             raise
 
     async def get_batched_daft_dataframe(
@@ -129,18 +106,15 @@ class JsonInput(Input):
         try:
             import daft
 
-
-
-
-
-
-
-            yield
-        except
-            logger.error(
-                f"{IOError.OBJECT_STORE_DOWNLOAD_ERROR}: Error reading batched data from JSON: {str(e)}",
-                error_code=IOError.OBJECT_STORE_DOWNLOAD_ERROR.code,
-            )
+            # Ensure files are available (local or downloaded)
+            json_files = await self.download_files()
+            logger.info(f"Reading {len(json_files)} JSON files as daft batches")
+
+            # Yield each discovered file as separate batch with chunking
+            for json_file in json_files:
+                yield daft.read_json(json_file, _chunk_size=self.chunk_size)
+        except Exception as e:
+            logger.error(f"Error reading batched data from JSON using daft: {str(e)}")
             raise
 
     async def get_daft_dataframe(self) -> "daft.DataFrame":  # noqa: F821
@@ -151,14 +125,12 @@ class JsonInput(Input):
         try:
             import daft
 
-
-
-
-
-
-
-
-
-                error_code=IOError.OBJECT_STORE_DOWNLOAD_ERROR.code,
-            )
+            # Ensure files are available (local or downloaded)
+            json_files = await self.download_files()
+            logger.info(f"Reading {len(json_files)} JSON files with daft")
+
+            # Use the discovered/downloaded files directly
+            return daft.read_json(json_files)
+        except Exception as e:
+            logger.error(f"Error reading data from JSON using daft: {str(e)}")
             raise
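
Putting the JsonInput changes together, a hedged end-to-end sketch of batched reading (the path is hypothetical; assumes pandas is installed and the files are line-delimited JSON):

import asyncio

from application_sdk.inputs.json import JsonInput

async def main() -> None:
    json_input = JsonInput(path="exports/users", chunk_size=50000)
    # download_files() runs implicitly inside each reader, so iteration works
    # the same whether the files are local or still in the object store.
    async for chunk in json_input.get_batched_dataframe():
        print(len(chunk))  # at most 50000 rows per pandas chunk

asyncio.run(main())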
application_sdk/inputs/parquet.py
CHANGED

@@ -1,11 +1,7 @@
-import glob
-import os
 from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional, Union
 
-from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.inputs import Input
 from application_sdk.observability.logger_adaptor import get_logger
-from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 
@@ -20,107 +16,139 @@ class ParquetInput(Input):
     Supports reading both single files and directories containing multiple parquet files.
     """
 
+    _EXTENSION = ".parquet"
+
     def __init__(
         self,
-        path:
-        chunk_size:
-        input_prefix: Optional[str] = None,
+        path: str,
+        chunk_size: int = 100000,
         file_names: Optional[List[str]] = None,
     ):
         """Initialize the Parquet input class.
 
         Args:
             path (str): Path to parquet file or directory containing parquet files.
-
-
-
-
-            file_names (Optional[List[str]]
-
+                It accepts both types of paths:
+                local path or object store path
+                Wildcards are not supported.
+            chunk_size (int): Number of rows per batch. Defaults to 100000.
+            file_names (Optional[List[str]]): List of file names to read. Defaults to None.
+
+        Raises:
+            ValueError: When path is not provided or when single file path is combined with file_names
         """
+
+        # Validate that single file path and file_names are not both specified
+        if path.endswith(self._EXTENSION) and file_names:
+            raise ValueError(
+                f"Cannot specify both a single file path ('{path}') and file_names filter. "
+                f"Either provide a directory path with file_names, or specify the exact file path without file_names."
+            )
+
         self.path = path
         self.chunk_size = chunk_size
-        self.input_prefix = input_prefix
        self.file_names = file_names
 
-    async def
-        """Read
-
-        Args:
-            local_path (str): Path to the local data in the temp directory.
+    async def get_dataframe(self) -> "pd.DataFrame":
+        """Read data from parquet file(s) and return as pandas DataFrame.
 
         Returns:
-
-        """
-        # if the path is a directory, then check if the directory has any parquet files
-        parquet_files = []
-        if os.path.isdir(local_path):
-            parquet_files = glob.glob(os.path.join(local_path, "*.parquet"))
-        else:
-            parquet_files = glob.glob(local_path)
-        if not parquet_files:
-            if self.input_prefix:
-                logger.info(
-                    f"Reading file from object store: {local_path} from {self.input_prefix}"
-                )
-                if os.path.isdir(local_path):
-                    await ObjectStore.download_prefix(
-                        source=get_object_store_prefix(local_path),
-                        destination=local_path,
-                    )
-                else:
-                    await ObjectStore.download_file(
-                        source=get_object_store_prefix(local_path),
-                        destination=local_path,
-                    )
-            else:
-                raise ValueError(
-                    f"No parquet files found in {local_path} and no input prefix provided"
-                )
+            pd.DataFrame: Combined dataframe from specified parquet files
 
-
-
-
-        and return as a single combined pandas dataframe.
+        Raises:
+            ValueError: When no valid path can be determined or no matching files found
+            Exception: When reading parquet files fails
 
-
-
+        Example transformation:
+            Input files:
+            +------------------+
+            | file1.parquet    |
+            | file2.parquet    |
+            | file3.parquet    |
+            +------------------+
+
+            With file_names=["file1.parquet", "file3.parquet"]:
+            +-------+-------+-------+
+            | col1  | col2  | col3  |
+            +-------+-------+-------+
+            | val1  | val2  | val3  |  # from file1.parquet
+            | val7  | val8  | val9  |  # from file3.parquet
+            +-------+-------+-------+
+
+            Transformations:
+            - Only specified files are read and combined
+            - Column schemas must be compatible across files
+            - Only reads files in the specified directory
         """
         try:
             import pandas as pd
 
-
-
-
-
-            return pd.
+            # Ensure files are available (local or downloaded)
+            parquet_files = await self.download_files()
+            logger.info(f"Reading {len(parquet_files)} parquet files")
+
+            return pd.concat(
+                (pd.read_parquet(parquet_file) for parquet_file in parquet_files),
+                ignore_index=True,
+            )
         except Exception as e:
             logger.error(f"Error reading data from parquet file(s): {str(e)}")
-            # Re-raise to match IcebergInput behavior
             raise
 
     async def get_batched_dataframe(
         self,
     ) -> Union[AsyncIterator["pd.DataFrame"], Iterator["pd.DataFrame"]]:
-        """
-        Method to read the data from the parquet file(s) in batches
-        and return as an async iterator of pandas dataframes.
+        """Read data from parquet file(s) in batches as pandas DataFrames.
 
         Returns:
-            AsyncIterator[
+            AsyncIterator[pd.DataFrame]: Async iterator of pandas dataframes
+
+        Raises:
+            ValueError: When no parquet files found locally or in object store
+            Exception: When reading parquet files fails
+
+        Example transformation:
+            Input files:
+            +------------------+
+            | file1.parquet    |
+            | file2.parquet    |
+            | file3.parquet    |
+            +------------------+
+
+            With file_names=["file1.parquet", "file2.parquet"] and chunk_size=2:
+            Batch 1:
+            +-------+-------+
+            | col1  | col2  |
+            +-------+-------+
+            | val1  | val2  |  # from file1.parquet
+            | val3  | val4  |  # from file1.parquet
+            +-------+-------+
+
+            Batch 2:
+            +-------+-------+
+            | col1  | col2  |
+            +-------+-------+
+            | val5  | val6  |  # from file2.parquet
+            | val7  | val8  |  # from file2.parquet
+            +-------+-------+
+
+            Transformations:
+            - Only specified files are combined then split into chunks
+            - Each batch is a separate DataFrame
+            - Only reads files in the specified directory
         """
         try:
             import pandas as pd
 
-
-
-
-
-
+            # Ensure files are available (local or downloaded)
+            parquet_files = await self.download_files()
+            logger.info(f"Reading {len(parquet_files)} parquet files in batches")
+
+            # Process each file individually to maintain memory efficiency
+            for parquet_file in parquet_files:
+                df = pd.read_parquet(parquet_file)
                 for i in range(0, len(df), self.chunk_size):
                     yield df.iloc[i : i + self.chunk_size]
-            else:
-                yield df
         except Exception as e:
             logger.error(
                 f"Error reading data from parquet file(s) in batches: {str(e)}"
@@ -128,51 +156,102 @@ class ParquetInput(Input):
             raise
 
     async def get_daft_dataframe(self) -> "daft.DataFrame":  # noqa: F821
-        """
-        Method to read the data from the parquet file(s)
-        and return as a single combined daft dataframe.
+        """Read data from parquet file(s) and return as daft DataFrame.
 
         Returns:
-            daft.DataFrame: Combined daft dataframe from
+            daft.DataFrame: Combined daft dataframe from specified parquet files
+
+        Raises:
+            ValueError: When no parquet files found locally or in object store
+            Exception: When reading parquet files fails
+
+        Example transformation:
+            Input files:
+            +------------------+
+            | file1.parquet    |
+            | file2.parquet    |
+            | file3.parquet    |
+            +------------------+
+
+            With file_names=["file1.parquet", "file3.parquet"]:
+            +-------+-------+-------+
+            | col1  | col2  | col3  |
+            +-------+-------+-------+
+            | val1  | val2  | val3  |  # from file1.parquet
+            | val7  | val8  | val9  |  # from file3.parquet
+            +-------+-------+-------+
+
+            Transformations:
+            - Only specified parquet files combined into single daft DataFrame
+            - Lazy evaluation for better performance
+            - Column schemas must be compatible across files
         """
         try:
             import daft  # type: ignore
 
-
-
-
-
-
-
-            return daft.read_parquet(f"{path}/*.parquet")
+            # Ensure files are available (local or downloaded)
+            parquet_files = await self.download_files()
+            logger.info(f"Reading {len(parquet_files)} parquet files with daft")
+
+            # Use the discovered/downloaded files directly
+            return daft.read_parquet(parquet_files)
         except Exception as e:
             logger.error(
                 f"Error reading data from parquet file(s) using daft: {str(e)}"
             )
-            # Re-raise to match IcebergInput behavior
             raise
 
     async def get_batched_daft_dataframe(self) -> AsyncIterator["daft.DataFrame"]:  # type: ignore
-        """
-        Get batched daft dataframe from parquet file(s)
+        """Get batched daft dataframe from parquet file(s).
 
         Returns:
             AsyncIterator[daft.DataFrame]: An async iterator of daft DataFrames, each containing
-                a batch of data from
+                a batch of data from individual parquet files
+
+        Raises:
+            ValueError: When no parquet files found locally or in object store
+            Exception: When reading parquet files fails
+
+        Example transformation:
+            Input files:
+            +------------------+
+            | file1.parquet    |
+            | file2.parquet    |
+            | file3.parquet    |
+            +------------------+
+
+            With file_names=["file1.parquet", "file3.parquet"]:
+            Batch 1 (file1.parquet):
+            +-------+-------+
+            | col1  | col2  |
+            +-------+-------+
+            | val1  | val2  |
+            | val3  | val4  |
+            +-------+-------+
+
+            Batch 2 (file3.parquet):
+            +-------+-------+
+            | col1  | col2  |
+            +-------+-------+
+            | val7  | val8  |
+            | val9  | val10 |
+            +-------+-------+
+
+            Transformations:
+            - Each specified file becomes a separate daft DataFrame batch
+            - Lazy evaluation for better performance
+            - Files processed individually for memory efficiency
         """
         try:
            import daft  # type: ignore
 
-
-
-
-
-
-
-
-            if self.path and self.input_prefix:
-                await self.download_files(self.path)
-            yield daft.read_parquet(f"{self.path}/*.parquet")
+            # Ensure files are available (local or downloaded)
+            parquet_files = await self.download_files()
+            logger.info(f"Reading {len(parquet_files)} parquet files as daft batches")
+
+            # Yield each discovered file as separate batch
+            for parquet_file in parquet_files:
+                yield daft.read_parquet(parquet_file)
 
         except Exception as error:
             logger.error(
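
Likewise for ParquetInput, a sketch of the per-file daft batching introduced here (the path is hypothetical; assumes daft is installed):

import asyncio

from application_sdk.inputs.parquet import ParquetInput

async def main() -> None:
    parquet_input = ParquetInput(path="artifacts/raw", chunk_size=100000)
    # Each discovered .parquet file is yielded as its own lazy daft DataFrame.
    async for batch in parquet_input.get_batched_daft_dataframe():
        print(batch.count_rows())

asyncio.run(main())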
application_sdk/test_utils/hypothesis/strategies/inputs/json_input.py
CHANGED

@@ -2,11 +2,17 @@ from hypothesis import strategies as st
 
 # Strategy for generating safe file path components
 safe_path_strategy = st.text(
-    alphabet=
-
+    alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-",
+    min_size=1,
+    max_size=20,
+).map(lambda x: f"/data/{x}")
 
 # Strategy for generating file names
-file_name_strategy = st.
+file_name_strategy = st.text(
+    alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-",
+    min_size=1,
+    max_size=10,
+).map(lambda x: f"{x}.json")
 
 # Strategy for generating lists of file names
 file_names_strategy = st.lists(file_name_strategy, unique=True)

@@ -18,7 +24,6 @@ download_prefix_strategy = safe_path_strategy
 json_input_config_strategy = st.fixed_dictionaries(
     {
         "path": safe_path_strategy,
-        "
-        "file_names": file_names_strategy,
+        "file_names": st.one_of(st.none(), file_names_strategy),
     }
 )
application_sdk/test_utils/hypothesis/strategies/inputs/parquet_input.py
CHANGED

@@ -2,11 +2,17 @@ from hypothesis import strategies as st
 
 # Strategy for generating safe file path components
 safe_path_strategy = st.text(
-    alphabet=
-
+    alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-",
+    min_size=1,
+    max_size=20,
+).map(lambda x: f"/data/{x}")
 
 # Strategy for generating file names
-file_name_strategy = st.
+file_name_strategy = st.text(
+    alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-",
+    min_size=1,
+    max_size=10,
+).map(lambda x: f"{x}.parquet")
 
 # Strategy for generating lists of file names
 file_names_strategy = st.lists(file_name_strategy, unique=True)

@@ -22,7 +28,6 @@ parquet_input_config_strategy = st.fixed_dictionaries(
     {
         "path": safe_path_strategy,
         "chunk_size": chunk_size_strategy,
-        "input_prefix": st.one_of(st.none(), input_prefix_strategy),
         "file_names": st.one_of(st.none(), file_names_strategy),
     }
 )
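
These config strategies plug into property-based tests in the usual Hypothesis way; a minimal sketch (the test body itself is illustrative, not part of the diff):

from hypothesis import given

from application_sdk.test_utils.hypothesis.strategies.inputs.parquet_input import (
    parquet_input_config_strategy,
)

@given(config=parquet_input_config_strategy)
def test_parquet_input_config_shape(config):
    # Every generated config carries a /data/... path, a chunk_size, and an
    # optional list of *.parquet file names.
    assert config["path"].startswith("/data/")
    assert config["file_names"] is None or all(
        name.endswith(".parquet") for name in config["file_names"]
    )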
application_sdk/version.py
CHANGED
{atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/METADATA
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: atlan-application-sdk
-Version: 0.1.1rc41
+Version: 0.1.1rc42
 Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
 Project-URL: Repository, https://github.com/atlanhq/application-sdk
 Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
{atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/RECORD
RENAMED

@@ -1,17 +1,17 @@
 application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
 application_sdk/constants.py,sha256=1THiejjOEgm4kHFN-PrwrUkfRk7q1pjOLWLm-t2ph1Q,10674
-application_sdk/version.py,sha256=
+application_sdk/version.py,sha256=Q8mQScZPLmTAD4YMM35CTgtJ-tDFJqdKLeeq94J4GkU,88
 application_sdk/worker.py,sha256=i5f0AeKI39IfsLO05QkwC6uMz0zDPSJqP7B2byri1VI,7489
 application_sdk/activities/__init__.py,sha256=QaXLOBYbb0zPOY5kfDQh56qbXQFaYNXOjJ5PCvatiZ4,9530
 application_sdk/activities/lock_management.py,sha256=L__GZ9BsArwU1ntYwAgCKsSjCqN6QBeOfT-OT4WyD4Y,3983
 application_sdk/activities/.cursor/BUGBOT.md,sha256=FNykX5aMkdOhzgpiGqstOnSp9JN63iR2XP3onU4AGh8,15843
 application_sdk/activities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/common/models.py,sha256=LIZfWvTtgtbAUvvn-rwrPQgD7fP2J0Gxdxr_ITgw-jM,1243
-application_sdk/activities/common/utils.py,sha256=
+application_sdk/activities/common/utils.py,sha256=nSNGkY5eS5pPc8etdPWkXBFTSaConGAD8LDtNqOMHF4,9836
 application_sdk/activities/metadata_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/metadata_extraction/base.py,sha256=ENFojpxqKdN_eVSL4iet3cGfylPOfcl1jnflfo4zhs8,3920
 application_sdk/activities/metadata_extraction/rest.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-application_sdk/activities/metadata_extraction/sql.py,sha256=
+application_sdk/activities/metadata_extraction/sql.py,sha256=I6TfA_sRb9w6slBhXuqJtw_2_4YSyK-1MiCHb4NWf-E,35829
 application_sdk/activities/query_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/activities/query_extraction/sql.py,sha256=mesGP_kiWzrJ8wboWFVt2jbDuGG_Fl3kQVvVMdH3KWA,21228
 application_sdk/application/__init__.py,sha256=PbSImXYaQQ2IIee2SvI8AjDiSo2QcCFrM1PX3x-_RQs,8035

@@ -57,10 +57,10 @@ application_sdk/events/models.py,sha256=7Esqp3WlbriT2EqT4kNiY_sHtRXRPLj27b8SbeC5
 application_sdk/handlers/__init__.py,sha256=3Wf7jCVFR2nYOyHZEc9jj8BQUnHCylFqoezp70J2Df0,1329
 application_sdk/handlers/base.py,sha256=ieWFbv8Gm7vfrrpS-mdMSm-mHGuQY02qiAVX2qPdj3w,2467
 application_sdk/handlers/sql.py,sha256=6A_9xCtkXyNY5gPhImbftzrdPIEWIeTTqjyIewVESHA,17815
-application_sdk/inputs/__init__.py,sha256=
+application_sdk/inputs/__init__.py,sha256=_O5lK2A5EYyqwid8txKNEds3pHkoHGKrSTTWnQ-UzRA,6022
 application_sdk/inputs/iceberg.py,sha256=xiv1kNtVx1k0h3ZJbJeXjZwdfBGSy9j9orYP_AyCYlI,2756
-application_sdk/inputs/json.py,sha256=
-application_sdk/inputs/parquet.py,sha256=
+application_sdk/inputs/json.py,sha256=ZOgB3tuZSsb2m_KxiAdnbUQgU5ythCs-Mq-n4pPfeHA,4905
+application_sdk/inputs/parquet.py,sha256=51Wyvbv8vS6T_3bKHgq6pCva8w3PKCDH5jDuENy0z8c,9060
 application_sdk/inputs/sql_query.py,sha256=1EREgea6kKNaMIyX2HLJgbJ07rtAgLasd9NyvDcdZok,10636
 application_sdk/inputs/.cursor/BUGBOT.md,sha256=hwKGDbopv3NU0bpC_ElpAPDFcS59GWS3TunObGC6eLQ,9731
 application_sdk/interceptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -115,8 +115,8 @@ application_sdk/test_utils/hypothesis/strategies/handlers/sql/__init__.py,sha256
 application_sdk/test_utils/hypothesis/strategies/handlers/sql/sql_metadata.py,sha256=xnf62RyS4UzxW1kTDys_4mg3Avg7KRfqRdP6O81FHp4,1883
 application_sdk/test_utils/hypothesis/strategies/handlers/sql/sql_preflight.py,sha256=e9uo6Bx5w_ZAEu6bDTWbMbmzqB0MYl2dH-JlXg3bkV8,2648
 application_sdk/test_utils/hypothesis/strategies/inputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-application_sdk/test_utils/hypothesis/strategies/inputs/json_input.py,sha256=
-application_sdk/test_utils/hypothesis/strategies/inputs/parquet_input.py,sha256=
+application_sdk/test_utils/hypothesis/strategies/inputs/json_input.py,sha256=HfdCZnXIZFJiRuORpnmioXh8qHls9sWNSFDysy8il-o,913
+application_sdk/test_utils/hypothesis/strategies/inputs/parquet_input.py,sha256=agjRA9agpak_GmWiIt9bi_oLGvLM_eunxXfxcNHK3MQ,1081
 application_sdk/test_utils/hypothesis/strategies/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 application_sdk/test_utils/hypothesis/strategies/outputs/json_output.py,sha256=p9wotUJwc-Wmm54_qVG5Ivp_mgl7YTeAcQfC6RXlxCc,1835
 application_sdk/test_utils/hypothesis/strategies/outputs/statestore.py,sha256=gmYBwePNoSI_pl2WTXOClgkruzRwkOX_1SmBaUTha0c,2903

@@ -152,8 +152,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
 application_sdk/workflows/metadata_extraction/sql.py,sha256=BhaZavEL8H3Jvf28FGcHtZwqdsUT_EHZ4VTqiaieWek,12278
 application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
 application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
-atlan_application_sdk-0.1.
+atlan_application_sdk-0.1.1rc42.dist-info/METADATA,sha256=i1UmEi69_Uelfws3QsQ_AnUCMksuaeBb2O00kAXVGDc,5567
+atlan_application_sdk-0.1.1rc42.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+atlan_application_sdk-0.1.1rc42.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+atlan_application_sdk-0.1.1rc42.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
+atlan_application_sdk-0.1.1rc42.dist-info/RECORD,,
{atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/WHEEL
RENAMED
File without changes

{atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/licenses/LICENSE
RENAMED
File without changes

{atlan_application_sdk-0.1.1rc41.dist-info → atlan_application_sdk-0.1.1rc42.dist-info}/licenses/NOTICE
RENAMED
File without changes