atlan-application-sdk 0.1.1rc52__py3-none-any.whl → 0.1.1rc54__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/__init__.py +25 -0
- application_sdk/activities/common/models.py +7 -1
- application_sdk/common/file_converter.py +99 -0
- application_sdk/version.py +1 -1
- {atlan_application_sdk-0.1.1rc52.dist-info → atlan_application_sdk-0.1.1rc54.dist-info}/METADATA +3 -1
- {atlan_application_sdk-0.1.1rc52.dist-info → atlan_application_sdk-0.1.1rc54.dist-info}/RECORD +9 -8
- {atlan_application_sdk-0.1.1rc52.dist-info → atlan_application_sdk-0.1.1rc54.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc52.dist-info → atlan_application_sdk-0.1.1rc54.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc52.dist-info → atlan_application_sdk-0.1.1rc54.dist-info}/licenses/NOTICE +0 -0
|
@@ -20,6 +20,7 @@ from typing import Any, Dict, Generic, Optional, TypeVar
|
|
|
20
20
|
from pydantic import BaseModel
|
|
21
21
|
from temporalio import activity
|
|
22
22
|
|
|
23
|
+
from application_sdk.activities.common.models import ActivityResult
|
|
23
24
|
from application_sdk.activities.common.utils import (
|
|
24
25
|
auto_heartbeater,
|
|
25
26
|
build_output_path,
|
|
@@ -27,6 +28,7 @@ from application_sdk.activities.common.utils import (
|
|
|
27
28
|
get_workflow_run_id,
|
|
28
29
|
)
|
|
29
30
|
from application_sdk.common.error_codes import OrchestratorError
|
|
31
|
+
from application_sdk.common.file_converter import FileType, convert_data_files
|
|
30
32
|
from application_sdk.constants import TEMPORARY_PATH
|
|
31
33
|
from application_sdk.handlers import HandlerInterface
|
|
32
34
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
@@ -268,3 +270,26 @@ class ActivitiesInterface(ABC, Generic[ActivitiesStateType]):
|
|
|
268
270
|
exc_info=e,
|
|
269
271
|
)
|
|
270
272
|
raise
|
|
273
|
+
|
|
274
|
+
@activity.defn
@auto_heartbeater
async def convert_files(self, workflow_args: Dict[str, Any]) -> ActivityResult:
    """Convert the workflow's input files to the requested output file type.

    Args:
        workflow_args: Workflow arguments. The keys read here are
            ``"input_files"`` (paths to convert) and ``"output_file_type"``
            (a value accepted by ``FileType``).

    Returns:
        An ``ActivityResult``. When both keys are present and truthy the
        status is ``"success"`` and ``metadata["input_files"]`` holds the
        paths returned by ``convert_data_files``. Otherwise the status is
        ``"warning"`` and ``metadata["input_files"]`` is an empty list.
    """
    source_paths = workflow_args.get("input_files")
    target_type = workflow_args.get("output_file_type")

    # Guard clause: nothing to do unless both inputs were supplied.
    if not (source_paths and target_type):
        return ActivityResult(
            status="warning",
            message="Unable to get input files or output file type",
            metadata={"input_files": []},
        )

    converted_paths = await convert_data_files(source_paths, FileType(target_type))
    return ActivityResult(
        status="success",
        message=f"Successfully converted files to {target_type}",
        metadata={"input_files": converted_paths},
    )
|
|
@@ -4,7 +4,7 @@ This module contains Pydantic models used to represent various data structures
|
|
|
4
4
|
needed by activities, such as statistics and configuration.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
from typing import List, Optional
|
|
7
|
+
from typing import Any, Dict, List, Optional, TypedDict
|
|
8
8
|
|
|
9
9
|
from pydantic import BaseModel
|
|
10
10
|
|
|
@@ -36,3 +36,9 @@ class ActivityStatistics(BaseModel):
|
|
|
36
36
|
chunk_count: int = 0
|
|
37
37
|
partitions: Optional[List[int]] = []
|
|
38
38
|
typename: Optional[str] = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class ActivityResult(TypedDict):
    """Dictionary shape describing the outcome of an activity."""

    # Outcome label for the activity.
    status: str
    # Human-readable description of the outcome.
    message: str
    # Free-form extra data attached to the result.
    metadata: Dict[str, Any]
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from collections import namedtuple
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import List, Optional
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from application_sdk.observability.logger_adaptor import get_logger
|
|
8
|
+
|
|
9
|
+
logger = get_logger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def enum_register():
    """Create a fresh function registry keyed by enum members.

    Each call builds an independent registry, so separate registries do not
    share entries.

    Returns:
        A ``Register`` namedtuple with two fields:

        * ``add`` - ``add(name)`` returns a decorator that stores the
          decorated function in the registry under ``name`` and returns the
          function unchanged. Registering the same ``name`` twice replaces
          the earlier entry.
        * ``registry`` - the dict mapping each registered key to its function.
    """
    registry = {}

    # Keys are enum members (e.g. ``ConvertFile.JSON_TO_PARQUET``), not plain
    # strings, so the annotation reflects how the registry is actually used.
    def add(name: Enum):
        def inner(fn):
            registry[name] = fn
            return fn

        return inner

    Register = namedtuple("Register", ["add", "registry"])
    return Register(add, registry)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
file_converter_registry = enum_register()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Edit the enums here to add new file types
|
|
33
|
+
class FileType(Enum):
    """File formats the converter can produce.

    Each member's value is the lowercase format name used when building a
    conversion identifier of the form ``<input>_to_<output>``.
    """

    JSON = "json"
    PARQUET = "parquet"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Edit the enums here to add new file conversions
|
|
39
|
+
class ConvertFile(Enum):
    """Supported conversions, used as keys of the converter registry.

    Each value follows the ``<input>_to_<output>`` pattern so a conversion
    can be looked up from an input extension and an output file type.
    """

    JSON_TO_PARQUET = "json_to_parquet"
    PARQUET_TO_JSON = "parquet_to_json"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
async def convert_data_files(
    input_file_paths: List[str], output_file_type: FileType
) -> List[str]:
    """Convert the input files to the specified file type.

    The input format is taken from the extension of the FIRST path only, and
    the converter chosen for it is applied to every path in the list.

    Args:
        input_file_paths: Paths of the files to convert.
        output_file_type: The file type to convert to.

    Returns:
        Paths of the successfully converted files. Files for which the
        converter returned a falsy value (e.g. ``None`` on failure) are
        omitted. An empty input list yields an empty list.

    Raises:
        ValueError: If ``<input>_to_<output>`` is not a member of
            ``ConvertFile``, or if no converter is registered for it.
    """
    if not input_file_paths:
        return []

    input_file_type = input_file_paths[0].split(".")[-1]
    convert_file = ConvertFile(f"{input_file_type}_to_{output_file_type.value}")

    # ``dict.get`` returns None for an unregistered conversion, so check it
    # explicitly. Calling None would raise TypeError, which the previous
    # ``except KeyError`` never caught.
    converter_func = file_converter_registry.registry.get(convert_file)
    if converter_func is None:
        raise ValueError(f"No converter found for file type: {convert_file}")

    # NOTE(review): converters run synchronously inside this coroutine;
    # confirm that blocking the event loop here is acceptable for callers.
    converted_files = []
    for file in input_file_paths:
        converted_file = converter_func(file)
        if converted_file:
            converted_files.append(converted_file)

    return converted_files
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Add the main logic here to convert the files here
|
|
73
|
+
@file_converter_registry.add(ConvertFile.JSON_TO_PARQUET)
def convert_json_to_parquet(file_path: str) -> Optional[str]:
    """Convert a line-delimited JSON file to a parquet file next to it.

    Args:
        file_path: Path of the JSON file, read with ``orient="records"`` and
            ``lines=True``.

    Returns:
        Path of the written parquet file, or ``None`` if anything failed
        (the error is logged, not raised).
    """
    try:
        logger.info(f"Converting {file_path} to parquet")
        df = pd.read_json(file_path, orient="records", lines=True)
        # Keep only columns that have at least one truthy, non-null value;
        # columns that are entirely null/falsy are dropped before writing.
        df = df.loc[:, ~df.where(df.astype(bool)).isna().all(axis=0)]
        # Swap only a trailing ".json". ``str.replace`` would also rewrite
        # ".json" inside directory names, and for a path without ".json" it
        # would return the input path itself, overwriting the source file.
        suffix = ".json"
        base_path = file_path[: -len(suffix)] if file_path.endswith(suffix) else file_path
        parquet_file_path = f"{base_path}.parquet"
        df.to_parquet(parquet_file_path)
        return parquet_file_path
    except Exception as e:
        logger.error(f"Error converting {file_path} to parquet: {e}")
        return None
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@file_converter_registry.add(ConvertFile.PARQUET_TO_JSON)
def convert_parquet_to_json(file_path: str) -> Optional[str]:
    """Convert a parquet file to a line-delimited JSON file next to it.

    Args:
        file_path: Path of the parquet file to read.

    Returns:
        Path of the written JSON file (``orient="records"``, ``lines=True``),
        or ``None`` if anything failed (the error is logged, not raised).
    """
    try:
        logger.info(f"Converting {file_path} to json")
        df = pd.read_parquet(file_path)
        # Swap only a trailing ".parquet". ``str.replace`` would also rewrite
        # ".parquet" inside directory names, and for a path without it would
        # return the input path itself, overwriting the source file.
        suffix = ".parquet"
        base_path = file_path[: -len(suffix)] if file_path.endswith(suffix) else file_path
        json_file_path = f"{base_path}.json"
        df.to_json(json_file_path, orient="records", lines=True)
        return json_file_path
    except Exception as e:
        logger.error(f"Error converting {file_path} to json: {e}")
        return None
|
application_sdk/version.py
CHANGED
{atlan_application_sdk-0.1.1rc52.dist-info → atlan_application_sdk-0.1.1rc54.dist-info}/METADATA
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: atlan-application-sdk
|
|
3
|
-
Version: 0.1.1rc52
|
|
3
|
+
Version: 0.1.1rc54
|
|
4
4
|
Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
|
|
5
5
|
Project-URL: Repository, https://github.com/atlanhq/application-sdk
|
|
6
6
|
Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
|
|
@@ -38,6 +38,8 @@ Provides-Extra: iam-auth
|
|
|
38
38
|
Requires-Dist: boto3>=1.38.6; extra == 'iam-auth'
|
|
39
39
|
Provides-Extra: iceberg
|
|
40
40
|
Requires-Dist: pyiceberg>=0.8.1; extra == 'iceberg'
|
|
41
|
+
Provides-Extra: mcp
|
|
42
|
+
Requires-Dist: fastmcp>=2.12.3; extra == 'mcp'
|
|
41
43
|
Provides-Extra: pandas
|
|
42
44
|
Requires-Dist: pandas>=2.2.3; extra == 'pandas'
|
|
43
45
|
Provides-Extra: scale-data-generator
|
{atlan_application_sdk-0.1.1rc52.dist-info → atlan_application_sdk-0.1.1rc54.dist-info}/RECORD
RENAMED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
|
|
2
2
|
application_sdk/constants.py,sha256=EDGR-3SuCxNV-3x0D4wA9is9vBbVWa3nHvJ8r2w4lYY,10977
|
|
3
|
-
application_sdk/version.py,sha256=
|
|
3
|
+
application_sdk/version.py,sha256=fj-WiPgW170l_hNvwhtXoZZD3cYDkf2TjkncwulxLlc,88
|
|
4
4
|
application_sdk/worker.py,sha256=i5f0AeKI39IfsLO05QkwC6uMz0zDPSJqP7B2byri1VI,7489
|
|
5
|
-
application_sdk/activities/__init__.py,sha256=
|
|
5
|
+
application_sdk/activities/__init__.py,sha256=L5WXkTwOwGtjWAlXrUJRCKGwyIyp3z8fBv8BZVCRFQI,11175
|
|
6
6
|
application_sdk/activities/lock_management.py,sha256=oX2qPpfEu_xP0MiaCakVGk9ivZDvG4EddVZag1DuHSE,3976
|
|
7
7
|
application_sdk/activities/.cursor/BUGBOT.md,sha256=FNykX5aMkdOhzgpiGqstOnSp9JN63iR2XP3onU4AGh8,15843
|
|
8
8
|
application_sdk/activities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
-
application_sdk/activities/common/models.py,sha256=
|
|
9
|
+
application_sdk/activities/common/models.py,sha256=43MF_w0EzEQiJvGIqF_FNet4X6MEmwqYd3YAsHdQn08,1362
|
|
10
10
|
application_sdk/activities/common/utils.py,sha256=nSNGkY5eS5pPc8etdPWkXBFTSaConGAD8LDtNqOMHF4,9836
|
|
11
11
|
application_sdk/activities/metadata_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
application_sdk/activities/metadata_extraction/base.py,sha256=ENFojpxqKdN_eVSL4iet3cGfylPOfcl1jnflfo4zhs8,3920
|
|
@@ -32,6 +32,7 @@ application_sdk/common/aws_utils.py,sha256=xlSMIQyjvQ-CydEXaxXrnPUygv7AAbCLsxhZ2
|
|
|
32
32
|
application_sdk/common/dapr_utils.py,sha256=0yHqDP6qNb1OT-bX2XRYQPZ5xkGkV13nyRw6GkPlHs8,1136
|
|
33
33
|
application_sdk/common/dataframe_utils.py,sha256=PId9vT6AUoq3tesiTd4sSUvW7RUhPWdAAEBLuOprks4,1262
|
|
34
34
|
application_sdk/common/error_codes.py,sha256=bxgvugN_0H5b8VXfJw-44mybgX5I9lRJbRdYjtPjqDI,14561
|
|
35
|
+
application_sdk/common/file_converter.py,sha256=ta0PVh7uIEGJg0BTPUJnSjj55ve2iVAOkqwAeg96_-g,3079
|
|
35
36
|
application_sdk/common/utils.py,sha256=ImCrlyCj5Mj571CVWfqy5MynVVju9xhn1ItSlJoaebc,19572
|
|
36
37
|
application_sdk/common/.cursor/BUGBOT.md,sha256=OkB5TMAEJFzaBfbNb3g9ZDPW2r1krQE_KEuJbytMPuI,12176
|
|
37
38
|
application_sdk/decorators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -156,8 +157,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
|
|
|
156
157
|
application_sdk/workflows/metadata_extraction/sql.py,sha256=6ZaVt84n-8U2ZvR9GR7uIJKv5v8CuyQjhlnoRJvDszc,12435
|
|
157
158
|
application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
|
|
158
159
|
application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
|
|
159
|
-
atlan_application_sdk-0.1.
|
|
160
|
-
atlan_application_sdk-0.1.
|
|
161
|
-
atlan_application_sdk-0.1.
|
|
162
|
-
atlan_application_sdk-0.1.
|
|
163
|
-
atlan_application_sdk-0.1.
|
|
160
|
+
atlan_application_sdk-0.1.1rc54.dist-info/METADATA,sha256=k2X9_ZxOyWOz3LlTMHzhI8UNYIzGGQRI24on8cxaGLU,5634
|
|
161
|
+
atlan_application_sdk-0.1.1rc54.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
162
|
+
atlan_application_sdk-0.1.1rc54.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
163
|
+
atlan_application_sdk-0.1.1rc54.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
|
|
164
|
+
atlan_application_sdk-0.1.1rc54.dist-info/RECORD,,
|
{atlan_application_sdk-0.1.1rc52.dist-info → atlan_application_sdk-0.1.1rc54.dist-info}/WHEEL
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|