atlan-application-sdk 0.1.1rc53__py3-none-any.whl → 0.1.1rc54__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,7 @@ from typing import Any, Dict, Generic, Optional, TypeVar
20
20
  from pydantic import BaseModel
21
21
  from temporalio import activity
22
22
 
23
+ from application_sdk.activities.common.models import ActivityResult
23
24
  from application_sdk.activities.common.utils import (
24
25
  auto_heartbeater,
25
26
  build_output_path,
@@ -27,6 +28,7 @@ from application_sdk.activities.common.utils import (
27
28
  get_workflow_run_id,
28
29
  )
29
30
  from application_sdk.common.error_codes import OrchestratorError
31
+ from application_sdk.common.file_converter import FileType, convert_data_files
30
32
  from application_sdk.constants import TEMPORARY_PATH
31
33
  from application_sdk.handlers import HandlerInterface
32
34
  from application_sdk.observability.logger_adaptor import get_logger
@@ -268,3 +270,26 @@ class ActivitiesInterface(ABC, Generic[ActivitiesStateType]):
268
270
  exc_info=e,
269
271
  )
270
272
  raise
273
+
274
@activity.defn
@auto_heartbeater
async def convert_files(self, workflow_args: Dict[str, Any]) -> ActivityResult:
    """
    Convert the workflow's input files to the requested output file type.

    Args:
        workflow_args: Workflow arguments. The keys read here are
            ``input_files`` (paths to convert) and ``output_file_type``
            (a value accepted by ``FileType``).

    Returns:
        ActivityResult: ``status`` is ``"success"`` when both keys are
        present and non-empty and the conversion was attempted, otherwise
        ``"warning"``. ``metadata["input_files"]`` holds the paths returned
        by ``convert_data_files`` (an empty list in the warning case).
    """
    input_files = workflow_args.get("input_files")
    output_file_type = workflow_args.get("output_file_type")

    # Guard clause: nothing to do unless both arguments were supplied.
    if not (input_files and output_file_type):
        return ActivityResult(
            status="warning",
            message="Unable to get input files or output file type",
            metadata={"input_files": []},
        )

    converted_files = await convert_data_files(
        input_files,
        FileType(output_file_type),
    )
    return ActivityResult(
        status="success",
        message=f"Successfully converted files to {output_file_type}",
        metadata={"input_files": converted_files},
    )
@@ -4,7 +4,7 @@ This module contains Pydantic models used to represent various data structures
4
4
  needed by activities, such as statistics and configuration.
5
5
  """
6
6
 
7
- from typing import List, Optional
7
+ from typing import Any, Dict, List, Optional, TypedDict
8
8
 
9
9
  from pydantic import BaseModel
10
10
 
@@ -36,3 +36,9 @@ class ActivityStatistics(BaseModel):
36
36
  chunk_count: int = 0
37
37
  partitions: Optional[List[int]] = []
38
38
  typename: Optional[str] = None
39
+
40
+
41
class ActivityResult(TypedDict):
    """Dictionary shape returned by activities to report their outcome."""

    # Outcome label, e.g. "success" or "warning".
    status: str
    # Human-readable description of the outcome.
    message: str
    # Free-form, activity-specific details about the result.
    metadata: Dict[str, Any]
@@ -0,0 +1,99 @@
1
+ from collections import namedtuple
2
+ from enum import Enum
3
+ from typing import List, Optional
4
+
5
+ import pandas as pd
6
+
7
+ from application_sdk.observability.logger_adaptor import get_logger
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
def enum_register():
    """
    Create a fresh registry together with a decorator factory that fills it.

    Returns:
        A ``Register`` namedtuple with two fields:
        ``add`` - call it with a key to obtain a decorator that stores the
        decorated function under that key and returns the function unchanged;
        ``registry`` - the dict mapping each key to its registered function.
    """
    handlers = {}

    # In this module the keys passed in are ConvertFile members.
    def add(name: str):
        def decorator(func):
            # A later registration under the same key replaces the earlier one.
            handlers[name] = func
            return func

        return decorator

    register_type = namedtuple("Register", ["add", "registry"])
    return register_type(add=add, registry=handlers)


# Module-wide registry mapping each conversion to its converter function.
file_converter_registry = enum_register()
30
+
31
+
32
+ # Edit the enums here to add new file types
33
class FileType(Enum):
    """File formats supported by the converter; each value is the format's file extension."""

    JSON = "json"
    PARQUET = "parquet"
36
+
37
+
38
+ # Edit the enums here to add new file conversions
39
class ConvertFile(Enum):
    """
    Supported conversions.

    Each value has the form ``"<input type>_to_<output type>"``, which is the
    string ``convert_data_files`` builds to look a conversion up.
    """

    JSON_TO_PARQUET = "json_to_parquet"
    PARQUET_TO_JSON = "parquet_to_json"
42
+
43
+
44
async def convert_data_files(
    input_file_paths: List[str], output_file_type: FileType
) -> List[str]:
    """
    Convert the input files to the specified file type.

    The input type is taken from the extension of the FIRST path only; the
    same converter is then applied to every path in the list.

    Args:
        input_file_paths: List of input file paths.
        output_file_type: The file type to convert to.

    Returns:
        List of converted file paths. Files for which the converter returned
        a falsy value (e.g. ``None`` after a failed conversion) are omitted.

    Raises:
        ValueError: If the input/output type combination is not a known
            ``ConvertFile`` member or no converter is registered for it.
    """
    if not input_file_paths:
        return []

    input_file_type = input_file_paths[0].split(".")[-1]
    conversion_name = f"{input_file_type}_to_{output_file_type.value}"

    # An unsupported combination (including input type == output type) is
    # reported with an explicit message instead of the generic Enum error.
    try:
        convert_file = ConvertFile(conversion_name)
    except ValueError:
        raise ValueError(
            f"No converter found for file type: {conversion_name}"
        ) from None

    # dict.get() returns None for a missing key rather than raising KeyError,
    # so the absence of a converter has to be checked explicitly; otherwise
    # calling it fails with "TypeError: 'NoneType' object is not callable".
    converter_func = file_converter_registry.registry.get(convert_file)
    if converter_func is None:
        raise ValueError(f"No converter found for file type: {convert_file}")

    # NOTE(review): converters are plain synchronous functions, so the event
    # loop is blocked while each file is converted - confirm this is acceptable.
    converted_files = []
    for file in input_file_paths:
        converted_file = converter_func(file)
        if converted_file:
            converted_files.append(converted_file)

    return converted_files
70
+
71
+
72
+ # Add the converter functions below, registering one per ConvertFile member
73
@file_converter_registry.add(ConvertFile.JSON_TO_PARQUET)
def convert_json_to_parquet(file_path: str) -> Optional[str]:
    """
    Convert a line-delimited JSON file to parquet.

    The parquet file is written alongside the input, with the final
    extension replaced by ``.parquet``.

    Args:
        file_path: Path of the JSON file (one record per line).

    Returns:
        Path of the written parquet file, or ``None`` if the conversion
        failed (the error is logged, not raised).
    """
    import os

    try:
        logger.info(f"Converting {file_path} to parquet")
        df = pd.read_json(file_path, orient="records", lines=True)
        # Drop columns in which every value is falsy or missing.
        # NOTE(review): this also removes columns that legitimately contain
        # only 0 / False / "" - confirm that is intended.
        df = df.loc[:, ~df.where(df.astype(bool)).isna().all(axis=0)]
        # Swap only the trailing extension. str.replace() would also rewrite
        # ".json" inside directory names and, when the substring is absent,
        # leave the path unchanged so the output overwrites the input file.
        parquet_file_path = os.path.splitext(file_path)[0] + ".parquet"
        df.to_parquet(parquet_file_path)
        return parquet_file_path
    except Exception as e:
        # Best effort: report the failure and let the caller skip this file.
        logger.error(f"Error converting {file_path} to parquet: {e}")
        return None
86
+
87
+
88
@file_converter_registry.add(ConvertFile.PARQUET_TO_JSON)
def convert_parquet_to_json(file_path: str) -> Optional[str]:
    """
    Convert a parquet file to line-delimited JSON.

    The JSON file is written alongside the input, with the final extension
    replaced by ``.json``.

    Args:
        file_path: Path of the parquet file.

    Returns:
        Path of the written JSON file (one record per line), or ``None`` if
        the conversion failed (the error is logged, not raised).
    """
    import os

    try:
        logger.info(f"Converting {file_path} to json")
        df = pd.read_parquet(file_path)
        # Swap only the trailing extension. str.replace() would also rewrite
        # ".parquet" inside directory names and, when the substring is absent,
        # leave the path unchanged so the output overwrites the input file.
        json_file_path = os.path.splitext(file_path)[0] + ".json"
        df.to_json(json_file_path, orient="records", lines=True)
        return json_file_path
    except Exception as e:
        # Best effort: report the failure and let the caller skip this file.
        logger.error(f"Error converting {file_path} to json: {e}")
        return None
@@ -2,4 +2,4 @@
2
2
  Version information for the application_sdk package.
3
3
  """
4
4
 
5
- __version__ = "0.1.1rc53"
5
+ __version__ = "0.1.1rc54"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atlan-application-sdk
3
- Version: 0.1.1rc53
3
+ Version: 0.1.1rc54
4
4
  Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
5
5
  Project-URL: Repository, https://github.com/atlanhq/application-sdk
6
6
  Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
@@ -1,12 +1,12 @@
1
1
  application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
2
2
  application_sdk/constants.py,sha256=EDGR-3SuCxNV-3x0D4wA9is9vBbVWa3nHvJ8r2w4lYY,10977
3
- application_sdk/version.py,sha256=0GDfah3Is6w2M-p8lHG9431TleRLSCtZ79vP9LRgIfY,88
3
+ application_sdk/version.py,sha256=fj-WiPgW170l_hNvwhtXoZZD3cYDkf2TjkncwulxLlc,88
4
4
  application_sdk/worker.py,sha256=i5f0AeKI39IfsLO05QkwC6uMz0zDPSJqP7B2byri1VI,7489
5
- application_sdk/activities/__init__.py,sha256=OhbOrz8ildUCFe4xrjYvceQM36dlDpKTPtw0OnTaPwU,10090
5
+ application_sdk/activities/__init__.py,sha256=L5WXkTwOwGtjWAlXrUJRCKGwyIyp3z8fBv8BZVCRFQI,11175
6
6
  application_sdk/activities/lock_management.py,sha256=oX2qPpfEu_xP0MiaCakVGk9ivZDvG4EddVZag1DuHSE,3976
7
7
  application_sdk/activities/.cursor/BUGBOT.md,sha256=FNykX5aMkdOhzgpiGqstOnSp9JN63iR2XP3onU4AGh8,15843
8
8
  application_sdk/activities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- application_sdk/activities/common/models.py,sha256=LIZfWvTtgtbAUvvn-rwrPQgD7fP2J0Gxdxr_ITgw-jM,1243
9
+ application_sdk/activities/common/models.py,sha256=43MF_w0EzEQiJvGIqF_FNet4X6MEmwqYd3YAsHdQn08,1362
10
10
  application_sdk/activities/common/utils.py,sha256=nSNGkY5eS5pPc8etdPWkXBFTSaConGAD8LDtNqOMHF4,9836
11
11
  application_sdk/activities/metadata_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  application_sdk/activities/metadata_extraction/base.py,sha256=ENFojpxqKdN_eVSL4iet3cGfylPOfcl1jnflfo4zhs8,3920
@@ -32,6 +32,7 @@ application_sdk/common/aws_utils.py,sha256=xlSMIQyjvQ-CydEXaxXrnPUygv7AAbCLsxhZ2
32
32
  application_sdk/common/dapr_utils.py,sha256=0yHqDP6qNb1OT-bX2XRYQPZ5xkGkV13nyRw6GkPlHs8,1136
33
33
  application_sdk/common/dataframe_utils.py,sha256=PId9vT6AUoq3tesiTd4sSUvW7RUhPWdAAEBLuOprks4,1262
34
34
  application_sdk/common/error_codes.py,sha256=bxgvugN_0H5b8VXfJw-44mybgX5I9lRJbRdYjtPjqDI,14561
35
+ application_sdk/common/file_converter.py,sha256=ta0PVh7uIEGJg0BTPUJnSjj55ve2iVAOkqwAeg96_-g,3079
35
36
  application_sdk/common/utils.py,sha256=ImCrlyCj5Mj571CVWfqy5MynVVju9xhn1ItSlJoaebc,19572
36
37
  application_sdk/common/.cursor/BUGBOT.md,sha256=OkB5TMAEJFzaBfbNb3g9ZDPW2r1krQE_KEuJbytMPuI,12176
37
38
  application_sdk/decorators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -156,8 +157,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
156
157
  application_sdk/workflows/metadata_extraction/sql.py,sha256=6ZaVt84n-8U2ZvR9GR7uIJKv5v8CuyQjhlnoRJvDszc,12435
157
158
  application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
158
159
  application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
159
- atlan_application_sdk-0.1.1rc53.dist-info/METADATA,sha256=IIkNqjucZ5QB-EUMFTFaucgdxl-U6ouulLKzH3atL_I,5634
160
- atlan_application_sdk-0.1.1rc53.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
161
- atlan_application_sdk-0.1.1rc53.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
162
- atlan_application_sdk-0.1.1rc53.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
163
- atlan_application_sdk-0.1.1rc53.dist-info/RECORD,,
160
+ atlan_application_sdk-0.1.1rc54.dist-info/METADATA,sha256=k2X9_ZxOyWOz3LlTMHzhI8UNYIzGGQRI24on8cxaGLU,5634
161
+ atlan_application_sdk-0.1.1rc54.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
162
+ atlan_application_sdk-0.1.1rc54.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
163
+ atlan_application_sdk-0.1.1rc54.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
164
+ atlan_application_sdk-0.1.1rc54.dist-info/RECORD,,