atlan-application-sdk 0.1.1rc53__py3-none-any.whl → 0.1.1rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,7 @@ from typing import Any, Dict, Generic, Optional, TypeVar
20
20
  from pydantic import BaseModel
21
21
  from temporalio import activity
22
22
 
23
+ from application_sdk.activities.common.models import ActivityResult
23
24
  from application_sdk.activities.common.utils import (
24
25
  auto_heartbeater,
25
26
  build_output_path,
@@ -27,6 +28,7 @@ from application_sdk.activities.common.utils import (
27
28
  get_workflow_run_id,
28
29
  )
29
30
  from application_sdk.common.error_codes import OrchestratorError
31
+ from application_sdk.common.file_converter import FileType, convert_data_files
30
32
  from application_sdk.constants import TEMPORARY_PATH
31
33
  from application_sdk.handlers import HandlerInterface
32
34
  from application_sdk.observability.logger_adaptor import get_logger
@@ -268,3 +270,26 @@ class ActivitiesInterface(ABC, Generic[ActivitiesStateType]):
268
270
  exc_info=e,
269
271
  )
270
272
  raise
273
+
274
@activity.defn
@auto_heartbeater
async def convert_files(self, workflow_args: Dict[str, Any]) -> ActivityResult:
    """
    Convert the workflow's input files to the requested output file type.

    Args:
        workflow_args: Workflow arguments. This activity reads the
            "input_files" and "output_file_type" entries.

    Returns:
        ActivityResult: "success" with the converted file paths under
        metadata["input_files"] when both entries are present and truthy;
        otherwise "warning" with an empty list under the same key.
    """
    source_files = workflow_args.get("input_files")
    target_type = workflow_args.get("output_file_type")

    # Guard clause: nothing to do unless both entries are supplied.
    if not (source_files and target_type):
        return ActivityResult(
            status="warning",
            message="Unable to get input files or output file type",
            metadata={"input_files": []},
        )

    # FileType(...) raises ValueError for a value that is not a FileType member.
    # NOTE(review): convert_data_files omits files whose conversion returned
    # nothing, yet the status below is still "success" - confirm that is intended.
    results = await convert_data_files(source_files, FileType(target_type))
    return ActivityResult(
        status="success",
        message=f"Successfully converted files to {target_type}",
        metadata={"input_files": results},
    )
@@ -4,7 +4,7 @@ This module contains Pydantic models used to represent various data structures
4
4
  needed by activities, such as statistics and configuration.
5
5
  """
6
6
 
7
- from typing import List, Optional
7
+ from typing import Any, Dict, List, Optional, TypedDict
8
8
 
9
9
  from pydantic import BaseModel
10
10
 
@@ -36,3 +36,9 @@ class ActivityStatistics(BaseModel):
36
36
  chunk_count: int = 0
37
37
  partitions: Optional[List[int]] = []
38
38
  typename: Optional[str] = None
39
+
40
+
41
class ActivityResult(TypedDict):
    """Dictionary shape used as the return value of an activity.

    At runtime an ActivityResult is a plain ``dict``; the keys below are
    checked only by static type checkers.
    """

    # Outcome label, e.g. "success" or "warning".
    status: str
    # Human-readable description of the outcome.
    message: str
    # Free-form additional data attached to the result.
    metadata: Dict[str, Any]
@@ -31,7 +31,11 @@ class DatabaseConfig(BaseModel):
31
31
  )
32
32
  parameters: Optional[List[str]] = Field(
33
33
  default=None,
34
- description="List of additional connection parameter names that can be dynamically added from credentials",
34
+ description="List of additional connection parameter names that can be dynamically added from credentials to the connection string. ex: ['ssl_mode'] will be added to the connection string as ?ssl_mode=require",
35
+ )
36
+ connect_args: Dict[str, Any] = Field(
37
+ default_factory=dict,
38
+ description="Additional connection arguments to be passed to SQLAlchemy. ex: {'sslmode': 'require'}",
35
39
  )
36
40
 
37
41
  class Config:
@@ -37,7 +37,6 @@ class BaseSQLClient(ClientInterface):
37
37
  Attributes:
38
38
  connection: Database connection instance.
39
39
  engine: SQLAlchemy engine instance.
40
- sql_alchemy_connect_args (Dict[str, Any]): Additional connection arguments.
41
40
  credentials (Dict[str, Any]): Database credentials.
42
41
  resolved_credentials (Dict[str, Any]): Resolved credentials after reading from secret manager.
43
42
  use_server_side_cursor (bool): Whether to use server-side cursors.
@@ -45,7 +44,6 @@ class BaseSQLClient(ClientInterface):
45
44
 
46
45
  connection = None
47
46
  engine = None
48
- sql_alchemy_connect_args: Dict[str, Any] = {}
49
47
  credentials: Dict[str, Any] = {}
50
48
  resolved_credentials: Dict[str, Any] = {}
51
49
  use_server_side_cursor: bool = USE_SERVER_SIDE_CURSOR
@@ -55,7 +53,6 @@ class BaseSQLClient(ClientInterface):
55
53
  self,
56
54
  use_server_side_cursor: bool = USE_SERVER_SIDE_CURSOR,
57
55
  credentials: Dict[str, Any] = {},
58
- sql_alchemy_connect_args: Dict[str, Any] = {},
59
56
  ):
60
57
  """
61
58
  Initialize the SQL client.
@@ -64,12 +61,9 @@ class BaseSQLClient(ClientInterface):
64
61
  use_server_side_cursor (bool, optional): Whether to use server-side cursors.
65
62
  Defaults to USE_SERVER_SIDE_CURSOR.
66
63
  credentials (Dict[str, Any], optional): Database credentials. Defaults to {}.
67
- sql_alchemy_connect_args (Dict[str, Any], optional): Additional SQLAlchemy
68
- connection arguments. Defaults to {}.
69
64
  """
70
65
  self.use_server_side_cursor = use_server_side_cursor
71
66
  self.credentials = credentials
72
- self.sql_alchemy_connect_args = sql_alchemy_connect_args
73
67
 
74
68
  async def load(self, credentials: Dict[str, Any]) -> None:
75
69
  """Load credentials and prepare engine for lazy connections.
@@ -83,6 +77,9 @@ class BaseSQLClient(ClientInterface):
83
77
  Raises:
84
78
  ClientError: If credentials are invalid or engine creation fails
85
79
  """
80
+ if not self.DB_CONFIG:
81
+ raise ValueError("DB_CONFIG is not configured for this SQL client.")
82
+
86
83
  self.credentials = credentials # Update the instance credentials
87
84
  try:
88
85
  from sqlalchemy import create_engine
@@ -90,7 +87,7 @@ class BaseSQLClient(ClientInterface):
90
87
  # Create engine but no persistent connection
91
88
  self.engine = create_engine(
92
89
  self.get_sqlalchemy_connection_string(),
93
- connect_args=self.sql_alchemy_connect_args,
90
+ connect_args=self.DB_CONFIG.connect_args,
94
91
  pool_pre_ping=True,
95
92
  )
96
93
 
@@ -397,7 +394,6 @@ class AsyncBaseSQLClient(BaseSQLClient):
397
394
  Attributes:
398
395
  connection (AsyncConnection): Async database connection instance.
399
396
  engine (AsyncEngine): Async SQLAlchemy engine instance.
400
- sql_alchemy_connect_args (Dict[str, Any]): Additional connection arguments.
401
397
  credentials (Dict[str, Any]): Database credentials.
402
398
  use_server_side_cursor (bool): Whether to use server-side cursors.
403
399
  """
@@ -419,13 +415,16 @@ class AsyncBaseSQLClient(BaseSQLClient):
419
415
  ValueError: If credentials are invalid or engine creation fails.
420
416
  """
421
417
  self.credentials = credentials
418
+ if not self.DB_CONFIG:
419
+ raise ValueError("DB_CONFIG is not configured for this SQL client.")
420
+
422
421
  try:
423
422
  from sqlalchemy.ext.asyncio import create_async_engine
424
423
 
425
424
  # Create async engine but no persistent connection
426
425
  self.engine = create_async_engine(
427
426
  self.get_sqlalchemy_connection_string(),
428
- connect_args=self.sql_alchemy_connect_args,
427
+ connect_args=self.DB_CONFIG.connect_args,
429
428
  pool_pre_ping=True,
430
429
  )
431
430
  if not self.engine:
@@ -0,0 +1,99 @@
1
+ from collections import namedtuple
2
+ from enum import Enum
3
+ from typing import List, Optional
4
+
5
+ import pandas as pd
6
+
7
+ from application_sdk.observability.logger_adaptor import get_logger
8
+
9
+ logger = get_logger(__name__)
10
+
11
+
12
def enum_register():
    """
    Build a fresh registry plus the decorator factory that fills it.

    Returns:
        A ``Register`` namedtuple with two fields:
        ``add`` - call ``add(key)`` to get a decorator that stores the
        decorated function under ``key`` and returns it unchanged;
        ``registry`` - the dict mapping each key to its registered function.
    """
    Register = namedtuple("Register", ["add", "registry"])
    handlers = {}

    # NOTE: annotated as str, but any hashable key works; this module passes
    # enum members.
    def add(name: str):
        def remember(fn):
            # Registering the same key twice silently replaces the earlier entry.
            handlers[name] = fn
            return fn

        return remember

    return Register(add=add, registry=handlers)
27
+
28
+
29
+ file_converter_registry = enum_register()
30
+
31
+
32
+ # Edit the enums here to add new file types
33
class FileType(Enum):
    """File formats that data files can be converted to.

    Each value is the format's name as it appears in a file extension
    (without the leading dot).
    """

    JSON = "json"
    PARQUET = "parquet"
36
+
37
+
38
+ # Edit the enums here to add new file conversions
39
class ConvertFile(Enum):
    """Supported conversions, named ``<source>_to_<target>``.

    The value is the string that is built from a source extension and a
    target file type when a conversion is looked up.
    """

    JSON_TO_PARQUET = "json_to_parquet"
    PARQUET_TO_JSON = "parquet_to_json"
42
+
43
+
44
async def convert_data_files(
    input_file_paths: List[str], output_file_type: FileType
) -> List[str]:
    """
    Convert the input files to the specified file type.

    The source format is taken from the extension of the FIRST path only;
    the converter chosen for it is then applied to every path in the list.

    Args:
        input_file_paths: List[str] - List of input file paths
        output_file_type: FileType - The file type to convert to
    Returns:
        List[str] - List of converted file paths. Inputs for which the
        converter returned a falsy value are omitted, so the result may be
        shorter than the input list.
    Raises:
        ValueError: If the source/target combination is not a ConvertFile
            member, or no converter function is registered for it.
    """
    if not input_file_paths:
        return []

    input_file_type = input_file_paths[0].split(".")[-1]
    conversion_name = f"{input_file_type}_to_{output_file_type.value}"

    # Resolve the conversion up front so an unsupported combination fails with
    # one clear message instead of the generic Enum lookup error.
    try:
        convert_file = ConvertFile(conversion_name)
    except ValueError as err:
        raise ValueError(
            f"No converter found for file type: {conversion_name}"
        ) from err

    # dict.get() returns None for a missing key (it never raises KeyError), so
    # the absence of a converter has to be checked explicitly.
    converter_func = file_converter_registry.registry.get(convert_file)
    if converter_func is None:
        raise ValueError(f"No converter found for file type: {convert_file}")

    # No try/except around the loop: an exception raised inside a converter is
    # a conversion failure and must not be reported as a missing converter.
    converted_files = []
    for file in input_file_paths:
        converted_file = converter_func(file)
        if converted_file:
            converted_files.append(converted_file)

    return converted_files
70
+
71
+
72
+ # Add the main logic here to convert the files here
73
@file_converter_registry.add(ConvertFile.JSON_TO_PARQUET)
def convert_json_to_parquet(file_path: str) -> Optional[str]:
    """Convert a line-delimited JSON file to a parquet file next to it.

    Args:
        file_path: Path of the JSON file to read (one record per line).

    Returns:
        The path of the parquet file that was written, or None if reading,
        transforming or writing failed (the error is logged, not raised).
    """
    try:
        logger.info(f"Converting {file_path} to parquet")
        df = pd.read_json(file_path, orient="records", lines=True)
        # Keep only columns that hold at least one truthy value; a column in
        # which every value is falsy or missing is dropped.
        # NOTE(review): this also drops columns that are entirely 0 / False /
        # empty - confirm that is intended.
        df = df.loc[:, ~df.where(df.astype(bool)).isna().all(axis=0)]
        # Swap only the trailing extension. str.replace would rewrite every
        # ".json" in the path (directory names included) and, when the path
        # has no ".json" at all, would return the input path unchanged so the
        # parquet output would overwrite the source file.
        source_suffix = ".json"
        if file_path.endswith(source_suffix):
            stem = file_path[: -len(source_suffix)]
        else:
            stem = file_path
        parquet_file_path = stem + ".parquet"
        df.to_parquet(parquet_file_path)
        return parquet_file_path
    except Exception as e:
        # Deliberate best-effort: a failed file is logged and reported as None
        # so the caller can carry on with the remaining files.
        logger.error(f"Error converting {file_path} to parquet: {e}")
        return None
86
+
87
+
88
@file_converter_registry.add(ConvertFile.PARQUET_TO_JSON)
def convert_parquet_to_json(file_path: str) -> Optional[str]:
    """Convert a parquet file to a line-delimited JSON file next to it.

    Args:
        file_path: Path of the parquet file to read.

    Returns:
        The path of the JSON file that was written (one record per line), or
        None if reading or writing failed (the error is logged, not raised).
    """
    try:
        logger.info(f"Converting {file_path} to json")
        df = pd.read_parquet(file_path)
        # Swap only the trailing extension. str.replace would rewrite every
        # ".parquet" in the path (directory names included) and, when the path
        # has no ".parquet" at all, would return the input path unchanged so
        # the JSON output would overwrite the source file.
        source_suffix = ".parquet"
        if file_path.endswith(source_suffix):
            stem = file_path[: -len(source_suffix)]
        else:
            stem = file_path
        json_file_path = stem + ".json"
        df.to_json(json_file_path, orient="records", lines=True)
        return json_file_path
    except Exception as e:
        # Deliberate best-effort: a failed file is logged and reported as None
        # so the caller can carry on with the remaining files.
        logger.error(f"Error converting {file_path} to json: {e}")
        return None
@@ -2,4 +2,4 @@
2
2
  Version information for the application_sdk package.
3
3
  """
4
4
 
5
- __version__ = "0.1.1rc53"
5
+ __version__ = "0.1.1rc55"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: atlan-application-sdk
3
- Version: 0.1.1rc53
3
+ Version: 0.1.1rc55
4
4
  Summary: Atlan Application SDK is a Python library for developing applications on the Atlan Platform
5
5
  Project-URL: Repository, https://github.com/atlanhq/application-sdk
6
6
  Project-URL: Documentation, https://github.com/atlanhq/application-sdk/README.md
@@ -1,12 +1,12 @@
1
1
  application_sdk/__init__.py,sha256=2e2mvmLJ5dxmJGPELtb33xwP-j6JMdoIuqKycEn7hjg,151
2
2
  application_sdk/constants.py,sha256=EDGR-3SuCxNV-3x0D4wA9is9vBbVWa3nHvJ8r2w4lYY,10977
3
- application_sdk/version.py,sha256=0GDfah3Is6w2M-p8lHG9431TleRLSCtZ79vP9LRgIfY,88
3
+ application_sdk/version.py,sha256=GSgUx92mCFDwTnAC5gx_IwXSMV4KwnJrIkRB3TCBNxQ,88
4
4
  application_sdk/worker.py,sha256=i5f0AeKI39IfsLO05QkwC6uMz0zDPSJqP7B2byri1VI,7489
5
- application_sdk/activities/__init__.py,sha256=OhbOrz8ildUCFe4xrjYvceQM36dlDpKTPtw0OnTaPwU,10090
5
+ application_sdk/activities/__init__.py,sha256=L5WXkTwOwGtjWAlXrUJRCKGwyIyp3z8fBv8BZVCRFQI,11175
6
6
  application_sdk/activities/lock_management.py,sha256=oX2qPpfEu_xP0MiaCakVGk9ivZDvG4EddVZag1DuHSE,3976
7
7
  application_sdk/activities/.cursor/BUGBOT.md,sha256=FNykX5aMkdOhzgpiGqstOnSp9JN63iR2XP3onU4AGh8,15843
8
8
  application_sdk/activities/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- application_sdk/activities/common/models.py,sha256=LIZfWvTtgtbAUvvn-rwrPQgD7fP2J0Gxdxr_ITgw-jM,1243
9
+ application_sdk/activities/common/models.py,sha256=43MF_w0EzEQiJvGIqF_FNet4X6MEmwqYd3YAsHdQn08,1362
10
10
  application_sdk/activities/common/utils.py,sha256=nSNGkY5eS5pPc8etdPWkXBFTSaConGAD8LDtNqOMHF4,9836
11
11
  application_sdk/activities/metadata_extraction/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  application_sdk/activities/metadata_extraction/base.py,sha256=ENFojpxqKdN_eVSL4iet3cGfylPOfcl1jnflfo4zhs8,3920
@@ -20,9 +20,9 @@ application_sdk/clients/__init__.py,sha256=C9T84J7V6ZumcoWJPAxdd3tqSmbyciaGBJn-C
20
20
  application_sdk/clients/atlan.py,sha256=l6yV39fr1006SJFwkOTNDQlbSFlHCZQaUPfdUlzdVEg,5053
21
21
  application_sdk/clients/atlan_auth.py,sha256=D7FuNqv81ohNXLJtdx1AFw_jU6a3g0Pw6149ia4ucFY,8930
22
22
  application_sdk/clients/base.py,sha256=TIn3pG89eXUc1XSYf4jk66m1vajWp0WxcCQOOltdazA,14021
23
- application_sdk/clients/models.py,sha256=yPgmiqt3I7am2NdFF10BnKiwfgjLceEXrpie0rvGCg8,1509
23
+ application_sdk/clients/models.py,sha256=iZOTyH6LO64kozdiUPCFCN0NgLhd_Gtv0lH7ZIPdo8w,1800
24
24
  application_sdk/clients/redis.py,sha256=IfAD32vLp88BCvsDTaQtxFHxzHlEx4V7TK7h1HwDDBg,15917
25
- application_sdk/clients/sql.py,sha256=r-8rghnATLRxxISchLZaNnMCAZMRLKyPAwPZengzMMY,19846
25
+ application_sdk/clients/sql.py,sha256=lXeVu_dute30IaWWK5gHBhjEs2dXp_e0XkOMsbOsq64,19589
26
26
  application_sdk/clients/temporal.py,sha256=jC3U8LmW8G6gg-Qmxk0rcAifIGF0KekwP1UkMGXN7RA,18314
27
27
  application_sdk/clients/utils.py,sha256=zLFOJbTr_6TOqnjfVFGY85OtIXZ4FQy_rquzjaydkbY,779
28
28
  application_sdk/clients/workflow.py,sha256=6bSqmA3sNCk9oY68dOjBUDZ9DhNKQxPD75qqE0cfldc,6104
@@ -32,6 +32,7 @@ application_sdk/common/aws_utils.py,sha256=xlSMIQyjvQ-CydEXaxXrnPUygv7AAbCLsxhZ2
32
32
  application_sdk/common/dapr_utils.py,sha256=0yHqDP6qNb1OT-bX2XRYQPZ5xkGkV13nyRw6GkPlHs8,1136
33
33
  application_sdk/common/dataframe_utils.py,sha256=PId9vT6AUoq3tesiTd4sSUvW7RUhPWdAAEBLuOprks4,1262
34
34
  application_sdk/common/error_codes.py,sha256=bxgvugN_0H5b8VXfJw-44mybgX5I9lRJbRdYjtPjqDI,14561
35
+ application_sdk/common/file_converter.py,sha256=ta0PVh7uIEGJg0BTPUJnSjj55ve2iVAOkqwAeg96_-g,3079
35
36
  application_sdk/common/utils.py,sha256=ImCrlyCj5Mj571CVWfqy5MynVVju9xhn1ItSlJoaebc,19572
36
37
  application_sdk/common/.cursor/BUGBOT.md,sha256=OkB5TMAEJFzaBfbNb3g9ZDPW2r1krQE_KEuJbytMPuI,12176
37
38
  application_sdk/decorators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -156,8 +157,8 @@ application_sdk/workflows/metadata_extraction/__init__.py,sha256=jHUe_ZBQ66jx8bg
156
157
  application_sdk/workflows/metadata_extraction/sql.py,sha256=6ZaVt84n-8U2ZvR9GR7uIJKv5v8CuyQjhlnoRJvDszc,12435
157
158
  application_sdk/workflows/query_extraction/__init__.py,sha256=n066_CX5RpJz6DIxGMkKS3eGSRg03ilaCtsqfJWQb7Q,117
158
159
  application_sdk/workflows/query_extraction/sql.py,sha256=kT_JQkLCRZ44ZpaC4QvPL6DxnRIIVh8gYHLqRbMI-hA,4826
159
- atlan_application_sdk-0.1.1rc53.dist-info/METADATA,sha256=IIkNqjucZ5QB-EUMFTFaucgdxl-U6ouulLKzH3atL_I,5634
160
- atlan_application_sdk-0.1.1rc53.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
161
- atlan_application_sdk-0.1.1rc53.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
162
- atlan_application_sdk-0.1.1rc53.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
163
- atlan_application_sdk-0.1.1rc53.dist-info/RECORD,,
160
+ atlan_application_sdk-0.1.1rc55.dist-info/METADATA,sha256=oPFC4krNFgKJYPm5q8u6Cji9hKoZfflnMTTGi8-c7SQ,5634
161
+ atlan_application_sdk-0.1.1rc55.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
162
+ atlan_application_sdk-0.1.1rc55.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
163
+ atlan_application_sdk-0.1.1rc55.dist-info/licenses/NOTICE,sha256=A-XVVGt3KOYuuMmvSMIFkg534F1vHiCggEBp4Ez3wGk,1041
164
+ atlan_application_sdk-0.1.1rc55.dist-info/RECORD,,