atlan-application-sdk 0.1.1rc33__py3-none-any.whl → 0.1.1rc35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/__init__.py +3 -2
- application_sdk/activities/common/utils.py +21 -1
- application_sdk/activities/metadata_extraction/base.py +104 -0
- application_sdk/activities/metadata_extraction/sql.py +13 -12
- application_sdk/activities/query_extraction/sql.py +24 -20
- application_sdk/application/__init__.py +8 -0
- application_sdk/clients/atlan_auth.py +2 -2
- application_sdk/clients/base.py +293 -0
- application_sdk/clients/temporal.py +6 -10
- application_sdk/handlers/base.py +50 -0
- application_sdk/inputs/json.py +6 -4
- application_sdk/inputs/parquet.py +16 -13
- application_sdk/outputs/__init__.py +6 -3
- application_sdk/outputs/json.py +9 -6
- application_sdk/outputs/parquet.py +10 -36
- application_sdk/server/fastapi/__init__.py +4 -5
- application_sdk/server/fastapi/models.py +1 -1
- application_sdk/services/__init__.py +18 -0
- application_sdk/{outputs → services}/atlan_storage.py +64 -16
- application_sdk/{outputs → services}/eventstore.py +68 -6
- application_sdk/services/objectstore.py +407 -0
- application_sdk/services/secretstore.py +344 -0
- application_sdk/services/statestore.py +267 -0
- application_sdk/version.py +1 -1
- application_sdk/worker.py +1 -1
- {atlan_application_sdk-0.1.1rc33.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc33.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/RECORD +30 -30
- application_sdk/common/credential_utils.py +0 -85
- application_sdk/inputs/objectstore.py +0 -238
- application_sdk/inputs/secretstore.py +0 -130
- application_sdk/inputs/statestore.py +0 -101
- application_sdk/outputs/objectstore.py +0 -125
- application_sdk/outputs/secretstore.py +0 -38
- application_sdk/outputs/statestore.py +0 -113
- {atlan_application_sdk-0.1.1rc33.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc33.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc33.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/licenses/NOTICE +0 -0
application_sdk/clients/temporal.py
CHANGED

@@ -42,12 +42,10 @@ from application_sdk.events.models import (
     EventTypes,
     WorkflowStates,
 )
-from application_sdk.inputs.secretstore import SecretStoreInput
-from application_sdk.inputs.statestore import StateType
 from application_sdk.observability.logger_adaptor import get_logger
-from application_sdk.
-from application_sdk.
-from application_sdk.
+from application_sdk.services.eventstore import EventStore
+from application_sdk.services.secretstore import SecretStore
+from application_sdk.services.statestore import StateStore, StateType
 from application_sdk.workflows import WorkflowInterface
 
 logger = get_logger(__name__)

@@ -269,9 +267,7 @@ class TemporalWorkflowClient(WorkflowClient):
         self.port = port if port else WORKFLOW_PORT
         self.namespace = namespace if namespace else WORKFLOW_NAMESPACE
 
-        self.deployment_config: Dict[str, Any] = (
-            SecretStoreInput.get_deployment_secret()
-        )
+        self.deployment_config: Dict[str, Any] = SecretStore.get_deployment_secret()
         self.worker_task_queue = self.get_worker_task_queue()
         self.auth_manager = AtlanAuthClient()

@@ -426,7 +422,7 @@ class TemporalWorkflowClient(WorkflowClient):
        """
        if "credentials" in workflow_args:
            # remove credentials from workflow_args and add reference to credentials
-           workflow_args["credential_guid"] = await
+           workflow_args["credential_guid"] = await SecretStore.save_secret(
                workflow_args["credentials"]
            )
            del workflow_args["credentials"]

@@ -442,7 +438,7 @@ class TemporalWorkflowClient(WorkflowClient):
            }
        )
 
-       await
+       await StateStore.save_state_object(
            id=workflow_id, value=workflow_args, type=StateType.WORKFLOWS
        )
        logger.info(f"Created workflow config with ID: {workflow_id}")
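Note: the following sketch is illustrative and not part of the package diff. It condenses the new call pattern shown above — credentials are swapped for a GUID reference via the unified SecretStore service, and the remaining arguments are persisted through StateStore — using only the signatures visible in these hunks:

    from application_sdk.services.secretstore import SecretStore
    from application_sdk.services.statestore import StateStore, StateType

    async def store_workflow_config(workflow_id: str, workflow_args: dict) -> None:
        # Credentials are never persisted in the workflow config; they are
        # replaced by a GUID reference into the secret store.
        if "credentials" in workflow_args:
            workflow_args["credential_guid"] = await SecretStore.save_secret(
                workflow_args["credentials"]
            )
            del workflow_args["credentials"]

        # The remaining arguments are persisted as workflow state.
        await StateStore.save_state_object(
            id=workflow_id, value=workflow_args, type=StateType.WORKFLOWS
        )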
application_sdk/handlers/base.py
ADDED

@@ -0,0 +1,50 @@
+from typing import Any, Dict, Optional
+
+from application_sdk.clients.base import BaseClient
+from application_sdk.handlers import HandlerInterface
+from application_sdk.observability.logger_adaptor import get_logger
+
+logger = get_logger(__name__)
+
+
+class BaseHandler(HandlerInterface):
+    """
+    Base handler for non-SQL based applications.
+
+    This class provides a base implementation for handlers that need to interact with non-SQL data sources. It implements the HandlerInterface and provides basic functionality that can be extended by subclasses.
+
+    Attributes:
+        client (BaseClient): The client instance for connecting to the target system.
+    """
+
+    def __init__(self, client: Optional[BaseClient] = None):
+        """
+        Initialize the base handler.
+
+        Args:
+            client (BaseClient, optional): The client instance to use for connections. Defaults to BaseClient().
+        """
+        self.client = client or BaseClient()
+
+    async def load(self, credentials: Dict[str, Any]) -> None:
+        """
+        Load and initialize the handler.
+
+        This method initializes the handler and loads the client with the provided credentials.
+
+        Args:
+            credentials (Dict[str, Any]): Credentials for the client.
+        """
+        logger.info("Loading base handler")
+
+        # Load the client with credentials
+        await self.client.load(credentials=credentials)
+
+        logger.info("Base handler loaded successfully")
+
+    # The following methods are inherited from HandlerInterface and should be implemented
+    # by subclasses to handle calls from their respective FastAPI endpoints:
+    #
+    # - test_auth(**kwargs) -> bool: Called by /workflow/v1/auth endpoint
+    # - preflight_check(**kwargs) -> Any: Called by /workflow/v1/check endpoint
+    # - fetch_metadata(**kwargs) -> Any: Called by /workflow/v1/metadata endpoint
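Note: a hypothetical subclass, not from the package, showing how the three endpoint hooks named in the trailing comment might be filled in. The class name and return payloads here are invented for illustration:

    from typing import Any, Dict

    from application_sdk.handlers.base import BaseHandler

    class MyAppHandler(BaseHandler):
        """Hypothetical handler backed by the client injected via BaseHandler."""

        async def test_auth(self, **kwargs: Any) -> bool:
            # /workflow/v1/auth: verify the loaded credentials actually work.
            return self.client is not None  # replace with a real connectivity probe

        async def preflight_check(self, **kwargs: Any) -> Dict[str, Any]:
            # /workflow/v1/check: validate configuration before starting a workflow.
            return {"success": True}

        async def fetch_metadata(self, **kwargs: Any) -> Dict[str, Any]:
            # /workflow/v1/metadata: return source metadata for the UI.
            return {}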
application_sdk/inputs/json.py
CHANGED

@@ -1,10 +1,11 @@
 import os
 from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional, Union
 
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.common.error_codes import IOError
 from application_sdk.inputs import Input
-from application_sdk.inputs.objectstore import ObjectStoreInput
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore
 
 if TYPE_CHECKING:
     import daft

@@ -51,9 +52,10 @@ class JsonInput(Input):
            if self.download_file_prefix is not None and not os.path.exists(
                os.path.join(self.path, file_name)
            ):
-
-
-
+               destination_file_path = os.path.join(self.path, file_name)
+               await ObjectStore.download_file(
+                   source=get_object_store_prefix(destination_file_path),
+                   destination=destination_file_path,
                )
        except IOError as e:
            logger.error(
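Note: an illustrative sketch, not from the package. Throughout rc35 the download direction reads source= as the object-store key (derived via get_object_store_prefix) and destination= as the local path; the JsonInput change above reduces to this pattern:

    import os

    from application_sdk.activities.common.utils import get_object_store_prefix
    from application_sdk.services.objectstore import ObjectStore

    async def ensure_local_copy(path: str, file_name: str) -> str:
        local_file = os.path.join(path, file_name)
        if not os.path.exists(local_file):
            await ObjectStore.download_file(
                source=get_object_store_prefix(local_file),  # remote key
                destination=local_file,                      # local path
            )
        return local_file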
application_sdk/inputs/parquet.py
CHANGED

@@ -2,9 +2,10 @@ import glob
 import os
 from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional, Union
 
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.inputs import Input
-from application_sdk.inputs.objectstore import ObjectStoreInput
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)

@@ -42,37 +43,39 @@ class ParquetInput(Input):
        self.input_prefix = input_prefix
        self.file_names = file_names
 
-    async def download_files(self,
+    async def download_files(self, local_path: str) -> Optional[str]:
        """Read a file from the object store.
 
        Args:
-
+           local_path (str): Path to the local data in the temp directory.
 
        Returns:
            Optional[str]: Path to the downloaded local file.
        """
        # if the path is a directory, then check if the directory has any parquet files
        parquet_files = []
-       if os.path.isdir(
-           parquet_files = glob.glob(os.path.join(
+       if os.path.isdir(local_path):
+           parquet_files = glob.glob(os.path.join(local_path, "*.parquet"))
        else:
-           parquet_files = glob.glob(
+           parquet_files = glob.glob(local_path)
        if not parquet_files:
            if self.input_prefix:
                logger.info(
-                   f"Reading file from object store: {
+                   f"Reading file from object store: {local_path} from {self.input_prefix}"
                )
-               if os.path.isdir(
-
-
+               if os.path.isdir(local_path):
+                   await ObjectStore.download_prefix(
+                       source=get_object_store_prefix(local_path),
+                       destination=local_path,
                    )
                else:
-
-
+                   await ObjectStore.download_file(
+                       source=get_object_store_prefix(local_path),
+                       destination=local_path,
                    )
            else:
                raise ValueError(
-                   f"No parquet files found in {
+                   f"No parquet files found in {local_path} and no input prefix provided"
                )
 
    async def get_dataframe(self) -> "pd.DataFrame":
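Note: an illustrative condensation, not from the package. ParquetInput now branches between the two download entry points depending on whether the local path is a directory:

    import os

    from application_sdk.activities.common.utils import get_object_store_prefix
    from application_sdk.services.objectstore import ObjectStore

    async def download_parquet(local_path: str) -> None:
        remote = get_object_store_prefix(local_path)
        if os.path.isdir(local_path):
            # A directory maps to a key prefix: fetch every object under it.
            await ObjectStore.download_prefix(source=remote, destination=local_path)
        else:
            # A single file maps to a single object key.
            await ObjectStore.download_file(source=remote, destination=local_path)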
application_sdk/outputs/__init__.py
CHANGED

@@ -22,9 +22,10 @@ import orjson
 from temporalio import activity
 
 from application_sdk.activities.common.models import ActivityStatistics
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.common.dataframe_utils import is_empty_dataframe
 from application_sdk.observability.logger_adaptor import get_logger
-from application_sdk.
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 activity.logger = logger

@@ -223,9 +224,11 @@ class Output(ABC):
            with open(output_file_name, "w") as f:
                f.write(orjson.dumps(statistics).decode("utf-8"))
 
+           destination_file_path = get_object_store_prefix(output_file_name)
            # Push the file to the object store
-           await
-
+           await ObjectStore.upload_file(
+               source=output_file_name,
+               destination=destination_file_path,
            )
            return statistics
        except Exception as e:
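Note: an illustrative sketch, not from the package. Uploads invert the convention used for downloads — source= is the local file and destination= is the object-store key — so the statistics write above reduces to:

    from application_sdk.activities.common.utils import get_object_store_prefix
    from application_sdk.services.objectstore import ObjectStore

    async def push_statistics(output_file_name: str) -> None:
        await ObjectStore.upload_file(
            source=output_file_name,                                # local path
            destination=get_object_store_prefix(output_file_name),  # remote key
        )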
application_sdk/outputs/json.py
CHANGED

@@ -5,10 +5,11 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
 import orjson
 from temporalio import activity
 
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
-from application_sdk.
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 activity.logger = logger

@@ -285,9 +286,10 @@ class JsonOutput(Output):
                description="Number of records written to JSON files from daft DataFrame",
            )
 
-           # Push
-           await
-           self.
+           # Push files to the object store
+           await ObjectStore.upload_prefix(
+               source=self.output_path,
+               destination=get_object_store_prefix(self.output_path),
            )
 
        except Exception as e:

@@ -344,8 +346,9 @@ class JsonOutput(Output):
            )
 
            # Push the file to the object store
-           await
-
+           await ObjectStore.upload_file(
+               source=output_file_name,
+               destination=get_object_store_prefix(output_file_name),
            )
 
            self.buffer.clear()
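Note: an illustrative sketch, not from the package. For whole directories JsonOutput switches to upload_prefix, the bulk counterpart of upload_file, assuming output_path is a local directory of chunked JSON files:

    from application_sdk.activities.common.utils import get_object_store_prefix
    from application_sdk.services.objectstore import ObjectStore

    async def push_output_dir(output_path: str) -> None:
        # Mirrors the local directory tree under the corresponding remote prefix.
        await ObjectStore.upload_prefix(
            source=output_path,
            destination=get_object_store_prefix(output_path),
        )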
application_sdk/outputs/parquet.py
CHANGED

@@ -3,10 +3,11 @@ from typing import TYPE_CHECKING, Literal, Optional
 
 from temporalio import activity
 
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
-from application_sdk.
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 activity.logger = logger

@@ -159,7 +160,10 @@ class ParquetOutput(Output):
            )
 
            # Upload the file to object store
-           await
+           await ObjectStore.upload_file(
+               source=file_path,
+               destination=get_object_store_prefix(file_path),
+           )
        except Exception as e:
            # Record metrics for failed write
            self.metrics.record_metric(

@@ -218,7 +222,10 @@ class ParquetOutput(Output):
            )
 
            # Upload the file to object store
-           await
+           await ObjectStore.upload_file(
+               source=file_path,
+               destination=get_object_store_prefix(file_path),
+           )
        except Exception as e:
            # Record metrics for failed write
            self.metrics.record_metric(

@@ -231,39 +238,6 @@ class ParquetOutput(Output):
            logger.error(f"Error writing daft dataframe to parquet: {str(e)}")
            raise
 
-    async def upload_file(self, local_file_path: str) -> None:
-        """Upload a file to the object store.
-
-        Args:
-            local_file_path (str): Path to the local file to upload.
-        """
-        try:
-            if os.path.isdir(local_file_path):
-                logger.info(
-                    f"Uploading files: {local_file_path} to {self.output_prefix}"
-                )
-                await ObjectStoreOutput.push_files_to_object_store(
-                    self.output_prefix, local_file_path
-                )
-            else:
-                logger.info(
-                    f"Uploading file: {local_file_path} to {self.output_prefix}"
-                )
-                await ObjectStoreOutput.push_file_to_object_store(
-                    self.output_prefix, local_file_path
-                )
-        except Exception as e:
-            # Record metrics for failed upload
-            self.metrics.record_metric(
-                name="parquet_upload_errors",
-                value=1,
-                metric_type=MetricType.COUNTER,
-                labels={"error": str(e)},
-                description="Number of errors while uploading Parquet files to object store",
-            )
-            logger.error(f"Error uploading file to object store: {str(e)}")
-            raise e
-
    def get_full_path(self) -> str:
        """Get the full path of the output file.
 
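Note: the bespoke ParquetOutput.upload_file wrapper, with its ObjectStoreOutput.push_file(s)_to_object_store calls and per-upload error metrics, is deleted outright; both write paths now call the shared service directly, so the replacement is the same two-keyword call used elsewhere in this diff:

    await ObjectStore.upload_file(
        source=file_path,
        destination=get_object_store_prefix(file_path),
    )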
application_sdk/server/fastapi/__init__.py
CHANGED

@@ -25,11 +25,9 @@ from application_sdk.constants import (
 )
 from application_sdk.docgen import AtlanDocsGenerator
 from application_sdk.handlers import HandlerInterface
-from application_sdk.inputs.statestore import StateStoreInput, StateType
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.observability.observability import DuckDBUI
-from application_sdk.outputs.statestore import StateStoreOutput
 from application_sdk.server import ServerInterface
 from application_sdk.server.fastapi.middleware.logmiddleware import LogMiddleware
 from application_sdk.server.fastapi.middleware.metrics import MetricsMiddleware

@@ -53,6 +51,7 @@ from application_sdk.server.fastapi.models import (
 )
 from application_sdk.server.fastapi.routers.server import get_server_router
 from application_sdk.server.fastapi.utils import internal_server_error_handler
+from application_sdk.services.statestore import StateStore, StateType
 from application_sdk.workflows import WorkflowInterface
 
 logger = get_logger(__name__)

@@ -588,7 +587,7 @@ class APIServer(ServerInterface):
            )
            raise e
 
-    def get_workflow_config(
+    async def get_workflow_config(
        self, config_id: str, type: str = "workflows"
    ) -> WorkflowConfigResponse:
        """Retrieve workflow configuration by ID.

@@ -603,7 +602,7 @@ class APIServer(ServerInterface):
        if not StateType.is_member(type):
            raise ValueError(f"Invalid type {type} for state store")
 
-       config =
+       config = await StateStore.get_state(config_id, StateType(type))
        return WorkflowConfigResponse(
            success=True,
            message="Workflow configuration fetched successfully",

@@ -680,7 +679,7 @@ class APIServer(ServerInterface):
        if not StateType.is_member(type):
            raise ValueError(f"Invalid type {type} for state store")
 
-       config = await
+       config = await StateStore.save_state_object(
            id=config_id, value=body.model_dump(), type=StateType(type)
        )
        return WorkflowConfigResponse(
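Note: an illustrative sketch, not from the package. get_workflow_config becomes async because StateStore.get_state is awaitable; the endpoint flow, trimmed to what is visible in these hunks, looks like:

    from application_sdk.services.statestore import StateStore, StateType

    async def get_workflow_config(config_id: str, type: str = "workflows"):
        if not StateType.is_member(type):
            raise ValueError(f"Invalid type {type} for state store")
        # The awaited service call is what forces the endpoint to be async.
        config = await StateStore.get_state(config_id, StateType(type))
        return config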
application_sdk/services/__init__.py
ADDED

@@ -0,0 +1,18 @@
+"""Services module for the application SDK."""
+
+from .atlan_storage import AtlanStorage, MigrationSummary
+from .eventstore import EventStore
+from .objectstore import ObjectStore
+from .secretstore import SecretStore
+from .statestore import StateStore, StateType, build_state_store_path
+
+__all__ = [
+    "AtlanStorage",
+    "EventStore",
+    "MigrationSummary",
+    "ObjectStore",
+    "SecretStore",
+    "StateStore",
+    "StateType",
+    "build_state_store_path",
+]
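Note: the new package gives every store a single import root, replacing the inputs./outputs. pairs deleted throughout this diff:

    # One import surface for the services re-exported above.
    from application_sdk.services import (
        AtlanStorage,
        EventStore,
        ObjectStore,
        SecretStore,
        StateStore,
    )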
application_sdk/{outputs → services}/atlan_storage.py
RENAMED

@@ -1,4 +1,12 @@
-"""Atlan storage
+"""Atlan storage service for upload operations and migration from object store.
+
+This module provides the AtlanStorage service for handling data migration between
+local object storage and Atlan's upstream storage system. It's specifically designed
+for the bucket cloning strategy used in customer-deployed applications.
+
+The service supports parallel file migration with comprehensive error handling and
+detailed reporting through the MigrationSummary model.
+"""
 
 import asyncio
 from typing import Dict, List

@@ -11,8 +19,8 @@ from application_sdk.constants import (
     DEPLOYMENT_OBJECT_STORE_NAME,
     UPSTREAM_OBJECT_STORE_NAME,
 )
-from application_sdk.inputs.objectstore import ObjectStoreInput
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 activity.logger = logger

@@ -43,27 +51,37 @@ class MigrationSummary(BaseModel):
    destination: str = UPSTREAM_OBJECT_STORE_NAME
 
 
-
-class AtlanStorageOutput:
+class AtlanStorage:
    """Handles upload operations to Atlan storage and migration from objectstore."""
 
    OBJECT_CREATE_OPERATION = "create"
 
    @classmethod
    async def _migrate_single_file(cls, file_path: str) -> tuple[str, bool, str]:
-        """
-
+        """Migrate a single file from object store to Atlan storage.
+
+        This internal method handles the migration of a single file, including
+        error handling and logging. It's designed to be called concurrently
+        for multiple files.
 
        Args:
-           file_path (str): The path of the file to migrate
+           file_path (str): The path of the file to migrate in the object store.
 
        Returns:
-           tuple[str, bool, str]:
+           tuple[str, bool, str]: A tuple containing:
+               - file_path: The path of the file that was processed
+               - success: Boolean indicating if migration was successful
+               - error_message: Error details if migration failed, empty string if successful
+
+       Note:
+           This method is internal and should not be called directly. Use
+           migrate_from_objectstore_to_atlan() instead for proper coordination
+           and error handling.
        """
        try:
            # Get file data from objectstore
-           file_data =
-               file_path,
+           file_data = await ObjectStore.get_content(
+               file_path, store_name=DEPLOYMENT_OBJECT_STORE_NAME
            )
 
            with DaprClient() as client:

@@ -91,14 +109,44 @@ class AtlanStorage:
    async def migrate_from_objectstore_to_atlan(
        cls, prefix: str = ""
    ) -> MigrationSummary:
-        """
-
+        """Migrate all files from object store to Atlan storage under a given prefix.
+
+        This method performs a parallel migration of files from the local object store
+        to Atlan's upstream storage system. It provides comprehensive error handling
+        and detailed reporting of the migration process.
 
        Args:
-           prefix (str): The prefix to filter which files to migrate.
+           prefix (str, optional): The prefix to filter which files to migrate.
+               Empty string migrates all files. Defaults to "".
 
        Returns:
-           MigrationSummary:
+           MigrationSummary: Comprehensive migration summary including:
+               - total_files: Number of files found for migration
+               - migrated_files: Number successfully migrated
+               - failed_migrations: Number that failed to migrate
+               - failures: List of failure details with file paths and errors
+               - prefix: The prefix used for filtering
+               - source/destination: Storage system identifiers
+
+       Raises:
+           Exception: If there's a critical error during the migration process.
+
+       Examples:
+           >>> # Migrate all files
+           >>> summary = await AtlanStorage.migrate_from_objectstore_to_atlan()
+           >>> print(f"Success rate: {summary.migrated_files/summary.total_files*100:.1f}%")
+
+           >>> # Migrate specific dataset
+           >>> summary = await AtlanStorage.migrate_from_objectstore_to_atlan(
+           ...     prefix="processed_data/2024/"
+           ... )
+           >>> if summary.total_files == 0:
+           ...     print("No files found with the specified prefix")
+           >>> elif summary.failed_migrations == 0:
+           ...     print(f"Successfully migrated all {summary.total_files} files")
+           >>> else:
+           ...     print(f"Migration completed with {summary.failed_migrations} failures")
+           ...     # Handle failures...
        """
        try:
            logger.info(

@@ -106,8 +154,8 @@ class AtlanStorage:
            )
 
            # Get list of all files to migrate from objectstore
-           files_to_migrate =
-               prefix,
+           files_to_migrate = await ObjectStore.list_files(
+               prefix, store_name=DEPLOYMENT_OBJECT_STORE_NAME
            )
 
            total_files = len(files_to_migrate)
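Note: the docstring promises parallel migration; the fan-out code itself is not shown in these hunks, so the following coordinator is an assumption built only from the calls that are visible (_migrate_single_file and ObjectStore.list_files):

    import asyncio

    files = await ObjectStore.list_files(
        prefix, store_name=DEPLOYMENT_OBJECT_STORE_NAME
    )
    # Fan out one coroutine per file; each returns (file_path, success, error).
    results = await asyncio.gather(
        *(AtlanStorage._migrate_single_file(f) for f in files)
    )
    failures = [(path, err) for path, ok, err in results if not ok]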
application_sdk/{outputs → services}/eventstore.py
RENAMED

@@ -1,4 +1,4 @@
-"""
+"""Unified event store service for handling application events.
 
 This module provides the EventStore class for publishing application events
 to a pub/sub system with automatic fallback to HTTP binding.

@@ -10,7 +10,6 @@ from datetime import datetime
 from dapr import clients
 from temporalio import activity, workflow
 
-from application_sdk.clients.atlan_auth import AtlanAuthClient
 from application_sdk.common.dapr_utils import is_component_registered
 from application_sdk.constants import (
     APPLICATION_NAME,

@@ -25,17 +24,39 @@ activity.logger = logger
 
 
 class EventStore:
-    """
+    """Unified event store service for publishing application events.
 
    This class provides functionality to publish events to a pub/sub system.
    """
 
    @classmethod
    def enrich_event_metadata(cls, event: Event):
-        """Enrich the event metadata with
+        """Enrich the event metadata with workflow and activity context information.
+
+        This method automatically populates event metadata with context from the current
+        Temporal workflow and activity execution, including IDs, types, and execution state.
 
        Args:
-           event (Event): Event data.
+           event (Event): Event data to enrich with metadata.
+
+       Returns:
+           Event: The same event instance with enriched metadata.
+
+       Note:
+           This method safely handles cases where the code is not running within
+           a Temporal workflow or activity context.
+
+       Examples:
+           >>> from application_sdk.events.models import Event
+
+           >>> # Create basic event
+           >>> event = Event(event_type="data.processed", data={"count": 100})
+
+           >>> # Enrich with current context (if available)
+           >>> enriched = EventStore.enrich_event_metadata(event)
+           >>> print(f"Workflow ID: {enriched.metadata.workflow_id}")
+           >>> print(f"Activity: {enriched.metadata.activity_type}")
+           >>> print(f"Timestamp: {enriched.metadata.created_timestamp}")
        """
        if not event.metadata:
            event.metadata = EventMetadata()

@@ -70,10 +91,49 @@ class EventStore:
 
    @classmethod
    async def publish_event(cls, event: Event):
-        """Publish event with automatic
+        """Publish event with automatic metadata enrichment and authentication.
+
+        This method handles the complete event publishing flow including metadata
+        enrichment, authentication header injection, and component availability validation.
+        It automatically falls back gracefully if the event store component is not available.
 
        Args:
            event (Event): Event data to publish.
+
+       Note:
+           The method will silently skip publishing if the event store component
+           is not registered, allowing applications to run without event publishing
+           capability.
+
+       Raises:
+           Exception: If there's an error during event publishing (logged but not re-raised).
+
+       Examples:
+           >>> from application_sdk.events.models import Event
+
+           >>> # Publish workflow status event
+           >>> status_event = Event(
+           ...     event_type="workflow.status_changed",
+           ...     data={
+           ...         "workflow_id": "wf-123",
+           ...         "old_status": "running",
+           ...         "new_status": "completed",
+           ...         "duration_seconds": 1800
+           ...     }
+           ... )
+           >>> await EventStore.publish_event(status_event)
+
+           >>> # Publish data processing event
+           >>> processing_event = Event(
+           ...     event_type="data.batch_processed",
+           ...     data={
+           ...         "batch_id": "batch-456",
+           ...         "records_processed": 10000,
+           ...         "success_count": 9995,
+           ...         "error_count": 5
+           ...     }
+           ... )
+           >>> await EventStore.publish_event(processing_event)
        """
        if not is_component_registered(EVENT_STORE_NAME):
            logger.warning(

@@ -89,6 +149,8 @@
            binding_metadata = {"content-type": "application/json"}
 
            # Add auth token - HTTP bindings will use it, others will ignore it
+           from application_sdk.clients.atlan_auth import AtlanAuthClient
+
            auth_client = AtlanAuthClient()
            binding_metadata.update(await auth_client.get_authenticated_headers())
 