atlan-application-sdk 0.1.1rc34__py3-none-any.whl → 0.1.1rc35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/__init__.py +3 -2
- application_sdk/activities/common/utils.py +21 -1
- application_sdk/activities/metadata_extraction/base.py +4 -2
- application_sdk/activities/metadata_extraction/sql.py +13 -12
- application_sdk/activities/query_extraction/sql.py +24 -20
- application_sdk/clients/atlan_auth.py +2 -2
- application_sdk/clients/temporal.py +6 -10
- application_sdk/inputs/json.py +6 -4
- application_sdk/inputs/parquet.py +16 -13
- application_sdk/outputs/__init__.py +6 -3
- application_sdk/outputs/json.py +9 -6
- application_sdk/outputs/parquet.py +10 -36
- application_sdk/server/fastapi/__init__.py +4 -5
- application_sdk/services/__init__.py +18 -0
- application_sdk/{outputs → services}/atlan_storage.py +64 -16
- application_sdk/{outputs → services}/eventstore.py +68 -6
- application_sdk/services/objectstore.py +407 -0
- application_sdk/services/secretstore.py +344 -0
- application_sdk/services/statestore.py +267 -0
- application_sdk/version.py +1 -1
- application_sdk/worker.py +1 -1
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/RECORD +26 -29
- application_sdk/common/credential_utils.py +0 -85
- application_sdk/inputs/objectstore.py +0 -238
- application_sdk/inputs/secretstore.py +0 -130
- application_sdk/inputs/statestore.py +0 -101
- application_sdk/outputs/objectstore.py +0 -125
- application_sdk/outputs/secretstore.py +0 -38
- application_sdk/outputs/statestore.py +0 -113
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/licenses/NOTICE +0 -0
|
@@ -29,7 +29,6 @@ from application_sdk.activities.common.utils import (
|
|
|
29
29
|
from application_sdk.common.error_codes import OrchestratorError
|
|
30
30
|
from application_sdk.constants import TEMPORARY_PATH
|
|
31
31
|
from application_sdk.handlers import HandlerInterface
|
|
32
|
-
from application_sdk.inputs.statestore import StateStoreInput, StateType
|
|
33
32
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
34
33
|
|
|
35
34
|
logger = get_logger(__name__)
|
|
@@ -190,7 +189,9 @@ class ActivitiesInterface(ABC, Generic[ActivitiesStateType]):
|
|
|
190
189
|
|
|
191
190
|
try:
|
|
192
191
|
# This already handles the Dapr call internally
|
|
193
|
-
|
|
192
|
+
from application_sdk.services.statestore import StateStore, StateType
|
|
193
|
+
|
|
194
|
+
workflow_args = await StateStore.get_state(workflow_id, StateType.WORKFLOWS)
|
|
194
195
|
workflow_args["output_prefix"] = workflow_args.get(
|
|
195
196
|
"output_prefix", TEMPORARY_PATH
|
|
196
197
|
)
|
|
@@ -5,13 +5,18 @@ including workflow ID retrieval, automatic heartbeating, and periodic heartbeat
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
import asyncio
|
|
8
|
+
import os
|
|
8
9
|
from datetime import timedelta
|
|
9
10
|
from functools import wraps
|
|
10
11
|
from typing import Any, Awaitable, Callable, Optional, TypeVar, cast
|
|
11
12
|
|
|
12
13
|
from temporalio import activity
|
|
13
14
|
|
|
14
|
-
from application_sdk.constants import
|
|
15
|
+
from application_sdk.constants import (
|
|
16
|
+
APPLICATION_NAME,
|
|
17
|
+
TEMPORARY_PATH,
|
|
18
|
+
WORKFLOW_OUTPUT_PATH_TEMPLATE,
|
|
19
|
+
)
|
|
15
20
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
16
21
|
|
|
17
22
|
logger = get_logger(__name__)
|
|
@@ -72,6 +77,21 @@ def build_output_path() -> str:
|
|
|
72
77
|
)
|
|
73
78
|
|
|
74
79
|
|
|
80
|
+
def get_object_store_prefix(path: str) -> str:
|
|
81
|
+
"""Get the object store prefix for the path.
|
|
82
|
+
Args:
|
|
83
|
+
path: The path to the output directory.
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
The object store prefix for the path.
|
|
87
|
+
|
|
88
|
+
Example:
|
|
89
|
+
>>> get_object_store_prefix("./local/tmp/artifacts/apps/appName/workflows/wf-123/run-456")
|
|
90
|
+
"artifacts/apps/appName/workflows/wf-123/run-456"
|
|
91
|
+
"""
|
|
92
|
+
return os.path.relpath(path, TEMPORARY_PATH)
|
|
93
|
+
|
|
94
|
+
|
|
75
95
|
def auto_heartbeater(fn: F) -> F:
|
|
76
96
|
"""Decorator that automatically sends heartbeats during activity execution.
|
|
77
97
|
|
|
@@ -5,10 +5,10 @@ from temporalio import activity
|
|
|
5
5
|
from application_sdk.activities import ActivitiesInterface, ActivitiesState
|
|
6
6
|
from application_sdk.activities.common.utils import get_workflow_id
|
|
7
7
|
from application_sdk.clients.base import BaseClient
|
|
8
|
-
from application_sdk.common.credential_utils import get_credentials
|
|
9
8
|
from application_sdk.constants import APP_TENANT_ID, APPLICATION_NAME
|
|
10
9
|
from application_sdk.handlers.base import BaseHandler
|
|
11
10
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
11
|
+
from application_sdk.services.secretstore import SecretStore
|
|
12
12
|
from application_sdk.transformers import TransformerInterface
|
|
13
13
|
|
|
14
14
|
logger = get_logger(__name__)
|
|
@@ -76,7 +76,9 @@ class BaseMetadataExtractionActivities(ActivitiesInterface):
|
|
|
76
76
|
f"Retrieving credentials for credential_guid: {workflow_args['credential_guid']}"
|
|
77
77
|
)
|
|
78
78
|
try:
|
|
79
|
-
credentials = await get_credentials(
|
|
79
|
+
credentials = await SecretStore.get_credentials(
|
|
80
|
+
workflow_args["credential_guid"]
|
|
81
|
+
)
|
|
80
82
|
logger.info(
|
|
81
83
|
f"Successfully retrieved credentials with keys: {list(credentials.keys())}"
|
|
82
84
|
)
|
|
@@ -5,25 +5,24 @@ from temporalio import activity
|
|
|
5
5
|
|
|
6
6
|
from application_sdk.activities import ActivitiesInterface, ActivitiesState
|
|
7
7
|
from application_sdk.activities.common.models import ActivityStatistics
|
|
8
|
-
from application_sdk.activities.common.utils import
|
|
8
|
+
from application_sdk.activities.common.utils import (
|
|
9
|
+
auto_heartbeater,
|
|
10
|
+
get_object_store_prefix,
|
|
11
|
+
get_workflow_id,
|
|
12
|
+
)
|
|
9
13
|
from application_sdk.clients.sql import BaseSQLClient
|
|
10
|
-
from application_sdk.common.credential_utils import get_credentials
|
|
11
14
|
from application_sdk.common.dataframe_utils import is_empty_dataframe
|
|
12
15
|
from application_sdk.common.error_codes import ActivityError
|
|
13
16
|
from application_sdk.common.utils import prepare_query, read_sql_files
|
|
14
|
-
from application_sdk.constants import
|
|
15
|
-
APP_TENANT_ID,
|
|
16
|
-
APPLICATION_NAME,
|
|
17
|
-
SQL_QUERIES_PATH,
|
|
18
|
-
TEMPORARY_PATH,
|
|
19
|
-
)
|
|
17
|
+
from application_sdk.constants import APP_TENANT_ID, APPLICATION_NAME, SQL_QUERIES_PATH
|
|
20
18
|
from application_sdk.handlers.sql import BaseSQLHandler
|
|
21
19
|
from application_sdk.inputs.parquet import ParquetInput
|
|
22
20
|
from application_sdk.inputs.sql_query import SQLQueryInput
|
|
23
21
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
24
|
-
from application_sdk.outputs.atlan_storage import AtlanStorageOutput
|
|
25
22
|
from application_sdk.outputs.json import JsonOutput
|
|
26
23
|
from application_sdk.outputs.parquet import ParquetOutput
|
|
24
|
+
from application_sdk.services.atlan_storage import AtlanStorage
|
|
25
|
+
from application_sdk.services.secretstore import SecretStore
|
|
27
26
|
from application_sdk.transformers import TransformerInterface
|
|
28
27
|
from application_sdk.transformers.query import QueryBasedTransformer
|
|
29
28
|
|
|
@@ -144,7 +143,9 @@ class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
|
|
|
144
143
|
self._state[workflow_id].handler = handler
|
|
145
144
|
|
|
146
145
|
if "credential_guid" in workflow_args:
|
|
147
|
-
credentials = await get_credentials(
|
|
146
|
+
credentials = await SecretStore.get_credentials(
|
|
147
|
+
workflow_args["credential_guid"]
|
|
148
|
+
)
|
|
148
149
|
await sql_client.load(credentials)
|
|
149
150
|
|
|
150
151
|
self._state[workflow_id].sql_client = sql_client
|
|
@@ -536,11 +537,11 @@ class BaseSQLMetadataExtractionActivities(ActivitiesInterface):
|
|
|
536
537
|
|
|
537
538
|
# Upload data from object store to Atlan storage
|
|
538
539
|
# Use workflow_id/workflow_run_id as the prefix to migrate specific data
|
|
539
|
-
migration_prefix =
|
|
540
|
+
migration_prefix = get_object_store_prefix(workflow_args["output_path"])
|
|
540
541
|
logger.info(
|
|
541
542
|
f"Starting migration from object store with prefix: {migration_prefix}"
|
|
542
543
|
)
|
|
543
|
-
upload_stats = await
|
|
544
|
+
upload_stats = await AtlanStorage.migrate_from_objectstore_to_atlan(
|
|
544
545
|
prefix=migration_prefix
|
|
545
546
|
)
|
|
546
547
|
|
|
@@ -7,17 +7,20 @@ from pydantic import BaseModel, Field
|
|
|
7
7
|
from temporalio import activity
|
|
8
8
|
|
|
9
9
|
from application_sdk.activities import ActivitiesInterface, ActivitiesState
|
|
10
|
-
from application_sdk.activities.common.utils import
|
|
10
|
+
from application_sdk.activities.common.utils import (
|
|
11
|
+
auto_heartbeater,
|
|
12
|
+
get_object_store_prefix,
|
|
13
|
+
get_workflow_id,
|
|
14
|
+
)
|
|
11
15
|
from application_sdk.clients.sql import BaseSQLClient
|
|
12
|
-
from application_sdk.common.credential_utils import get_credentials
|
|
13
16
|
from application_sdk.constants import UPSTREAM_OBJECT_STORE_NAME
|
|
14
17
|
from application_sdk.handlers import HandlerInterface
|
|
15
18
|
from application_sdk.handlers.sql import BaseSQLHandler
|
|
16
|
-
from application_sdk.inputs.objectstore import ObjectStoreInput
|
|
17
19
|
from application_sdk.inputs.sql_query import SQLQueryInput
|
|
18
20
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
19
|
-
from application_sdk.outputs.objectstore import ObjectStoreOutput
|
|
20
21
|
from application_sdk.outputs.parquet import ParquetOutput
|
|
22
|
+
from application_sdk.services.objectstore import ObjectStore
|
|
23
|
+
from application_sdk.services.secretstore import SecretStore
|
|
21
24
|
from application_sdk.transformers import TransformerInterface
|
|
22
25
|
from application_sdk.transformers.atlas import AtlasTransformer
|
|
23
26
|
|
|
@@ -129,7 +132,9 @@ class SQLQueryExtractionActivities(ActivitiesInterface):
|
|
|
129
132
|
workflow_id = get_workflow_id()
|
|
130
133
|
sql_client = self.sql_client_class()
|
|
131
134
|
if "credential_guid" in workflow_args:
|
|
132
|
-
credentials = await get_credentials(
|
|
135
|
+
credentials = await SecretStore.get_credentials(
|
|
136
|
+
workflow_args["credential_guid"]
|
|
137
|
+
)
|
|
133
138
|
await sql_client.load(credentials)
|
|
134
139
|
|
|
135
140
|
handler = self.handler_class(sql_client)
|
|
@@ -412,14 +417,14 @@ class SQLQueryExtractionActivities(ActivitiesInterface):
|
|
|
412
417
|
f.write(last_marker)
|
|
413
418
|
|
|
414
419
|
logger.info(f"Last marker: {last_marker}")
|
|
415
|
-
await
|
|
416
|
-
|
|
417
|
-
marker_file_path,
|
|
418
|
-
|
|
420
|
+
await ObjectStore.upload_file(
|
|
421
|
+
source=marker_file_path,
|
|
422
|
+
destination=get_object_store_prefix(marker_file_path),
|
|
423
|
+
store_name=UPSTREAM_OBJECT_STORE_NAME,
|
|
419
424
|
)
|
|
420
425
|
logger.info(f"Marker file written to {marker_file_path}")
|
|
421
426
|
|
|
422
|
-
def read_marker(self, workflow_args: Dict[str, Any]) -> Optional[int]:
|
|
427
|
+
async def read_marker(self, workflow_args: Dict[str, Any]) -> Optional[int]:
|
|
423
428
|
"""Read the marker from the output path.
|
|
424
429
|
|
|
425
430
|
This method reads the current marker value from a marker file to determine the
|
|
@@ -442,15 +447,12 @@ class SQLQueryExtractionActivities(ActivitiesInterface):
|
|
|
442
447
|
marker_file_path = os.path.join(output_path, "markerfile")
|
|
443
448
|
logger.info(f"Downloading marker file from {marker_file_path}")
|
|
444
449
|
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
marker_file_path,
|
|
450
|
-
object_store_name=UPSTREAM_OBJECT_STORE_NAME,
|
|
450
|
+
await ObjectStore.download_file(
|
|
451
|
+
source=get_object_store_prefix(marker_file_path),
|
|
452
|
+
destination=marker_file_path,
|
|
453
|
+
store_name=UPSTREAM_OBJECT_STORE_NAME,
|
|
451
454
|
)
|
|
452
455
|
|
|
453
|
-
logger.info(f"Output prefix: {workflow_args['output_prefix']}")
|
|
454
456
|
logger.info(f"Marker file downloaded to {marker_file_path}")
|
|
455
457
|
if not os.path.exists(marker_file_path):
|
|
456
458
|
logger.warning(f"Marker file does not exist at {marker_file_path}")
|
|
@@ -487,7 +489,7 @@ class SQLQueryExtractionActivities(ActivitiesInterface):
|
|
|
487
489
|
|
|
488
490
|
miner_args = MinerArgs(**workflow_args.get("miner_args", {}))
|
|
489
491
|
|
|
490
|
-
current_marker = self.read_marker(workflow_args)
|
|
492
|
+
current_marker = await self.read_marker(workflow_args)
|
|
491
493
|
if current_marker:
|
|
492
494
|
miner_args.current_marker = current_marker
|
|
493
495
|
|
|
@@ -522,8 +524,10 @@ class SQLQueryExtractionActivities(ActivitiesInterface):
|
|
|
522
524
|
with open(metadata_file_path, "w") as f:
|
|
523
525
|
f.write(json.dumps(parallel_markers))
|
|
524
526
|
|
|
525
|
-
await
|
|
526
|
-
|
|
527
|
+
await ObjectStore.upload_file(
|
|
528
|
+
source=metadata_file_path,
|
|
529
|
+
destination=get_object_store_prefix(metadata_file_path),
|
|
530
|
+
store_name=UPSTREAM_OBJECT_STORE_NAME,
|
|
527
531
|
)
|
|
528
532
|
|
|
529
533
|
try:
|
|
@@ -13,8 +13,8 @@ from application_sdk.constants import (
|
|
|
13
13
|
WORKFLOW_AUTH_ENABLED,
|
|
14
14
|
WORKFLOW_AUTH_URL_KEY,
|
|
15
15
|
)
|
|
16
|
-
from application_sdk.inputs.secretstore import SecretStoreInput
|
|
17
16
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
17
|
+
from application_sdk.services.secretstore import SecretStore
|
|
18
18
|
|
|
19
19
|
logger = get_logger(__name__)
|
|
20
20
|
|
|
@@ -39,7 +39,7 @@ class AtlanAuthClient:
|
|
|
39
39
|
(environment variables, AWS Secrets Manager, Azure Key Vault, etc.)
|
|
40
40
|
"""
|
|
41
41
|
self.application_name = APPLICATION_NAME
|
|
42
|
-
self.auth_config: Dict[str, Any] =
|
|
42
|
+
self.auth_config: Dict[str, Any] = SecretStore.get_deployment_secret()
|
|
43
43
|
self.auth_enabled: bool = WORKFLOW_AUTH_ENABLED
|
|
44
44
|
self.auth_url: Optional[str] = None
|
|
45
45
|
|
|
@@ -42,12 +42,10 @@ from application_sdk.events.models import (
|
|
|
42
42
|
EventTypes,
|
|
43
43
|
WorkflowStates,
|
|
44
44
|
)
|
|
45
|
-
from application_sdk.inputs.secretstore import SecretStoreInput
|
|
46
|
-
from application_sdk.inputs.statestore import StateType
|
|
47
45
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
48
|
-
from application_sdk.
|
|
49
|
-
from application_sdk.
|
|
50
|
-
from application_sdk.
|
|
46
|
+
from application_sdk.services.eventstore import EventStore
|
|
47
|
+
from application_sdk.services.secretstore import SecretStore
|
|
48
|
+
from application_sdk.services.statestore import StateStore, StateType
|
|
51
49
|
from application_sdk.workflows import WorkflowInterface
|
|
52
50
|
|
|
53
51
|
logger = get_logger(__name__)
|
|
@@ -269,9 +267,7 @@ class TemporalWorkflowClient(WorkflowClient):
|
|
|
269
267
|
self.port = port if port else WORKFLOW_PORT
|
|
270
268
|
self.namespace = namespace if namespace else WORKFLOW_NAMESPACE
|
|
271
269
|
|
|
272
|
-
self.deployment_config: Dict[str, Any] = (
|
|
273
|
-
SecretStoreInput.get_deployment_secret()
|
|
274
|
-
)
|
|
270
|
+
self.deployment_config: Dict[str, Any] = SecretStore.get_deployment_secret()
|
|
275
271
|
self.worker_task_queue = self.get_worker_task_queue()
|
|
276
272
|
self.auth_manager = AtlanAuthClient()
|
|
277
273
|
|
|
@@ -426,7 +422,7 @@ class TemporalWorkflowClient(WorkflowClient):
|
|
|
426
422
|
"""
|
|
427
423
|
if "credentials" in workflow_args:
|
|
428
424
|
# remove credentials from workflow_args and add reference to credentials
|
|
429
|
-
workflow_args["credential_guid"] = await
|
|
425
|
+
workflow_args["credential_guid"] = await SecretStore.save_secret(
|
|
430
426
|
workflow_args["credentials"]
|
|
431
427
|
)
|
|
432
428
|
del workflow_args["credentials"]
|
|
@@ -442,7 +438,7 @@ class TemporalWorkflowClient(WorkflowClient):
|
|
|
442
438
|
}
|
|
443
439
|
)
|
|
444
440
|
|
|
445
|
-
await
|
|
441
|
+
await StateStore.save_state_object(
|
|
446
442
|
id=workflow_id, value=workflow_args, type=StateType.WORKFLOWS
|
|
447
443
|
)
|
|
448
444
|
logger.info(f"Created workflow config with ID: {workflow_id}")
|
application_sdk/inputs/json.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional, Union
|
|
3
3
|
|
|
4
|
+
from application_sdk.activities.common.utils import get_object_store_prefix
|
|
4
5
|
from application_sdk.common.error_codes import IOError
|
|
5
6
|
from application_sdk.inputs import Input
|
|
6
|
-
from application_sdk.inputs.objectstore import ObjectStoreInput
|
|
7
7
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
8
|
+
from application_sdk.services.objectstore import ObjectStore
|
|
8
9
|
|
|
9
10
|
if TYPE_CHECKING:
|
|
10
11
|
import daft
|
|
@@ -51,9 +52,10 @@ class JsonInput(Input):
|
|
|
51
52
|
if self.download_file_prefix is not None and not os.path.exists(
|
|
52
53
|
os.path.join(self.path, file_name)
|
|
53
54
|
):
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
55
|
+
destination_file_path = os.path.join(self.path, file_name)
|
|
56
|
+
await ObjectStore.download_file(
|
|
57
|
+
source=get_object_store_prefix(destination_file_path),
|
|
58
|
+
destination=destination_file_path,
|
|
57
59
|
)
|
|
58
60
|
except IOError as e:
|
|
59
61
|
logger.error(
|
|
@@ -2,9 +2,10 @@ import glob
|
|
|
2
2
|
import os
|
|
3
3
|
from typing import TYPE_CHECKING, AsyncIterator, Iterator, List, Optional, Union
|
|
4
4
|
|
|
5
|
+
from application_sdk.activities.common.utils import get_object_store_prefix
|
|
5
6
|
from application_sdk.inputs import Input
|
|
6
|
-
from application_sdk.inputs.objectstore import ObjectStoreInput
|
|
7
7
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
8
|
+
from application_sdk.services.objectstore import ObjectStore
|
|
8
9
|
|
|
9
10
|
logger = get_logger(__name__)
|
|
10
11
|
|
|
@@ -42,37 +43,39 @@ class ParquetInput(Input):
|
|
|
42
43
|
self.input_prefix = input_prefix
|
|
43
44
|
self.file_names = file_names
|
|
44
45
|
|
|
45
|
-
async def download_files(self,
|
|
46
|
+
async def download_files(self, local_path: str) -> Optional[str]:
|
|
46
47
|
"""Read a file from the object store.
|
|
47
48
|
|
|
48
49
|
Args:
|
|
49
|
-
|
|
50
|
+
local_path (str): Path to the local data in the temp directory.
|
|
50
51
|
|
|
51
52
|
Returns:
|
|
52
53
|
Optional[str]: Path to the downloaded local file.
|
|
53
54
|
"""
|
|
54
55
|
# if the path is a directory, then check if the directory has any parquet files
|
|
55
56
|
parquet_files = []
|
|
56
|
-
if os.path.isdir(
|
|
57
|
-
parquet_files = glob.glob(os.path.join(
|
|
57
|
+
if os.path.isdir(local_path):
|
|
58
|
+
parquet_files = glob.glob(os.path.join(local_path, "*.parquet"))
|
|
58
59
|
else:
|
|
59
|
-
parquet_files = glob.glob(
|
|
60
|
+
parquet_files = glob.glob(local_path)
|
|
60
61
|
if not parquet_files:
|
|
61
62
|
if self.input_prefix:
|
|
62
63
|
logger.info(
|
|
63
|
-
f"Reading file from object store: {
|
|
64
|
+
f"Reading file from object store: {local_path} from {self.input_prefix}"
|
|
64
65
|
)
|
|
65
|
-
if os.path.isdir(
|
|
66
|
-
|
|
67
|
-
|
|
66
|
+
if os.path.isdir(local_path):
|
|
67
|
+
await ObjectStore.download_prefix(
|
|
68
|
+
source=get_object_store_prefix(local_path),
|
|
69
|
+
destination=local_path,
|
|
68
70
|
)
|
|
69
71
|
else:
|
|
70
|
-
|
|
71
|
-
|
|
72
|
+
await ObjectStore.download_file(
|
|
73
|
+
source=get_object_store_prefix(local_path),
|
|
74
|
+
destination=local_path,
|
|
72
75
|
)
|
|
73
76
|
else:
|
|
74
77
|
raise ValueError(
|
|
75
|
-
f"No parquet files found in {
|
|
78
|
+
f"No parquet files found in {local_path} and no input prefix provided"
|
|
76
79
|
)
|
|
77
80
|
|
|
78
81
|
async def get_dataframe(self) -> "pd.DataFrame":
|
|
@@ -22,9 +22,10 @@ import orjson
|
|
|
22
22
|
from temporalio import activity
|
|
23
23
|
|
|
24
24
|
from application_sdk.activities.common.models import ActivityStatistics
|
|
25
|
+
from application_sdk.activities.common.utils import get_object_store_prefix
|
|
25
26
|
from application_sdk.common.dataframe_utils import is_empty_dataframe
|
|
26
27
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
27
|
-
from application_sdk.
|
|
28
|
+
from application_sdk.services.objectstore import ObjectStore
|
|
28
29
|
|
|
29
30
|
logger = get_logger(__name__)
|
|
30
31
|
activity.logger = logger
|
|
@@ -223,9 +224,11 @@ class Output(ABC):
|
|
|
223
224
|
with open(output_file_name, "w") as f:
|
|
224
225
|
f.write(orjson.dumps(statistics).decode("utf-8"))
|
|
225
226
|
|
|
227
|
+
destination_file_path = get_object_store_prefix(output_file_name)
|
|
226
228
|
# Push the file to the object store
|
|
227
|
-
await
|
|
228
|
-
|
|
229
|
+
await ObjectStore.upload_file(
|
|
230
|
+
source=output_file_name,
|
|
231
|
+
destination=destination_file_path,
|
|
229
232
|
)
|
|
230
233
|
return statistics
|
|
231
234
|
except Exception as e:
|
application_sdk/outputs/json.py
CHANGED
|
@@ -5,10 +5,11 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
|
|
|
5
5
|
import orjson
|
|
6
6
|
from temporalio import activity
|
|
7
7
|
|
|
8
|
+
from application_sdk.activities.common.utils import get_object_store_prefix
|
|
8
9
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
9
10
|
from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
|
|
10
11
|
from application_sdk.outputs import Output
|
|
11
|
-
from application_sdk.
|
|
12
|
+
from application_sdk.services.objectstore import ObjectStore
|
|
12
13
|
|
|
13
14
|
logger = get_logger(__name__)
|
|
14
15
|
activity.logger = logger
|
|
@@ -285,9 +286,10 @@ class JsonOutput(Output):
|
|
|
285
286
|
description="Number of records written to JSON files from daft DataFrame",
|
|
286
287
|
)
|
|
287
288
|
|
|
288
|
-
# Push
|
|
289
|
-
await
|
|
290
|
-
self.
|
|
289
|
+
# Push files to the object store
|
|
290
|
+
await ObjectStore.upload_prefix(
|
|
291
|
+
source=self.output_path,
|
|
292
|
+
destination=get_object_store_prefix(self.output_path),
|
|
291
293
|
)
|
|
292
294
|
|
|
293
295
|
except Exception as e:
|
|
@@ -344,8 +346,9 @@ class JsonOutput(Output):
|
|
|
344
346
|
)
|
|
345
347
|
|
|
346
348
|
# Push the file to the object store
|
|
347
|
-
await
|
|
348
|
-
|
|
349
|
+
await ObjectStore.upload_file(
|
|
350
|
+
source=output_file_name,
|
|
351
|
+
destination=get_object_store_prefix(output_file_name),
|
|
349
352
|
)
|
|
350
353
|
|
|
351
354
|
self.buffer.clear()
|
|
@@ -3,10 +3,11 @@ from typing import TYPE_CHECKING, Literal, Optional
|
|
|
3
3
|
|
|
4
4
|
from temporalio import activity
|
|
5
5
|
|
|
6
|
+
from application_sdk.activities.common.utils import get_object_store_prefix
|
|
6
7
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
7
8
|
from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
|
|
8
9
|
from application_sdk.outputs import Output
|
|
9
|
-
from application_sdk.
|
|
10
|
+
from application_sdk.services.objectstore import ObjectStore
|
|
10
11
|
|
|
11
12
|
logger = get_logger(__name__)
|
|
12
13
|
activity.logger = logger
|
|
@@ -159,7 +160,10 @@ class ParquetOutput(Output):
|
|
|
159
160
|
)
|
|
160
161
|
|
|
161
162
|
# Upload the file to object store
|
|
162
|
-
await
|
|
163
|
+
await ObjectStore.upload_file(
|
|
164
|
+
source=file_path,
|
|
165
|
+
destination=get_object_store_prefix(file_path),
|
|
166
|
+
)
|
|
163
167
|
except Exception as e:
|
|
164
168
|
# Record metrics for failed write
|
|
165
169
|
self.metrics.record_metric(
|
|
@@ -218,7 +222,10 @@ class ParquetOutput(Output):
|
|
|
218
222
|
)
|
|
219
223
|
|
|
220
224
|
# Upload the file to object store
|
|
221
|
-
await
|
|
225
|
+
await ObjectStore.upload_file(
|
|
226
|
+
source=file_path,
|
|
227
|
+
destination=get_object_store_prefix(file_path),
|
|
228
|
+
)
|
|
222
229
|
except Exception as e:
|
|
223
230
|
# Record metrics for failed write
|
|
224
231
|
self.metrics.record_metric(
|
|
@@ -231,39 +238,6 @@ class ParquetOutput(Output):
|
|
|
231
238
|
logger.error(f"Error writing daft dataframe to parquet: {str(e)}")
|
|
232
239
|
raise
|
|
233
240
|
|
|
234
|
-
async def upload_file(self, local_file_path: str) -> None:
|
|
235
|
-
"""Upload a file to the object store.
|
|
236
|
-
|
|
237
|
-
Args:
|
|
238
|
-
local_file_path (str): Path to the local file to upload.
|
|
239
|
-
"""
|
|
240
|
-
try:
|
|
241
|
-
if os.path.isdir(local_file_path):
|
|
242
|
-
logger.info(
|
|
243
|
-
f"Uploading files: {local_file_path} to {self.output_prefix}"
|
|
244
|
-
)
|
|
245
|
-
await ObjectStoreOutput.push_files_to_object_store(
|
|
246
|
-
self.output_prefix, local_file_path
|
|
247
|
-
)
|
|
248
|
-
else:
|
|
249
|
-
logger.info(
|
|
250
|
-
f"Uploading file: {local_file_path} to {self.output_prefix}"
|
|
251
|
-
)
|
|
252
|
-
await ObjectStoreOutput.push_file_to_object_store(
|
|
253
|
-
self.output_prefix, local_file_path
|
|
254
|
-
)
|
|
255
|
-
except Exception as e:
|
|
256
|
-
# Record metrics for failed upload
|
|
257
|
-
self.metrics.record_metric(
|
|
258
|
-
name="parquet_upload_errors",
|
|
259
|
-
value=1,
|
|
260
|
-
metric_type=MetricType.COUNTER,
|
|
261
|
-
labels={"error": str(e)},
|
|
262
|
-
description="Number of errors while uploading Parquet files to object store",
|
|
263
|
-
)
|
|
264
|
-
logger.error(f"Error uploading file to object store: {str(e)}")
|
|
265
|
-
raise e
|
|
266
|
-
|
|
267
241
|
def get_full_path(self) -> str:
|
|
268
242
|
"""Get the full path of the output file.
|
|
269
243
|
|
|
@@ -25,11 +25,9 @@ from application_sdk.constants import (
|
|
|
25
25
|
)
|
|
26
26
|
from application_sdk.docgen import AtlanDocsGenerator
|
|
27
27
|
from application_sdk.handlers import HandlerInterface
|
|
28
|
-
from application_sdk.inputs.statestore import StateStoreInput, StateType
|
|
29
28
|
from application_sdk.observability.logger_adaptor import get_logger
|
|
30
29
|
from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
|
|
31
30
|
from application_sdk.observability.observability import DuckDBUI
|
|
32
|
-
from application_sdk.outputs.statestore import StateStoreOutput
|
|
33
31
|
from application_sdk.server import ServerInterface
|
|
34
32
|
from application_sdk.server.fastapi.middleware.logmiddleware import LogMiddleware
|
|
35
33
|
from application_sdk.server.fastapi.middleware.metrics import MetricsMiddleware
|
|
@@ -53,6 +51,7 @@ from application_sdk.server.fastapi.models import (
|
|
|
53
51
|
)
|
|
54
52
|
from application_sdk.server.fastapi.routers.server import get_server_router
|
|
55
53
|
from application_sdk.server.fastapi.utils import internal_server_error_handler
|
|
54
|
+
from application_sdk.services.statestore import StateStore, StateType
|
|
56
55
|
from application_sdk.workflows import WorkflowInterface
|
|
57
56
|
|
|
58
57
|
logger = get_logger(__name__)
|
|
@@ -588,7 +587,7 @@ class APIServer(ServerInterface):
|
|
|
588
587
|
)
|
|
589
588
|
raise e
|
|
590
589
|
|
|
591
|
-
def get_workflow_config(
|
|
590
|
+
async def get_workflow_config(
|
|
592
591
|
self, config_id: str, type: str = "workflows"
|
|
593
592
|
) -> WorkflowConfigResponse:
|
|
594
593
|
"""Retrieve workflow configuration by ID.
|
|
@@ -603,7 +602,7 @@ class APIServer(ServerInterface):
|
|
|
603
602
|
if not StateType.is_member(type):
|
|
604
603
|
raise ValueError(f"Invalid type {type} for state store")
|
|
605
604
|
|
|
606
|
-
config =
|
|
605
|
+
config = await StateStore.get_state(config_id, StateType(type))
|
|
607
606
|
return WorkflowConfigResponse(
|
|
608
607
|
success=True,
|
|
609
608
|
message="Workflow configuration fetched successfully",
|
|
@@ -680,7 +679,7 @@ class APIServer(ServerInterface):
|
|
|
680
679
|
if not StateType.is_member(type):
|
|
681
680
|
raise ValueError(f"Invalid type {type} for state store")
|
|
682
681
|
|
|
683
|
-
config = await
|
|
682
|
+
config = await StateStore.save_state_object(
|
|
684
683
|
id=config_id, value=body.model_dump(), type=StateType(type)
|
|
685
684
|
)
|
|
686
685
|
return WorkflowConfigResponse(
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Services module for the application SDK."""
|
|
2
|
+
|
|
3
|
+
from .atlan_storage import AtlanStorage, MigrationSummary
|
|
4
|
+
from .eventstore import EventStore
|
|
5
|
+
from .objectstore import ObjectStore
|
|
6
|
+
from .secretstore import SecretStore
|
|
7
|
+
from .statestore import StateStore, StateType, build_state_store_path
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"AtlanStorage",
|
|
11
|
+
"EventStore",
|
|
12
|
+
"MigrationSummary",
|
|
13
|
+
"ObjectStore",
|
|
14
|
+
"SecretStore",
|
|
15
|
+
"StateStore",
|
|
16
|
+
"StateType",
|
|
17
|
+
"build_state_store_path",
|
|
18
|
+
]
|