atlan-application-sdk 0.1.1rc34__py3-none-any.whl → 0.1.1rc36__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/__init__.py +3 -2
- application_sdk/activities/common/utils.py +21 -1
- application_sdk/activities/lock_management.py +110 -0
- application_sdk/activities/metadata_extraction/base.py +4 -2
- application_sdk/activities/metadata_extraction/sql.py +13 -12
- application_sdk/activities/query_extraction/sql.py +24 -20
- application_sdk/clients/atlan_auth.py +2 -2
- application_sdk/clients/redis.py +443 -0
- application_sdk/clients/temporal.py +36 -196
- application_sdk/common/error_codes.py +24 -3
- application_sdk/constants.py +18 -1
- application_sdk/decorators/__init__.py +0 -0
- application_sdk/decorators/locks.py +42 -0
- application_sdk/handlers/base.py +18 -1
- application_sdk/inputs/json.py +6 -4
- application_sdk/inputs/parquet.py +16 -13
- application_sdk/interceptors/__init__.py +0 -0
- application_sdk/interceptors/events.py +193 -0
- application_sdk/interceptors/lock.py +139 -0
- application_sdk/outputs/__init__.py +6 -3
- application_sdk/outputs/json.py +9 -6
- application_sdk/outputs/parquet.py +10 -36
- application_sdk/server/fastapi/__init__.py +4 -5
- application_sdk/services/__init__.py +18 -0
- application_sdk/{outputs → services}/atlan_storage.py +64 -16
- application_sdk/{outputs → services}/eventstore.py +68 -6
- application_sdk/services/objectstore.py +407 -0
- application_sdk/services/secretstore.py +344 -0
- application_sdk/services/statestore.py +267 -0
- application_sdk/version.py +1 -1
- application_sdk/worker.py +1 -1
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/METADATA +4 -2
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/RECORD +36 -32
- application_sdk/common/credential_utils.py +0 -85
- application_sdk/inputs/objectstore.py +0 -238
- application_sdk/inputs/secretstore.py +0 -130
- application_sdk/inputs/statestore.py +0 -101
- application_sdk/outputs/objectstore.py +0 -125
- application_sdk/outputs/secretstore.py +0 -38
- application_sdk/outputs/statestore.py +0 -113
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/licenses/NOTICE +0 -0
application_sdk/interceptors/events.py
ADDED
@@ -0,0 +1,193 @@
+from datetime import timedelta
+from typing import Any, Optional, Type
+
+from temporalio import activity, workflow
+from temporalio.common import RetryPolicy
+from temporalio.worker import (
+    ActivityInboundInterceptor,
+    ExecuteActivityInput,
+    ExecuteWorkflowInput,
+    Interceptor,
+    WorkflowInboundInterceptor,
+    WorkflowInterceptorClassInput,
+)
+
+from application_sdk.events.models import (
+    ApplicationEventNames,
+    Event,
+    EventMetadata,
+    EventTypes,
+    WorkflowStates,
+)
+from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.eventstore import EventStore
+
+logger = get_logger(__name__)
+
+TEMPORAL_NOT_FOUND_FAILURE = (
+    "type.googleapis.com/temporal.api.errordetails.v1.NotFoundFailure"
+)
+
+
+# Activity for publishing events (runs outside sandbox)
+@activity.defn
+async def publish_event(event_data: dict) -> None:
+    """Activity to publish events outside the workflow sandbox.
+
+    Args:
+        event_data (dict): Event data to publish containing event_type, event_name,
+            metadata, and data fields.
+    """
+    try:
+        event = Event(**event_data)
+        await EventStore.publish_event(event)
+        activity.logger.info(f"Published event: {event_data.get('event_name','')}")
+    except Exception as e:
+        activity.logger.error(f"Failed to publish event: {e}")
+        raise
+
+
+class EventActivityInboundInterceptor(ActivityInboundInterceptor):
+    """Interceptor for tracking activity execution events.
+
+    This interceptor captures the start and end of activity executions,
+    creating events that can be used for monitoring and tracking.
+    Activities run outside the sandbox so they can directly call EventStore.
+    """
+
+    async def execute_activity(self, input: ExecuteActivityInput) -> Any:
+        """Execute an activity with event tracking.
+
+        Args:
+            input (ExecuteActivityInput): The activity execution input.
+
+        Returns:
+            Any: The result of the activity execution.
+        """
+        # Extract activity information for tracking
+
+        start_event = Event(
+            event_type=EventTypes.APPLICATION_EVENT.value,
+            event_name=ApplicationEventNames.ACTIVITY_START.value,
+            data={},
+        )
+        await EventStore.publish_event(start_event)
+
+        output = None
+        try:
+            output = await super().execute_activity(input)
+        except Exception:
+            raise
+        finally:
+            end_event = Event(
+                event_type=EventTypes.APPLICATION_EVENT.value,
+                event_name=ApplicationEventNames.ACTIVITY_END.value,
+                data={},
+            )
+            await EventStore.publish_event(end_event)
+
+        return output
+
+
+class EventWorkflowInboundInterceptor(WorkflowInboundInterceptor):
+    """Interceptor for tracking workflow execution events.
+
+    This interceptor captures the start and end of workflow executions,
+    creating events that can be used for monitoring and tracking.
+    Uses activities to publish events to avoid sandbox restrictions.
+    """
+
+    async def execute_workflow(self, input: ExecuteWorkflowInput) -> Any:
+        """Execute a workflow with event tracking.
+
+        Args:
+            input (ExecuteWorkflowInput): The workflow execution input.
+
+        Returns:
+            Any: The result of the workflow execution.
+        """
+
+        # Publish workflow start event via activity
+        try:
+            await workflow.execute_activity(
+                publish_event,
+                {
+                    "metadata": EventMetadata(
+                        workflow_state=WorkflowStates.RUNNING.value
+                    ),
+                    "event_type": EventTypes.APPLICATION_EVENT.value,
+                    "event_name": ApplicationEventNames.WORKFLOW_START.value,
+                    "data": {},
+                },
+                schedule_to_close_timeout=timedelta(seconds=30),
+                retry_policy=RetryPolicy(maximum_attempts=3),
+            )
+        except Exception as e:
+            workflow.logger.warning(f"Failed to publish workflow start event: {e}")
+            # Don't fail the workflow if event publishing fails
+
+        output = None
+        workflow_state = WorkflowStates.FAILED.value  # Default to failed
+
+        try:
+            output = await super().execute_workflow(input)
+            workflow_state = (
+                WorkflowStates.COMPLETED.value
+            )  # Update to completed on success
+        except Exception:
+            workflow_state = WorkflowStates.FAILED.value  # Keep as failed
+            raise
+        finally:
+            # Always publish workflow end event
+            try:
+                await workflow.execute_activity(
+                    publish_event,
+                    {
+                        "metadata": EventMetadata(workflow_state=workflow_state),
+                        "event_type": EventTypes.APPLICATION_EVENT.value,
+                        "event_name": ApplicationEventNames.WORKFLOW_END.value,
+                        "data": {},
+                    },
+                    schedule_to_close_timeout=timedelta(seconds=30),
+                    retry_policy=RetryPolicy(maximum_attempts=3),
+                )
+            except Exception as publish_error:
+                workflow.logger.warning(
+                    f"Failed to publish workflow end event: {publish_error}"
+                )
+
+        return output
+
+
+class EventInterceptor(Interceptor):
+    """Temporal interceptor for event tracking.
+
+    This interceptor provides event tracking capabilities for both
+    workflow and activity executions.
+    """
+
+    def intercept_activity(
+        self, next: ActivityInboundInterceptor
+    ) -> ActivityInboundInterceptor:
+        """Intercept activity executions.
+
+        Args:
+            next (ActivityInboundInterceptor): The next interceptor in the chain.
+
+        Returns:
+            ActivityInboundInterceptor: The activity interceptor.
+        """
+        return EventActivityInboundInterceptor(super().intercept_activity(next))
+
+    def workflow_interceptor_class(
+        self, input: WorkflowInterceptorClassInput
+    ) -> Optional[Type[WorkflowInboundInterceptor]]:
+        """Get the workflow interceptor class.
+
+        Args:
+            input (WorkflowInterceptorClassInput): The interceptor input.
+
+        Returns:
+            Optional[Type[WorkflowInboundInterceptor]]: The workflow interceptor class.
+        """
+        return EventWorkflowInboundInterceptor
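Usage note: EventInterceptor follows the standard temporalio worker-interceptor contract, so it is registered on the Worker rather than on individual workflows. The sketch below is illustrative only and assumes a plain temporalio Worker; the task queue name and the empty workflow list are placeholders, and this SDK's own worker wrapper (application_sdk/worker.py) may wire things differently. The publish_event activity is registered so the workflow-level interceptor can invoke it.

    # Illustrative sketch: registering the event interceptor on a Temporal worker.
    from temporalio.client import Client
    from temporalio.worker import Worker

    from application_sdk.interceptors.events import EventInterceptor, publish_event


    async def run_worker() -> None:
        client = await Client.connect("localhost:7233")  # assumed local Temporal server
        worker = Worker(
            client,
            task_queue="example-task-queue",    # placeholder task queue name
            workflows=[],                       # application workflows go here
            activities=[publish_event],         # needed by the workflow interceptor
            interceptors=[EventInterceptor()],  # emits workflow/activity start and end events
        )
        await worker.run()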
application_sdk/interceptors/lock.py
ADDED
@@ -0,0 +1,139 @@
+"""Redis lock interceptor for Temporal workflows.
+
+Manages distributed locks for activities decorated with @needs_lock using
+separate lock acquisition and release activities to avoid workflow deadlocks.
+"""
+
+from datetime import timedelta
+from typing import Any, Dict, Optional, Type
+
+from temporalio import workflow
+from temporalio.common import RetryPolicy
+from temporalio.worker import (
+    Interceptor,
+    StartActivityInput,
+    WorkflowInboundInterceptor,
+    WorkflowInterceptorClassInput,
+    WorkflowOutboundInterceptor,
+)
+
+from application_sdk.common.error_codes import WorkflowError
+from application_sdk.constants import (
+    APPLICATION_NAME,
+    IS_LOCKING_DISABLED,
+    LOCK_METADATA_KEY,
+)
+from application_sdk.observability.logger_adaptor import get_logger
+
+logger = get_logger(__name__)
+
+
+class RedisLockInterceptor(Interceptor):
+    """Main interceptor class for Redis distributed locking."""
+
+    def __init__(self, activities: Dict[str, Any]):
+        """Initialize Redis lock interceptor.
+
+        Args:
+            activities: Dictionary mapping activity names to activity functions
+        """
+        self.activities = activities
+
+    def workflow_interceptor_class(
+        self, input: WorkflowInterceptorClassInput
+    ) -> Optional[Type[WorkflowInboundInterceptor]]:
+        activities = self.activities
+
+        class RedisLockWorkflowInboundInterceptor(WorkflowInboundInterceptor):
+            """Inbound interceptor that manages Redis locks for activities."""
+
+            def init(self, outbound: WorkflowOutboundInterceptor) -> None:
+                """Initialize with Redis lock outbound interceptor."""
+                lock_outbound = RedisLockOutboundInterceptor(outbound, activities)
+                super().init(lock_outbound)
+
+        return RedisLockWorkflowInboundInterceptor
+
+
+class RedisLockOutboundInterceptor(WorkflowOutboundInterceptor):
+    """Outbound interceptor that acquires Redis locks before activity execution."""
+
+    def __init__(self, next: WorkflowOutboundInterceptor, activities: Dict[str, Any]):
+        super().__init__(next)
+        self.activities = activities
+
+    async def start_activity(  # type: ignore[override]
+        self, input: StartActivityInput
+    ) -> workflow.ActivityHandle[Any]:
+        """Start activity with distributed lock if required."""
+
+        # Check if activity needs locking
+        activity_fn = self.activities.get(input.activity)
+        if (
+            not activity_fn
+            or not hasattr(activity_fn, LOCK_METADATA_KEY)
+            or IS_LOCKING_DISABLED
+        ):
+            return await self.next.start_activity(input)
+
+        lock_config = getattr(activity_fn, LOCK_METADATA_KEY)
+        lock_name = lock_config.get("lock_name", input.activity)
+        max_locks = lock_config.get("max_locks", 5)
+        if not input.schedule_to_close_timeout:
+            logger.error(
+                f"Activity '{input.activity}' with @needs_lock decorator requires schedule_to_close_timeout"
+            )
+            raise WorkflowError(
+                f"{WorkflowError.WORKFLOW_CONFIG_ERROR}: Activity '{input.activity}' with @needs_lock decorator must be called with schedule_to_close_timeout parameter. "
+                f"Example: workflow.execute_activity('{input.activity}', schedule_to_close_timeout=timedelta(minutes=10))"
+            )
+        ttl_seconds = int(input.schedule_to_close_timeout.total_seconds())
+
+        # Orchestrate lock acquisition -> business activity -> lock release
+        return await self._execute_with_lock_orchestration(
+            input, lock_name, max_locks, ttl_seconds
+        )
+
+    async def _execute_with_lock_orchestration(
+        self,
+        input: StartActivityInput,
+        lock_name: str,
+        max_locks: int,
+        ttl_seconds: int,
+    ) -> workflow.ActivityHandle[Any]:
+        """Execute activity with distributed lock orchestration."""
+        owner_id = f"{APPLICATION_NAME}:{workflow.info().run_id}"
+        lock_result = None
+
+        try:
+            # Step 1: Acquire lock via dedicated activity (can take >2s safely)
+            start_to_close_timeout = workflow.info().execution_timeout
+            lock_result = await workflow.execute_activity(
+                "acquire_distributed_lock",
+                args=[lock_name, max_locks, ttl_seconds, owner_id],
+                start_to_close_timeout=start_to_close_timeout,
+                retry_policy=RetryPolicy(maximum_attempts=1),
+            )
+
+            logger.debug(f"Lock acquired: {lock_result}, executing {input.activity}")
+
+            # Step 2: Execute the business activity and return its handle
+            return await self.next.start_activity(input)
+
+        finally:
+            # Step 3: Release lock (fire-and-forget with short timeout)
+            if lock_result is not None:
+                try:
+                    await workflow.execute_local_activity(
+                        "release_distributed_lock",
+                        args=[lock_result["resource_id"], lock_result["owner_id"]],
+                        start_to_close_timeout=timedelta(seconds=5),
+                        retry_policy=RetryPolicy(maximum_attempts=1),
+                    )
+                    logger.debug(f"Lock released: {lock_result['resource_id']}")
+                except Exception as e:
+                    # Silent failure - TTL will handle cleanup
+                    logger.warning(
+                        f"Lock release failed for {lock_result['resource_id']}: {e}. "
+                        f"TTL will handle cleanup."
+                    )
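Calling convention implied by the checks above: an activity registered with the @needs_lock decorator (added in application_sdk/decorators/locks.py in this release) must be invoked with schedule_to_close_timeout, because the interceptor converts that timeout into the Redis lock TTL. A minimal, hypothetical example of a conforming call ("fetch_metadata" is a placeholder activity name), mirroring the example embedded in the WorkflowError message:

    # Illustrative sketch: invoking a @needs_lock-decorated activity from a workflow.
    from datetime import timedelta

    from temporalio import workflow


    @workflow.defn
    class ExampleWorkflow:
        @workflow.run
        async def run(self) -> None:
            await workflow.execute_activity(
                "fetch_metadata",  # placeholder for a lock-protected activity
                schedule_to_close_timeout=timedelta(minutes=10),  # reused as the lock TTL
            )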
application_sdk/outputs/__init__.py
CHANGED
@@ -22,9 +22,10 @@ import orjson
 from temporalio import activity
 
 from application_sdk.activities.common.models import ActivityStatistics
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.common.dataframe_utils import is_empty_dataframe
 from application_sdk.observability.logger_adaptor import get_logger
-from application_sdk.
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 activity.logger = logger
@@ -223,9 +224,11 @@ class Output(ABC):
             with open(output_file_name, "w") as f:
                 f.write(orjson.dumps(statistics).decode("utf-8"))
 
+            destination_file_path = get_object_store_prefix(output_file_name)
             # Push the file to the object store
-            await
-
+            await ObjectStore.upload_file(
+                source=output_file_name,
+                destination=destination_file_path,
             )
             return statistics
         except Exception as e:
application_sdk/outputs/json.py
CHANGED
@@ -5,10 +5,11 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
 import orjson
 from temporalio import activity
 
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
-from application_sdk.
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 activity.logger = logger
@@ -285,9 +286,10 @@ class JsonOutput(Output):
                 description="Number of records written to JSON files from daft DataFrame",
             )
 
-            # Push
-            await
-            self.
+            # Push files to the object store
+            await ObjectStore.upload_prefix(
+                source=self.output_path,
+                destination=get_object_store_prefix(self.output_path),
             )
 
         except Exception as e:
@@ -344,8 +346,9 @@ class JsonOutput(Output):
             )
 
             # Push the file to the object store
-            await
-
+            await ObjectStore.upload_file(
+                source=output_file_name,
+                destination=get_object_store_prefix(output_file_name),
             )
 
             self.buffer.clear()
application_sdk/outputs/parquet.py
CHANGED
@@ -3,10 +3,11 @@ from typing import TYPE_CHECKING, Literal, Optional
 
 from temporalio import activity
 
+from application_sdk.activities.common.utils import get_object_store_prefix
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.outputs import Output
-from application_sdk.
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 activity.logger = logger
@@ -159,7 +160,10 @@ class ParquetOutput(Output):
             )
 
             # Upload the file to object store
-            await
+            await ObjectStore.upload_file(
+                source=file_path,
+                destination=get_object_store_prefix(file_path),
+            )
         except Exception as e:
             # Record metrics for failed write
             self.metrics.record_metric(
@@ -218,7 +222,10 @@ class ParquetOutput(Output):
             )
 
             # Upload the file to object store
-            await
+            await ObjectStore.upload_file(
+                source=file_path,
+                destination=get_object_store_prefix(file_path),
+            )
         except Exception as e:
             # Record metrics for failed write
             self.metrics.record_metric(
@@ -231,39 +238,6 @@ class ParquetOutput(Output):
             logger.error(f"Error writing daft dataframe to parquet: {str(e)}")
             raise
 
-    async def upload_file(self, local_file_path: str) -> None:
-        """Upload a file to the object store.
-
-        Args:
-            local_file_path (str): Path to the local file to upload.
-        """
-        try:
-            if os.path.isdir(local_file_path):
-                logger.info(
-                    f"Uploading files: {local_file_path} to {self.output_prefix}"
-                )
-                await ObjectStoreOutput.push_files_to_object_store(
-                    self.output_prefix, local_file_path
-                )
-            else:
-                logger.info(
-                    f"Uploading file: {local_file_path} to {self.output_prefix}"
-                )
-                await ObjectStoreOutput.push_file_to_object_store(
-                    self.output_prefix, local_file_path
-                )
-        except Exception as e:
-            # Record metrics for failed upload
-            self.metrics.record_metric(
-                name="parquet_upload_errors",
-                value=1,
-                metric_type=MetricType.COUNTER,
-                labels={"error": str(e)},
-                description="Number of errors while uploading Parquet files to object store",
-            )
-            logger.error(f"Error uploading file to object store: {str(e)}")
-            raise e
-
     def get_full_path(self) -> str:
         """Get the full path of the output file.
 
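With the upload_file helper removed from ParquetOutput, uploads in the outputs module now go through the ObjectStore service directly, as the hunks above show. For any code that still called the old helper, the replacement pattern is sketched below; the file path is a placeholder and the destination key is derived with get_object_store_prefix, matching the new SDK code.

    # Illustrative sketch: the call pattern that replaces ParquetOutput.upload_file.
    from application_sdk.activities.common.utils import get_object_store_prefix
    from application_sdk.services.objectstore import ObjectStore


    async def upload_parquet(file_path: str) -> None:
        await ObjectStore.upload_file(
            source=file_path,                                # local file to upload
            destination=get_object_store_prefix(file_path),  # object store key
        )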
application_sdk/server/fastapi/__init__.py
CHANGED
@@ -25,11 +25,9 @@ from application_sdk.constants import (
 )
 from application_sdk.docgen import AtlanDocsGenerator
 from application_sdk.handlers import HandlerInterface
-from application_sdk.inputs.statestore import StateStoreInput, StateType
 from application_sdk.observability.logger_adaptor import get_logger
 from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
 from application_sdk.observability.observability import DuckDBUI
-from application_sdk.outputs.statestore import StateStoreOutput
 from application_sdk.server import ServerInterface
 from application_sdk.server.fastapi.middleware.logmiddleware import LogMiddleware
 from application_sdk.server.fastapi.middleware.metrics import MetricsMiddleware
@@ -53,6 +51,7 @@ from application_sdk.server.fastapi.models import (
 )
 from application_sdk.server.fastapi.routers.server import get_server_router
 from application_sdk.server.fastapi.utils import internal_server_error_handler
+from application_sdk.services.statestore import StateStore, StateType
 from application_sdk.workflows import WorkflowInterface
 
 logger = get_logger(__name__)
@@ -588,7 +587,7 @@ class APIServer(ServerInterface):
             )
             raise e
 
-    def get_workflow_config(
+    async def get_workflow_config(
         self, config_id: str, type: str = "workflows"
     ) -> WorkflowConfigResponse:
         """Retrieve workflow configuration by ID.
@@ -603,7 +602,7 @@
         if not StateType.is_member(type):
             raise ValueError(f"Invalid type {type} for state store")
 
-        config =
+        config = await StateStore.get_state(config_id, StateType(type))
         return WorkflowConfigResponse(
             success=True,
             message="Workflow configuration fetched successfully",
@@ -680,7 +679,7 @@
         if not StateType.is_member(type):
             raise ValueError(f"Invalid type {type} for state store")
 
-        config = await
+        config = await StateStore.save_state_object(
             id=config_id, value=body.model_dump(), type=StateType(type)
         )
         return WorkflowConfigResponse(
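Note that get_workflow_config is now a coroutine backed by StateStore.get_state, so any direct caller has to await it; a one-line, hypothetical sketch (server stands for an APIServer instance):

    # Illustrative sketch: the method must now be awaited.
    config_response = await server.get_workflow_config(config_id="wf-123", type="workflows")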
application_sdk/services/__init__.py
ADDED
@@ -0,0 +1,18 @@
+"""Services module for the application SDK."""
+
+from .atlan_storage import AtlanStorage, MigrationSummary
+from .eventstore import EventStore
+from .objectstore import ObjectStore
+from .secretstore import SecretStore
+from .statestore import StateStore, StateType, build_state_store_path
+
+__all__ = [
+    "AtlanStorage",
+    "EventStore",
+    "MigrationSummary",
+    "ObjectStore",
+    "SecretStore",
+    "StateStore",
+    "StateType",
+    "build_state_store_path",
+]
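This new package consolidates what the deleted inputs/outputs modules (inputs/objectstore.py, inputs/secretstore.py, inputs/statestore.py, outputs/objectstore.py, outputs/secretstore.py, outputs/statestore.py) previously provided. Based on the re-exports above, downstream imports can come straight from the package root; a brief sketch:

    # Illustrative sketch: importing the consolidated service classes.
    from application_sdk.services import ObjectStore, SecretStore, StateStore, StateType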
application_sdk/{outputs → services}/atlan_storage.py
RENAMED + CHANGED
@@ -1,4 +1,12 @@
-"""Atlan storage
+"""Atlan storage service for upload operations and migration from object store.
+
+This module provides the AtlanStorage service for handling data migration between
+local object storage and Atlan's upstream storage system. It's specifically designed
+for the bucket cloning strategy used in customer-deployed applications.
+
+The service supports parallel file migration with comprehensive error handling and
+detailed reporting through the MigrationSummary model.
+"""
 
 import asyncio
 from typing import Dict, List
@@ -11,8 +19,8 @@ from application_sdk.constants import (
     DEPLOYMENT_OBJECT_STORE_NAME,
     UPSTREAM_OBJECT_STORE_NAME,
 )
-from application_sdk.inputs.objectstore import ObjectStoreInput
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore
 
 logger = get_logger(__name__)
 activity.logger = logger
@@ -43,27 +51,37 @@ class MigrationSummary(BaseModel):
     destination: str = UPSTREAM_OBJECT_STORE_NAME
 
 
-
-class AtlanStorageOutput:
+class AtlanStorage:
     """Handles upload operations to Atlan storage and migration from objectstore."""
 
     OBJECT_CREATE_OPERATION = "create"
 
     @classmethod
     async def _migrate_single_file(cls, file_path: str) -> tuple[str, bool, str]:
-        """
-
+        """Migrate a single file from object store to Atlan storage.
+
+        This internal method handles the migration of a single file, including
+        error handling and logging. It's designed to be called concurrently
+        for multiple files.
 
         Args:
-            file_path (str): The path of the file to migrate
+            file_path (str): The path of the file to migrate in the object store.
 
        Returns:
-            tuple[str, bool, str]:
+            tuple[str, bool, str]: A tuple containing:
+                - file_path: The path of the file that was processed
+                - success: Boolean indicating if migration was successful
+                - error_message: Error details if migration failed, empty string if successful
+
+        Note:
+            This method is internal and should not be called directly. Use
+            migrate_from_objectstore_to_atlan() instead for proper coordination
+            and error handling.
        """
        try:
            # Get file data from objectstore
-            file_data =
-                file_path,
+            file_data = await ObjectStore.get_content(
+                file_path, store_name=DEPLOYMENT_OBJECT_STORE_NAME
            )

            with DaprClient() as client:
@@ -91,14 +109,44 @@ class AtlanStorageOutput:
     async def migrate_from_objectstore_to_atlan(
         cls, prefix: str = ""
     ) -> MigrationSummary:
-        """
-
+        """Migrate all files from object store to Atlan storage under a given prefix.
+
+        This method performs a parallel migration of files from the local object store
+        to Atlan's upstream storage system. It provides comprehensive error handling
+        and detailed reporting of the migration process.
 
         Args:
-            prefix (str): The prefix to filter which files to migrate.
+            prefix (str, optional): The prefix to filter which files to migrate.
+                Empty string migrates all files. Defaults to "".
 
         Returns:
-            MigrationSummary:
+            MigrationSummary: Comprehensive migration summary including:
+                - total_files: Number of files found for migration
+                - migrated_files: Number successfully migrated
+                - failed_migrations: Number that failed to migrate
+                - failures: List of failure details with file paths and errors
+                - prefix: The prefix used for filtering
+                - source/destination: Storage system identifiers
+
+        Raises:
+            Exception: If there's a critical error during the migration process.
+
+        Examples:
+            >>> # Migrate all files
+            >>> summary = await AtlanStorage.migrate_from_objectstore_to_atlan()
+            >>> print(f"Success rate: {summary.migrated_files/summary.total_files*100:.1f}%")
+
+            >>> # Migrate specific dataset
+            >>> summary = await AtlanStorage.migrate_from_objectstore_to_atlan(
+            ...     prefix="processed_data/2024/"
+            ... )
+            >>> if summary.total_files == 0:
+            ...     print("No files found with the specified prefix")
+            >>> elif summary.failed_migrations == 0:
+            ...     print(f"Successfully migrated all {summary.total_files} files")
+            >>> else:
+            ...     print(f"Migration completed with {summary.failed_migrations} failures")
+            ...     # Handle failures...
         """
         try:
             logger.info(
@@ -106,8 +154,8 @@ class AtlanStorageOutput:
             )
 
             # Get list of all files to migrate from objectstore
-            files_to_migrate =
-                prefix,
+            files_to_migrate = await ObjectStore.list_files(
+                prefix, store_name=DEPLOYMENT_OBJECT_STORE_NAME
             )
 
             total_files = len(files_to_migrate)