atlan-application-sdk 0.1.1rc34__py3-none-any.whl → 0.1.1rc36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. application_sdk/activities/__init__.py +3 -2
  2. application_sdk/activities/common/utils.py +21 -1
  3. application_sdk/activities/lock_management.py +110 -0
  4. application_sdk/activities/metadata_extraction/base.py +4 -2
  5. application_sdk/activities/metadata_extraction/sql.py +13 -12
  6. application_sdk/activities/query_extraction/sql.py +24 -20
  7. application_sdk/clients/atlan_auth.py +2 -2
  8. application_sdk/clients/redis.py +443 -0
  9. application_sdk/clients/temporal.py +36 -196
  10. application_sdk/common/error_codes.py +24 -3
  11. application_sdk/constants.py +18 -1
  12. application_sdk/decorators/__init__.py +0 -0
  13. application_sdk/decorators/locks.py +42 -0
  14. application_sdk/handlers/base.py +18 -1
  15. application_sdk/inputs/json.py +6 -4
  16. application_sdk/inputs/parquet.py +16 -13
  17. application_sdk/interceptors/__init__.py +0 -0
  18. application_sdk/interceptors/events.py +193 -0
  19. application_sdk/interceptors/lock.py +139 -0
  20. application_sdk/outputs/__init__.py +6 -3
  21. application_sdk/outputs/json.py +9 -6
  22. application_sdk/outputs/parquet.py +10 -36
  23. application_sdk/server/fastapi/__init__.py +4 -5
  24. application_sdk/services/__init__.py +18 -0
  25. application_sdk/{outputs → services}/atlan_storage.py +64 -16
  26. application_sdk/{outputs → services}/eventstore.py +68 -6
  27. application_sdk/services/objectstore.py +407 -0
  28. application_sdk/services/secretstore.py +344 -0
  29. application_sdk/services/statestore.py +267 -0
  30. application_sdk/version.py +1 -1
  31. application_sdk/worker.py +1 -1
  32. {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/METADATA +4 -2
  33. {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/RECORD +36 -32
  34. application_sdk/common/credential_utils.py +0 -85
  35. application_sdk/inputs/objectstore.py +0 -238
  36. application_sdk/inputs/secretstore.py +0 -130
  37. application_sdk/inputs/statestore.py +0 -101
  38. application_sdk/outputs/objectstore.py +0 -125
  39. application_sdk/outputs/secretstore.py +0 -38
  40. application_sdk/outputs/statestore.py +0 -113
  41. {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/WHEEL +0 -0
  42. {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/licenses/LICENSE +0 -0
  43. {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc36.dist-info}/licenses/NOTICE +0 -0
application_sdk/interceptors/events.py
@@ -0,0 +1,193 @@
+ from datetime import timedelta
+ from typing import Any, Optional, Type
+
+ from temporalio import activity, workflow
+ from temporalio.common import RetryPolicy
+ from temporalio.worker import (
+     ActivityInboundInterceptor,
+     ExecuteActivityInput,
+     ExecuteWorkflowInput,
+     Interceptor,
+     WorkflowInboundInterceptor,
+     WorkflowInterceptorClassInput,
+ )
+
+ from application_sdk.events.models import (
+     ApplicationEventNames,
+     Event,
+     EventMetadata,
+     EventTypes,
+     WorkflowStates,
+ )
+ from application_sdk.observability.logger_adaptor import get_logger
+ from application_sdk.services.eventstore import EventStore
+
+ logger = get_logger(__name__)
+
+ TEMPORAL_NOT_FOUND_FAILURE = (
+     "type.googleapis.com/temporal.api.errordetails.v1.NotFoundFailure"
+ )
+
+
+ # Activity for publishing events (runs outside sandbox)
+ @activity.defn
+ async def publish_event(event_data: dict) -> None:
+     """Activity to publish events outside the workflow sandbox.
+
+     Args:
+         event_data (dict): Event data to publish containing event_type, event_name,
+             metadata, and data fields.
+     """
+     try:
+         event = Event(**event_data)
+         await EventStore.publish_event(event)
+         activity.logger.info(f"Published event: {event_data.get('event_name','')}")
+     except Exception as e:
+         activity.logger.error(f"Failed to publish event: {e}")
+         raise
+
+
+ class EventActivityInboundInterceptor(ActivityInboundInterceptor):
+     """Interceptor for tracking activity execution events.
+
+     This interceptor captures the start and end of activity executions,
+     creating events that can be used for monitoring and tracking.
+     Activities run outside the sandbox so they can directly call EventStore.
+     """
+
+     async def execute_activity(self, input: ExecuteActivityInput) -> Any:
+         """Execute an activity with event tracking.
+
+         Args:
+             input (ExecuteActivityInput): The activity execution input.
+
+         Returns:
+             Any: The result of the activity execution.
+         """
+         # Extract activity information for tracking
+
+         start_event = Event(
+             event_type=EventTypes.APPLICATION_EVENT.value,
+             event_name=ApplicationEventNames.ACTIVITY_START.value,
+             data={},
+         )
+         await EventStore.publish_event(start_event)
+
+         output = None
+         try:
+             output = await super().execute_activity(input)
+         except Exception:
+             raise
+         finally:
+             end_event = Event(
+                 event_type=EventTypes.APPLICATION_EVENT.value,
+                 event_name=ApplicationEventNames.ACTIVITY_END.value,
+                 data={},
+             )
+             await EventStore.publish_event(end_event)
+
+         return output
+
+
+ class EventWorkflowInboundInterceptor(WorkflowInboundInterceptor):
+     """Interceptor for tracking workflow execution events.
+
+     This interceptor captures the start and end of workflow executions,
+     creating events that can be used for monitoring and tracking.
+     Uses activities to publish events to avoid sandbox restrictions.
+     """
+
+     async def execute_workflow(self, input: ExecuteWorkflowInput) -> Any:
+         """Execute a workflow with event tracking.
+
+         Args:
+             input (ExecuteWorkflowInput): The workflow execution input.
+
+         Returns:
+             Any: The result of the workflow execution.
+         """
+
+         # Publish workflow start event via activity
+         try:
+             await workflow.execute_activity(
+                 publish_event,
+                 {
+                     "metadata": EventMetadata(
+                         workflow_state=WorkflowStates.RUNNING.value
+                     ),
+                     "event_type": EventTypes.APPLICATION_EVENT.value,
+                     "event_name": ApplicationEventNames.WORKFLOW_START.value,
+                     "data": {},
+                 },
+                 schedule_to_close_timeout=timedelta(seconds=30),
+                 retry_policy=RetryPolicy(maximum_attempts=3),
+             )
+         except Exception as e:
+             workflow.logger.warning(f"Failed to publish workflow start event: {e}")
+             # Don't fail the workflow if event publishing fails
+
+         output = None
+         workflow_state = WorkflowStates.FAILED.value  # Default to failed
+
+         try:
+             output = await super().execute_workflow(input)
+             workflow_state = (
+                 WorkflowStates.COMPLETED.value
+             )  # Update to completed on success
+         except Exception:
+             workflow_state = WorkflowStates.FAILED.value  # Keep as failed
+             raise
+         finally:
+             # Always publish workflow end event
+             try:
+                 await workflow.execute_activity(
+                     publish_event,
+                     {
+                         "metadata": EventMetadata(workflow_state=workflow_state),
+                         "event_type": EventTypes.APPLICATION_EVENT.value,
+                         "event_name": ApplicationEventNames.WORKFLOW_END.value,
+                         "data": {},
+                     },
+                     schedule_to_close_timeout=timedelta(seconds=30),
+                     retry_policy=RetryPolicy(maximum_attempts=3),
+                 )
+             except Exception as publish_error:
+                 workflow.logger.warning(
+                     f"Failed to publish workflow end event: {publish_error}"
+                 )
+
+         return output
+
+
+ class EventInterceptor(Interceptor):
+     """Temporal interceptor for event tracking.
+
+     This interceptor provides event tracking capabilities for both
+     workflow and activity executions.
+     """
+
+     def intercept_activity(
+         self, next: ActivityInboundInterceptor
+     ) -> ActivityInboundInterceptor:
+         """Intercept activity executions.
+
+         Args:
+             next (ActivityInboundInterceptor): The next interceptor in the chain.
+
+         Returns:
+             ActivityInboundInterceptor: The activity interceptor.
+         """
+         return EventActivityInboundInterceptor(super().intercept_activity(next))
+
+     def workflow_interceptor_class(
+         self, input: WorkflowInterceptorClassInput
+     ) -> Optional[Type[WorkflowInboundInterceptor]]:
+         """Get the workflow interceptor class.
+
+         Args:
+             input (WorkflowInterceptorClassInput): The interceptor input.
+
+         Returns:
+             Optional[Type[WorkflowInboundInterceptor]]: The workflow interceptor class.
+         """
+         return EventWorkflowInboundInterceptor
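
For orientation, here is a minimal sketch of how the new EventInterceptor and its publish_event activity could be registered on a Temporal worker. The workflow class, task queue name, and server address below are illustrative and not part of this diff; the SDK's own worker setup (application_sdk/worker.py, clients/temporal.py) may wire this up differently.

    import asyncio

    from temporalio import workflow
    from temporalio.client import Client
    from temporalio.worker import Worker

    from application_sdk.interceptors.events import EventInterceptor, publish_event


    @workflow.defn
    class ExampleWorkflow:  # illustrative workflow, not part of the SDK
        @workflow.run
        async def run(self) -> str:
            return "done"


    async def main() -> None:
        client = await Client.connect("localhost:7233")  # illustrative address
        worker = Worker(
            client,
            task_queue="example-task-queue",    # illustrative name
            workflows=[ExampleWorkflow],
            activities=[publish_event],         # event publishing runs outside the sandbox
            interceptors=[EventInterceptor()],  # emits workflow/activity start and end events
        )
        await worker.run()


    asyncio.run(main())
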
application_sdk/interceptors/lock.py
@@ -0,0 +1,139 @@
+ """Redis lock interceptor for Temporal workflows.
+
+ Manages distributed locks for activities decorated with @needs_lock using
+ separate lock acquisition and release activities to avoid workflow deadlocks.
+ """
+
+ from datetime import timedelta
+ from typing import Any, Dict, Optional, Type
+
+ from temporalio import workflow
+ from temporalio.common import RetryPolicy
+ from temporalio.worker import (
+     Interceptor,
+     StartActivityInput,
+     WorkflowInboundInterceptor,
+     WorkflowInterceptorClassInput,
+     WorkflowOutboundInterceptor,
+ )
+
+ from application_sdk.common.error_codes import WorkflowError
+ from application_sdk.constants import (
+     APPLICATION_NAME,
+     IS_LOCKING_DISABLED,
+     LOCK_METADATA_KEY,
+ )
+ from application_sdk.observability.logger_adaptor import get_logger
+
+ logger = get_logger(__name__)
+
+
+ class RedisLockInterceptor(Interceptor):
+     """Main interceptor class for Redis distributed locking."""
+
+     def __init__(self, activities: Dict[str, Any]):
+         """Initialize Redis lock interceptor.
+
+         Args:
+             activities: Dictionary mapping activity names to activity functions
+         """
+         self.activities = activities
+
+     def workflow_interceptor_class(
+         self, input: WorkflowInterceptorClassInput
+     ) -> Optional[Type[WorkflowInboundInterceptor]]:
+         activities = self.activities
+
+         class RedisLockWorkflowInboundInterceptor(WorkflowInboundInterceptor):
+             """Inbound interceptor that manages Redis locks for activities."""
+
+             def init(self, outbound: WorkflowOutboundInterceptor) -> None:
+                 """Initialize with Redis lock outbound interceptor."""
+                 lock_outbound = RedisLockOutboundInterceptor(outbound, activities)
+                 super().init(lock_outbound)
+
+         return RedisLockWorkflowInboundInterceptor
+
+
+ class RedisLockOutboundInterceptor(WorkflowOutboundInterceptor):
+     """Outbound interceptor that acquires Redis locks before activity execution."""
+
+     def __init__(self, next: WorkflowOutboundInterceptor, activities: Dict[str, Any]):
+         super().__init__(next)
+         self.activities = activities
+
+     async def start_activity(  # type: ignore[override]
+         self, input: StartActivityInput
+     ) -> workflow.ActivityHandle[Any]:
+         """Start activity with distributed lock if required."""
+
+         # Check if activity needs locking
+         activity_fn = self.activities.get(input.activity)
+         if (
+             not activity_fn
+             or not hasattr(activity_fn, LOCK_METADATA_KEY)
+             or IS_LOCKING_DISABLED
+         ):
+             return await self.next.start_activity(input)
+
+         lock_config = getattr(activity_fn, LOCK_METADATA_KEY)
+         lock_name = lock_config.get("lock_name", input.activity)
+         max_locks = lock_config.get("max_locks", 5)
+         if not input.schedule_to_close_timeout:
+             logger.error(
+                 f"Activity '{input.activity}' with @needs_lock decorator requires schedule_to_close_timeout"
+             )
+             raise WorkflowError(
+                 f"{WorkflowError.WORKFLOW_CONFIG_ERROR}: Activity '{input.activity}' with @needs_lock decorator must be called with schedule_to_close_timeout parameter. "
+                 f"Example: workflow.execute_activity('{input.activity}', schedule_to_close_timeout=timedelta(minutes=10))"
+             )
+         ttl_seconds = int(input.schedule_to_close_timeout.total_seconds())
+
+         # Orchestrate lock acquisition -> business activity -> lock release
+         return await self._execute_with_lock_orchestration(
+             input, lock_name, max_locks, ttl_seconds
+         )
+
+     async def _execute_with_lock_orchestration(
+         self,
+         input: StartActivityInput,
+         lock_name: str,
+         max_locks: int,
+         ttl_seconds: int,
+     ) -> workflow.ActivityHandle[Any]:
+         """Execute activity with distributed lock orchestration."""
+         owner_id = f"{APPLICATION_NAME}:{workflow.info().run_id}"
+         lock_result = None
+
+         try:
+             # Step 1: Acquire lock via dedicated activity (can take >2s safely)
+             start_to_close_timeout = workflow.info().execution_timeout
+             lock_result = await workflow.execute_activity(
+                 "acquire_distributed_lock",
+                 args=[lock_name, max_locks, ttl_seconds, owner_id],
+                 start_to_close_timeout=start_to_close_timeout,
+                 retry_policy=RetryPolicy(maximum_attempts=1),
+             )
+
+             logger.debug(f"Lock acquired: {lock_result}, executing {input.activity}")
+
+             # Step 2: Execute the business activity and return its handle
+             return await self.next.start_activity(input)
+
+         finally:
+             # Step 3: Release lock (fire-and-forget with short timeout)
+             if lock_result is not None:
+                 try:
+                     await workflow.execute_local_activity(
+                         "release_distributed_lock",
+                         args=[lock_result["resource_id"], lock_result["owner_id"]],
+                         start_to_close_timeout=timedelta(seconds=5),
+                         retry_policy=RetryPolicy(maximum_attempts=1),
+                     )
+                     logger.debug(f"Lock released: {lock_result['resource_id']}")
+                 except Exception as e:
+                     # Silent failure - TTL will handle cleanup
+                     logger.warning(
+                         f"Lock release failed for {lock_result['resource_id']}: {e}. "
+                         f"TTL will handle cleanup."
+                     )
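
The @needs_lock decorator itself (application_sdk/decorators/locks.py, added in this release) is not shown in this diff, so its exact signature is unknown. Based on the lock_config keys the interceptor reads ("lock_name", "max_locks") and the error message above, a decorated activity would be declared and invoked roughly as follows; the decorator arguments and all names here are assumptions:

    from datetime import timedelta

    from temporalio import activity, workflow

    from application_sdk.decorators.locks import needs_lock  # signature below is assumed


    @activity.defn
    @needs_lock(lock_name="warehouse-scan", max_locks=2)  # assumed arguments, inferred from lock_config keys
    async def scan_warehouse(database: str) -> int:
        return 0  # illustrative activity body


    @workflow.defn
    class ScanWorkflow:  # illustrative workflow
        @workflow.run
        async def run(self) -> None:
            # The interceptor requires schedule_to_close_timeout on @needs_lock activities;
            # omitting it raises WorkflowError.WORKFLOW_CONFIG_ERROR.
            await workflow.execute_activity(
                scan_warehouse,
                "analytics",
                schedule_to_close_timeout=timedelta(minutes=10),
            )
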
application_sdk/outputs/__init__.py
@@ -22,9 +22,10 @@ import orjson
  from temporalio import activity

  from application_sdk.activities.common.models import ActivityStatistics
+ from application_sdk.activities.common.utils import get_object_store_prefix
  from application_sdk.common.dataframe_utils import is_empty_dataframe
  from application_sdk.observability.logger_adaptor import get_logger
- from application_sdk.outputs.objectstore import ObjectStoreOutput
+ from application_sdk.services.objectstore import ObjectStore

  logger = get_logger(__name__)
  activity.logger = logger
@@ -223,9 +224,11 @@ class Output(ABC):
              with open(output_file_name, "w") as f:
                  f.write(orjson.dumps(statistics).decode("utf-8"))

+             destination_file_path = get_object_store_prefix(output_file_name)
              # Push the file to the object store
-             await ObjectStoreOutput.push_file_to_object_store(
-                 self.output_prefix, output_file_name
+             await ObjectStore.upload_file(
+                 source=output_file_name,
+                 destination=destination_file_path,
              )
              return statistics
          except Exception as e:
application_sdk/outputs/json.py
@@ -5,10 +5,11 @@ from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Union
  import orjson
  from temporalio import activity

+ from application_sdk.activities.common.utils import get_object_store_prefix
  from application_sdk.observability.logger_adaptor import get_logger
  from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
  from application_sdk.outputs import Output
- from application_sdk.outputs.objectstore import ObjectStoreOutput
+ from application_sdk.services.objectstore import ObjectStore

  logger = get_logger(__name__)
  activity.logger = logger
@@ -285,9 +286,10 @@ class JsonOutput(Output):
                  description="Number of records written to JSON files from daft DataFrame",
              )

-             # Push the file to the object store
-             await ObjectStoreOutput.push_files_to_object_store(
-                 self.output_prefix, self.output_path
+             # Push files to the object store
+             await ObjectStore.upload_prefix(
+                 source=self.output_path,
+                 destination=get_object_store_prefix(self.output_path),
              )

          except Exception as e:
@@ -344,8 +346,9 @@
              )

              # Push the file to the object store
-             await ObjectStoreOutput.push_file_to_object_store(
-                 self.output_prefix, output_file_name
+             await ObjectStore.upload_file(
+                 source=output_file_name,
+                 destination=get_object_store_prefix(output_file_name),
              )

              self.buffer.clear()
application_sdk/outputs/parquet.py
@@ -3,10 +3,11 @@ from typing import TYPE_CHECKING, Literal, Optional

  from temporalio import activity

+ from application_sdk.activities.common.utils import get_object_store_prefix
  from application_sdk.observability.logger_adaptor import get_logger
  from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
  from application_sdk.outputs import Output
- from application_sdk.outputs.objectstore import ObjectStoreOutput
+ from application_sdk.services.objectstore import ObjectStore

  logger = get_logger(__name__)
  activity.logger = logger
@@ -159,7 +160,10 @@
              )

              # Upload the file to object store
-             await self.upload_file(file_path)
+             await ObjectStore.upload_file(
+                 source=file_path,
+                 destination=get_object_store_prefix(file_path),
+             )
          except Exception as e:
              # Record metrics for failed write
              self.metrics.record_metric(
@@ -218,7 +222,10 @@
              )

              # Upload the file to object store
-             await self.upload_file(file_path)
+             await ObjectStore.upload_file(
+                 source=file_path,
+                 destination=get_object_store_prefix(file_path),
+             )
          except Exception as e:
              # Record metrics for failed write
              self.metrics.record_metric(
@@ -231,39 +238,6 @@
              logger.error(f"Error writing daft dataframe to parquet: {str(e)}")
              raise

-     async def upload_file(self, local_file_path: str) -> None:
-         """Upload a file to the object store.
-
-         Args:
-             local_file_path (str): Path to the local file to upload.
-         """
-         try:
-             if os.path.isdir(local_file_path):
-                 logger.info(
-                     f"Uploading files: {local_file_path} to {self.output_prefix}"
-                 )
-                 await ObjectStoreOutput.push_files_to_object_store(
-                     self.output_prefix, local_file_path
-                 )
-             else:
-                 logger.info(
-                     f"Uploading file: {local_file_path} to {self.output_prefix}"
-                 )
-                 await ObjectStoreOutput.push_file_to_object_store(
-                     self.output_prefix, local_file_path
-                 )
-         except Exception as e:
-             # Record metrics for failed upload
-             self.metrics.record_metric(
-                 name="parquet_upload_errors",
-                 value=1,
-                 metric_type=MetricType.COUNTER,
-                 labels={"error": str(e)},
-                 description="Number of errors while uploading Parquet files to object store",
-             )
-             logger.error(f"Error uploading file to object store: {str(e)}")
-             raise e
-
      def get_full_path(self) -> str:
          """Get the full path of the output file.

application_sdk/server/fastapi/__init__.py
@@ -25,11 +25,9 @@ from application_sdk.constants import (
  )
  from application_sdk.docgen import AtlanDocsGenerator
  from application_sdk.handlers import HandlerInterface
- from application_sdk.inputs.statestore import StateStoreInput, StateType
  from application_sdk.observability.logger_adaptor import get_logger
  from application_sdk.observability.metrics_adaptor import MetricType, get_metrics
  from application_sdk.observability.observability import DuckDBUI
- from application_sdk.outputs.statestore import StateStoreOutput
  from application_sdk.server import ServerInterface
  from application_sdk.server.fastapi.middleware.logmiddleware import LogMiddleware
  from application_sdk.server.fastapi.middleware.metrics import MetricsMiddleware
@@ -53,6 +51,7 @@ from application_sdk.server.fastapi.models import (
  )
  from application_sdk.server.fastapi.routers.server import get_server_router
  from application_sdk.server.fastapi.utils import internal_server_error_handler
+ from application_sdk.services.statestore import StateStore, StateType
  from application_sdk.workflows import WorkflowInterface

  logger = get_logger(__name__)
@@ -588,7 +587,7 @@ class APIServer(ServerInterface):
              )
              raise e

-     def get_workflow_config(
+     async def get_workflow_config(
          self, config_id: str, type: str = "workflows"
      ) -> WorkflowConfigResponse:
          """Retrieve workflow configuration by ID.
@@ -603,7 +602,7 @@
          if not StateType.is_member(type):
              raise ValueError(f"Invalid type {type} for state store")

-         config = StateStoreInput.get_state(config_id, StateType(type))
+         config = await StateStore.get_state(config_id, StateType(type))
          return WorkflowConfigResponse(
              success=True,
              message="Workflow configuration fetched successfully",
@@ -680,7 +679,7 @@
          if not StateType.is_member(type):
              raise ValueError(f"Invalid type {type} for state store")

-         config = await StateStoreOutput.save_state_object(
+         config = await StateStore.save_state_object(
              id=config_id, value=body.model_dump(), type=StateType(type)
          )
          return WorkflowConfigResponse(
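
State-store access in APIServer is now asynchronous and routed through the consolidated StateStore service. A sketch of the equivalent standalone calls (the config id and payload are illustrative):

    from application_sdk.services.statestore import StateStore, StateType


    async def manage_workflow_config() -> None:
        config_id = "example-workflow-config"  # illustrative id

        # rc34: StateStoreInput.get_state(config_id, StateType("workflows")) (synchronous)
        config = await StateStore.get_state(config_id, StateType("workflows"))
        print(config)

        # rc34: await StateStoreOutput.save_state_object(...)
        await StateStore.save_state_object(
            id=config_id,
            value={"enabled": True},  # illustrative payload
            type=StateType("workflows"),
        )
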
application_sdk/services/__init__.py
@@ -0,0 +1,18 @@
+ """Services module for the application SDK."""
+
+ from .atlan_storage import AtlanStorage, MigrationSummary
+ from .eventstore import EventStore
+ from .objectstore import ObjectStore
+ from .secretstore import SecretStore
+ from .statestore import StateStore, StateType, build_state_store_path
+
+ __all__ = [
+     "AtlanStorage",
+     "EventStore",
+     "MigrationSummary",
+     "ObjectStore",
+     "SecretStore",
+     "StateStore",
+     "StateType",
+     "build_state_store_path",
+ ]
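
With the new services package, the storage, secret, state, and event helpers are importable from a single module:

    # Consolidated imports exposed by application_sdk.services in rc36
    from application_sdk.services import (
        AtlanStorage,
        EventStore,
        ObjectStore,
        SecretStore,
        StateStore,
        StateType,
    )
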
application_sdk/{outputs → services}/atlan_storage.py
@@ -1,4 +1,12 @@
- """Atlan storage interface for upload operations and migration from objectstore."""
+ """Atlan storage service for upload operations and migration from object store.
+
+ This module provides the AtlanStorage service for handling data migration between
+ local object storage and Atlan's upstream storage system. It's specifically designed
+ for the bucket cloning strategy used in customer-deployed applications.
+
+ The service supports parallel file migration with comprehensive error handling and
+ detailed reporting through the MigrationSummary model.
+ """

  import asyncio
  from typing import Dict, List
@@ -11,8 +19,8 @@ from application_sdk.constants import (
      DEPLOYMENT_OBJECT_STORE_NAME,
      UPSTREAM_OBJECT_STORE_NAME,
  )
- from application_sdk.inputs.objectstore import ObjectStoreInput
  from application_sdk.observability.logger_adaptor import get_logger
+ from application_sdk.services.objectstore import ObjectStore

  logger = get_logger(__name__)
  activity.logger = logger
@@ -43,27 +51,37 @@ class MigrationSummary(BaseModel):
      destination: str = UPSTREAM_OBJECT_STORE_NAME


- # keeping any logic related to operations on atlan storage within this file.
- class AtlanStorageOutput:
+ class AtlanStorage:
      """Handles upload operations to Atlan storage and migration from objectstore."""

      OBJECT_CREATE_OPERATION = "create"

      @classmethod
      async def _migrate_single_file(cls, file_path: str) -> tuple[str, bool, str]:
-         """
-         Migrate a single file from objectstore to Atlan storage.
+         """Migrate a single file from object store to Atlan storage.
+
+         This internal method handles the migration of a single file, including
+         error handling and logging. It's designed to be called concurrently
+         for multiple files.

          Args:
-             file_path (str): The path of the file to migrate
+             file_path (str): The path of the file to migrate in the object store.

          Returns:
-             tuple[str, bool, str]: (file_path, success, error_message)
+             tuple[str, bool, str]: A tuple containing:
+                 - file_path: The path of the file that was processed
+                 - success: Boolean indicating if migration was successful
+                 - error_message: Error details if migration failed, empty string if successful
+
+         Note:
+             This method is internal and should not be called directly. Use
+             migrate_from_objectstore_to_atlan() instead for proper coordination
+             and error handling.
          """
          try:
              # Get file data from objectstore
-             file_data = ObjectStoreInput.get_file_data(
-                 file_path, object_store_name=DEPLOYMENT_OBJECT_STORE_NAME
+             file_data = await ObjectStore.get_content(
+                 file_path, store_name=DEPLOYMENT_OBJECT_STORE_NAME
              )

              with DaprClient() as client:
@@ -91,14 +109,44 @@ class AtlanStorageOutput:
      async def migrate_from_objectstore_to_atlan(
          cls, prefix: str = ""
      ) -> MigrationSummary:
-         """
-         Migrate all files from objectstore to Atlan storage under a given prefix.
+         """Migrate all files from object store to Atlan storage under a given prefix.
+
+         This method performs a parallel migration of files from the local object store
+         to Atlan's upstream storage system. It provides comprehensive error handling
+         and detailed reporting of the migration process.

          Args:
-             prefix (str): The prefix to filter which files to migrate. Empty string migrates all files.
+             prefix (str, optional): The prefix to filter which files to migrate.
+                 Empty string migrates all files. Defaults to "".

          Returns:
-             MigrationSummary: Migration summary with counts and any failures
+             MigrationSummary: Comprehensive migration summary including:
+                 - total_files: Number of files found for migration
+                 - migrated_files: Number successfully migrated
+                 - failed_migrations: Number that failed to migrate
+                 - failures: List of failure details with file paths and errors
+                 - prefix: The prefix used for filtering
+                 - source/destination: Storage system identifiers
+
+         Raises:
+             Exception: If there's a critical error during the migration process.
+
+         Examples:
+             >>> # Migrate all files
+             >>> summary = await AtlanStorage.migrate_from_objectstore_to_atlan()
+             >>> print(f"Success rate: {summary.migrated_files/summary.total_files*100:.1f}%")
+
+             >>> # Migrate specific dataset
+             >>> summary = await AtlanStorage.migrate_from_objectstore_to_atlan(
+             ...     prefix="processed_data/2024/"
+             ... )
+             >>> if summary.total_files == 0:
+             ...     print("No files found with the specified prefix")
+             >>> elif summary.failed_migrations == 0:
+             ...     print(f"Successfully migrated all {summary.total_files} files")
+             >>> else:
+             ...     print(f"Migration completed with {summary.failed_migrations} failures")
+             ...     # Handle failures...
          """
          try:
              logger.info(
@@ -106,8 +154,8 @@
              )

              # Get list of all files to migrate from objectstore
-             files_to_migrate = ObjectStoreInput.list_all_files(
-                 prefix, object_store_name=DEPLOYMENT_OBJECT_STORE_NAME
+             files_to_migrate = await ObjectStore.list_files(
+                 prefix, store_name=DEPLOYMENT_OBJECT_STORE_NAME
              )

              total_files = len(files_to_migrate)
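
On the read side, the migration path now uses the async ObjectStore service in place of ObjectStoreInput. A sketch of the replacement calls (the prefix is illustrative):

    from application_sdk.constants import DEPLOYMENT_OBJECT_STORE_NAME
    from application_sdk.services.objectstore import ObjectStore


    async def inspect_deployment_store(prefix: str = "processed_data/") -> None:  # illustrative prefix
        # rc34: ObjectStoreInput.list_all_files(prefix, object_store_name=...) (synchronous)
        files = await ObjectStore.list_files(prefix, store_name=DEPLOYMENT_OBJECT_STORE_NAME)

        for file_path in files:
            # rc34: ObjectStoreInput.get_file_data(file_path, object_store_name=...)
            content = await ObjectStore.get_content(file_path, store_name=DEPLOYMENT_OBJECT_STORE_NAME)
            print(file_path, len(content))
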