atlan-application-sdk 0.1.1rc34__py3-none-any.whl → 0.1.1rc35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- application_sdk/activities/__init__.py +3 -2
- application_sdk/activities/common/utils.py +21 -1
- application_sdk/activities/metadata_extraction/base.py +4 -2
- application_sdk/activities/metadata_extraction/sql.py +13 -12
- application_sdk/activities/query_extraction/sql.py +24 -20
- application_sdk/clients/atlan_auth.py +2 -2
- application_sdk/clients/temporal.py +6 -10
- application_sdk/inputs/json.py +6 -4
- application_sdk/inputs/parquet.py +16 -13
- application_sdk/outputs/__init__.py +6 -3
- application_sdk/outputs/json.py +9 -6
- application_sdk/outputs/parquet.py +10 -36
- application_sdk/server/fastapi/__init__.py +4 -5
- application_sdk/services/__init__.py +18 -0
- application_sdk/{outputs → services}/atlan_storage.py +64 -16
- application_sdk/{outputs → services}/eventstore.py +68 -6
- application_sdk/services/objectstore.py +407 -0
- application_sdk/services/secretstore.py +344 -0
- application_sdk/services/statestore.py +267 -0
- application_sdk/version.py +1 -1
- application_sdk/worker.py +1 -1
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/METADATA +1 -1
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/RECORD +26 -29
- application_sdk/common/credential_utils.py +0 -85
- application_sdk/inputs/objectstore.py +0 -238
- application_sdk/inputs/secretstore.py +0 -130
- application_sdk/inputs/statestore.py +0 -101
- application_sdk/outputs/objectstore.py +0 -125
- application_sdk/outputs/secretstore.py +0 -38
- application_sdk/outputs/statestore.py +0 -113
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/WHEEL +0 -0
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/licenses/LICENSE +0 -0
- {atlan_application_sdk-0.1.1rc34.dist-info → atlan_application_sdk-0.1.1rc35.dist-info}/licenses/NOTICE +0 -0
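The common thread in this release is a consolidation of the old inputs/ and outputs/ helpers into a single application_sdk.services package: objectstore, secretstore, and statestore are new modules there, while atlan_storage and eventstore move across from outputs/. For callers the visible effect is mostly import paths and class names. A minimal before/after sketch based on the renames in the diffs below (the object key and prefix are illustrative, not from the package):

    # 0.1.1rc34: storage helpers were split across inputs/ and outputs/
    # from application_sdk.inputs.objectstore import ObjectStoreInput
    # from application_sdk.outputs.atlan_storage import AtlanStorageOutput

    # 0.1.1rc35: unified service classes under application_sdk.services
    from application_sdk.services.atlan_storage import AtlanStorage
    from application_sdk.services.objectstore import ObjectStore


    async def example() -> None:
        # ObjectStore methods are async classmethods; no instance is needed.
        payload = await ObjectStore.get_content("raw/sample.json")
        print(f"fetched {len(payload)} bytes")
        # AtlanStorageOutput is renamed to AtlanStorage; the migration entry point keeps its name.
        summary = await AtlanStorage.migrate_from_objectstore_to_atlan(prefix="raw/")
        print(f"migrated {summary.migrated_files} of {summary.total_files} files")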
application_sdk/{outputs → services}/atlan_storage.py

@@ -1,4 +1,12 @@
-"""Atlan storage
+"""Atlan storage service for upload operations and migration from object store.
+
+This module provides the AtlanStorage service for handling data migration between
+local object storage and Atlan's upstream storage system. It's specifically designed
+for the bucket cloning strategy used in customer-deployed applications.
+
+The service supports parallel file migration with comprehensive error handling and
+detailed reporting through the MigrationSummary model.
+"""

 import asyncio
 from typing import Dict, List
@@ -11,8 +19,8 @@ from application_sdk.constants import (
     DEPLOYMENT_OBJECT_STORE_NAME,
     UPSTREAM_OBJECT_STORE_NAME,
 )
-from application_sdk.inputs.objectstore import ObjectStoreInput
 from application_sdk.observability.logger_adaptor import get_logger
+from application_sdk.services.objectstore import ObjectStore

 logger = get_logger(__name__)
 activity.logger = logger
@@ -43,27 +51,37 @@ class MigrationSummary(BaseModel):
     destination: str = UPSTREAM_OBJECT_STORE_NAME


-
-class AtlanStorageOutput:
+class AtlanStorage:
     """Handles upload operations to Atlan storage and migration from objectstore."""

     OBJECT_CREATE_OPERATION = "create"

     @classmethod
     async def _migrate_single_file(cls, file_path: str) -> tuple[str, bool, str]:
-        """
-
+        """Migrate a single file from object store to Atlan storage.
+
+        This internal method handles the migration of a single file, including
+        error handling and logging. It's designed to be called concurrently
+        for multiple files.

         Args:
-            file_path (str): The path of the file to migrate
+            file_path (str): The path of the file to migrate in the object store.

         Returns:
-            tuple[str, bool, str]:
+            tuple[str, bool, str]: A tuple containing:
+                - file_path: The path of the file that was processed
+                - success: Boolean indicating if migration was successful
+                - error_message: Error details if migration failed, empty string if successful
+
+        Note:
+            This method is internal and should not be called directly. Use
+            migrate_from_objectstore_to_atlan() instead for proper coordination
+            and error handling.
         """
         try:
             # Get file data from objectstore
-            file_data =
-                file_path,
+            file_data = await ObjectStore.get_content(
+                file_path, store_name=DEPLOYMENT_OBJECT_STORE_NAME
             )

             with DaprClient() as client:
@@ -91,14 +109,44 @@ class AtlanStorageOutput:
     async def migrate_from_objectstore_to_atlan(
         cls, prefix: str = ""
     ) -> MigrationSummary:
-        """
-
+        """Migrate all files from object store to Atlan storage under a given prefix.
+
+        This method performs a parallel migration of files from the local object store
+        to Atlan's upstream storage system. It provides comprehensive error handling
+        and detailed reporting of the migration process.

         Args:
-            prefix (str): The prefix to filter which files to migrate.
+            prefix (str, optional): The prefix to filter which files to migrate.
+                Empty string migrates all files. Defaults to "".

         Returns:
-            MigrationSummary:
+            MigrationSummary: Comprehensive migration summary including:
+                - total_files: Number of files found for migration
+                - migrated_files: Number successfully migrated
+                - failed_migrations: Number that failed to migrate
+                - failures: List of failure details with file paths and errors
+                - prefix: The prefix used for filtering
+                - source/destination: Storage system identifiers
+
+        Raises:
+            Exception: If there's a critical error during the migration process.
+
+        Examples:
+            >>> # Migrate all files
+            >>> summary = await AtlanStorage.migrate_from_objectstore_to_atlan()
+            >>> print(f"Success rate: {summary.migrated_files/summary.total_files*100:.1f}%")
+
+            >>> # Migrate specific dataset
+            >>> summary = await AtlanStorage.migrate_from_objectstore_to_atlan(
+            ...     prefix="processed_data/2024/"
+            ... )
+            >>> if summary.total_files == 0:
+            ...     print("No files found with the specified prefix")
+            >>> elif summary.failed_migrations == 0:
+            ...     print(f"Successfully migrated all {summary.total_files} files")
+            >>> else:
+            ...     print(f"Migration completed with {summary.failed_migrations} failures")
+            ...     # Handle failures...
         """
         try:
             logger.info(
@@ -106,8 +154,8 @@ class AtlanStorageOutput:
             )

             # Get list of all files to migrate from objectstore
-            files_to_migrate =
-                prefix,
+            files_to_migrate = await ObjectStore.list_files(
+                prefix, store_name=DEPLOYMENT_OBJECT_STORE_NAME
            )

             total_files = len(files_to_migrate)
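The hunks above show the new AtlanStorage class delegating its object store reads to the unified ObjectStore service, but not the body of the coordination loop itself. As a rough sketch of the parallel migration the module docstring describes, assuming the per-file tasks are fanned out with asyncio.gather (the function name is mine, and real callers should use migrate_from_objectstore_to_atlan, which returns a MigrationSummary rather than a dict):

    import asyncio

    from application_sdk.constants import DEPLOYMENT_OBJECT_STORE_NAME
    from application_sdk.services.atlan_storage import AtlanStorage
    from application_sdk.services.objectstore import ObjectStore


    async def sketch_migration(prefix: str = "") -> dict:
        # Illustrative only: the SDK's real coordination lives inside
        # AtlanStorage.migrate_from_objectstore_to_atlan and is not shown in the hunks above.
        files = await ObjectStore.list_files(prefix, store_name=DEPLOYMENT_OBJECT_STORE_NAME)
        # Each task returns (file_path, success, error_message), per the docstring above.
        results = await asyncio.gather(
            *(AtlanStorage._migrate_single_file(path) for path in files)
        )
        failures = [{"file": path, "error": err} for path, ok, err in results if not ok]
        return {
            "total_files": len(files),
            "migrated_files": len(files) - len(failures),
            "failed_migrations": len(failures),
            "failures": failures,
        }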
application_sdk/{outputs → services}/eventstore.py

@@ -1,4 +1,4 @@
-"""
+"""Unified event store service for handling application events.

 This module provides the EventStore class for publishing application events
 to a pub/sub system with automatic fallback to HTTP binding.
@@ -10,7 +10,6 @@ from datetime import datetime
 from dapr import clients
 from temporalio import activity, workflow

-from application_sdk.clients.atlan_auth import AtlanAuthClient
 from application_sdk.common.dapr_utils import is_component_registered
 from application_sdk.constants import (
     APPLICATION_NAME,
@@ -25,17 +24,39 @@ activity.logger = logger


 class EventStore:
-    """
+    """Unified event store service for publishing application events.

     This class provides functionality to publish events to a pub/sub system.
     """

     @classmethod
     def enrich_event_metadata(cls, event: Event):
-        """Enrich the event metadata with
+        """Enrich the event metadata with workflow and activity context information.
+
+        This method automatically populates event metadata with context from the current
+        Temporal workflow and activity execution, including IDs, types, and execution state.

         Args:
-            event (Event): Event data.
+            event (Event): Event data to enrich with metadata.
+
+        Returns:
+            Event: The same event instance with enriched metadata.
+
+        Note:
+            This method safely handles cases where the code is not running within
+            a Temporal workflow or activity context.
+
+        Examples:
+            >>> from application_sdk.events.models import Event
+
+            >>> # Create basic event
+            >>> event = Event(event_type="data.processed", data={"count": 100})
+
+            >>> # Enrich with current context (if available)
+            >>> enriched = EventStore.enrich_event_metadata(event)
+            >>> print(f"Workflow ID: {enriched.metadata.workflow_id}")
+            >>> print(f"Activity: {enriched.metadata.activity_type}")
+            >>> print(f"Timestamp: {enriched.metadata.created_timestamp}")
         """
         if not event.metadata:
             event.metadata = EventMetadata()
@@ -70,10 +91,49 @@ class EventStore:

     @classmethod
     async def publish_event(cls, event: Event):
-        """Publish event with automatic
+        """Publish event with automatic metadata enrichment and authentication.
+
+        This method handles the complete event publishing flow including metadata
+        enrichment, authentication header injection, and component availability validation.
+        It automatically falls back gracefully if the event store component is not available.

         Args:
             event (Event): Event data to publish.
+
+        Note:
+            The method will silently skip publishing if the event store component
+            is not registered, allowing applications to run without event publishing
+            capability.
+
+        Raises:
+            Exception: If there's an error during event publishing (logged but not re-raised).
+
+        Examples:
+            >>> from application_sdk.events.models import Event
+
+            >>> # Publish workflow status event
+            >>> status_event = Event(
+            ...     event_type="workflow.status_changed",
+            ...     data={
+            ...         "workflow_id": "wf-123",
+            ...         "old_status": "running",
+            ...         "new_status": "completed",
+            ...         "duration_seconds": 1800
+            ...     }
+            ... )
+            >>> await EventStore.publish_event(status_event)
+
+            >>> # Publish data processing event
+            >>> processing_event = Event(
+            ...     event_type="data.batch_processed",
+            ...     data={
+            ...         "batch_id": "batch-456",
+            ...         "records_processed": 10000,
+            ...         "success_count": 9995,
+            ...         "error_count": 5
+            ...     }
+            ... )
+            >>> await EventStore.publish_event(processing_event)
         """
         if not is_component_registered(EVENT_STORE_NAME):
             logger.warning(
@@ -89,6 +149,8 @@ class EventStore:
         binding_metadata = {"content-type": "application/json"}

         # Add auth token - HTTP bindings will use it, others will ignore it
+        from application_sdk.clients.atlan_auth import AtlanAuthClient
+
         auth_client = AtlanAuthClient()
         binding_metadata.update(await auth_client.get_authenticated_headers())

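Besides the docstring expansion, the eventstore diff moves the AtlanAuthClient import from module level into publish_event. Deferring an import like this is the usual way to break an import-time dependency between two modules; the diff does not state the motivation, but the resulting pattern looks like the sketch below (the wrapper function name is illustrative; AtlanAuthClient and get_authenticated_headers are the calls shown in the hunk):

    async def publish_event_sketch(binding_metadata: dict) -> dict:
        # Before this release the import sat at module level:
        # from application_sdk.clients.atlan_auth import AtlanAuthClient
        # Now it happens inside the publishing method, right before it is used,
        # so the module can be imported without pulling in the auth client.
        from application_sdk.clients.atlan_auth import AtlanAuthClient

        auth_client = AtlanAuthClient()
        binding_metadata.update(await auth_client.get_authenticated_headers())
        return binding_metadata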
application_sdk/services/objectstore.py

@@ -0,0 +1,407 @@
+"""Unified object store interface for the application."""
+
+import json
+import os
+import shutil
+from typing import List, Union
+
+import orjson
+from dapr.clients import DaprClient
+from temporalio import activity
+
+from application_sdk.constants import (
+    DAPR_MAX_GRPC_MESSAGE_LENGTH,
+    DEPLOYMENT_OBJECT_STORE_NAME,
+    TEMPORARY_PATH,
+)
+from application_sdk.observability.logger_adaptor import get_logger
+
+logger = get_logger(__name__)
+activity.logger = logger
+
+
+class ObjectStore:
+    """Unified object store interface supporting both file and directory operations."""
+
+    OBJECT_CREATE_OPERATION = "create"
+    OBJECT_GET_OPERATION = "get"
+    OBJECT_LIST_OPERATION = "list"
+
+    @classmethod
+    async def list_files(
+        cls, prefix: str = "", store_name: str = DEPLOYMENT_OBJECT_STORE_NAME
+    ) -> List[str]:
+        """List all files in the object store under a given prefix.
+
+        Args:
+            prefix: The prefix to filter files. Empty string returns all files.
+            store_name: Name of the Dapr object store binding to use.
+
+        Returns:
+            List of file paths in the object store.
+
+        Raises:
+            Exception: If there's an error listing files from the object store.
+        """
+        try:
+            metadata = {"prefix": prefix, "fileName": prefix} if prefix else {}
+            data = json.dumps({"prefix": prefix}).encode("utf-8") if prefix else ""
+
+            response_data = await cls._invoke_dapr_binding(
+                operation=cls.OBJECT_LIST_OPERATION,
+                metadata=metadata,
+                data=data,
+                store_name=store_name,
+            )
+
+            if not response_data:
+                return []
+
+            file_list = orjson.loads(response_data.decode("utf-8"))
+
+            # Extract paths based on response type
+            if isinstance(file_list, list):
+                paths = file_list
+            elif isinstance(file_list, dict) and "Contents" in file_list:
+                paths = [item["Key"] for item in file_list["Contents"] if "Key" in item]
+            elif isinstance(file_list, dict):
+                paths = file_list.get("files") or file_list.get("keys") or []
+            else:
+                return []
+
+            valid_list = []
+            for path in paths:
+                if not isinstance(path, str):
+                    logger.warning(f"Skipping non-string path: {path}")
+                    continue
+                valid_list.append(
+                    path[path.find(prefix) :]
+                    if prefix and prefix in path
+                    else os.path.basename(path)
+                    if prefix
+                    else path
+                )
+
+            return valid_list
+
+        except Exception as e:
+            logger.error(f"Error listing files with prefix {prefix}: {str(e)}")
+            raise e
+
+    @classmethod
+    async def get_content(
+        cls, key: str, store_name: str = DEPLOYMENT_OBJECT_STORE_NAME
+    ) -> bytes:
+        """Get raw file content from the object store.
+
+        Args:
+            key: The path of the file in the object store.
+            store_name: Name of the Dapr object store binding to use.
+
+        Returns:
+            The raw file content as bytes.
+
+        Raises:
+            Exception: If there's an error getting the file from the object store.
+        """
+        try:
+            metadata = {"key": key, "fileName": key, "blobName": key}
+            data = json.dumps({"key": key}).encode("utf-8") if key else ""
+
+            response_data = await cls._invoke_dapr_binding(
+                operation=cls.OBJECT_GET_OPERATION,
+                metadata=metadata,
+                data=data,
+                store_name=store_name,
+            )
+            if not response_data:
+                raise Exception(f"No data received for file: {key}")
+
+            logger.debug(f"Successfully retrieved file content: {key}")
+            return response_data
+
+        except Exception as e:
+            logger.error(f"Error getting file content for {key}: {str(e)}")
+            raise e
+
+    @classmethod
+    async def exists(
+        cls, key: str, store_name: str = DEPLOYMENT_OBJECT_STORE_NAME
+    ) -> bool:
+        """Check if a file exists in the object store.
+
+        Args:
+            key: The path of the file in the object store.
+            store_name: Name of the Dapr object store binding to use.
+
+        Returns:
+            True if the file exists, False otherwise.
+        """
+        try:
+            await cls.get_content(key, store_name)
+            return True
+        except Exception:
+            return False
+
+    @classmethod
+    async def delete(
+        cls, key: str, store_name: str = DEPLOYMENT_OBJECT_STORE_NAME
+    ) -> None:
+        """Delete a file or all files under a prefix from the object store.
+
+        Args:
+            key: The file path or prefix to delete.
+            store_name: Name of the Dapr object store binding to use.
+
+        Note:
+            This method is not implemented as it's not commonly used in the current codebase.
+            Can be implemented when needed based on the underlying object store capabilities.
+        """
+        raise NotImplementedError("Delete operation not yet implemented")
+
+    @classmethod
+    async def upload_file(
+        cls,
+        source: str,
+        destination: str,
+        store_name: str = DEPLOYMENT_OBJECT_STORE_NAME,
+    ) -> None:
+        """Upload a single file to the object store.
+
+        Args:
+            source (str): Local path to the file to upload.
+            destination (str): Object store key where the file will be stored.
+            store_name (str, optional): Name of the Dapr object store binding to use.
+                Defaults to DEPLOYMENT_OBJECT_STORE_NAME.
+
+        Raises:
+            IOError: If the source file cannot be read.
+            Exception: If there's an error uploading to the object store.
+
+        Example:
+            >>> await ObjectStore.upload_file(
+            ...     source="/tmp/report.pdf",
+            ...     destination="reports/2024/january/report.pdf"
+            ... )
+        """
+        try:
+            with open(source, "rb") as f:
+                file_content = f.read()
+        except IOError as e:
+            logger.error(f"Error reading file {source}: {str(e)}")
+            raise e
+
+        metadata = {
+            "key": destination,
+            "blobName": destination,
+            "fileName": destination,
+        }
+
+        try:
+            await cls._invoke_dapr_binding(
+                operation=cls.OBJECT_CREATE_OPERATION,
+                data=file_content,
+                metadata=metadata,
+                store_name=store_name,
+            )
+            logger.debug(f"Successfully uploaded file: {destination}")
+        except Exception as e:
+            logger.error(
+                f"Error uploading file {destination} to object store: {str(e)}"
+            )
+            raise e
+
+        # Clean up local file after successful upload
+        cls._cleanup_local_path(source)
+
+    @classmethod
+    async def upload_prefix(
+        cls,
+        source: str,
+        destination: str,
+        store_name: str = DEPLOYMENT_OBJECT_STORE_NAME,
+        recursive: bool = True,
+    ) -> None:
+        """Upload all files from a directory to the object store.
+
+        Args:
+            source (str): Local directory path containing files to upload.
+            destination (str): Object store prefix where files will be stored.
+            store_name (str, optional): Name of the Dapr object store binding to use.
+                Defaults to DEPLOYMENT_OBJECT_STORE_NAME.
+            recursive (bool, optional): Whether to include subdirectories.
+                Defaults to True.
+
+        Raises:
+            ValueError: If the source path is not a valid directory.
+            Exception: If there's an error during the upload process.
+
+        Example:
+            >>> # Upload all files recursively
+            >>> await ObjectStore.upload_prefix(
+            ...     source="local/project/",
+            ...     destination="backups/project-v1/",
+            ...     recursive=True
+            ... )
+
+            >>> # Upload only root level files
+            >>> await ObjectStore.upload_prefix(
+            ...     source="local/logs/",
+            ...     destination="daily-logs/",
+            ...     recursive=False
+            ... )
+        """
+        if not os.path.isdir(source):
+            raise ValueError(f"The provided path '{source}' is not a valid directory.")
+
+        try:
+            for root, _, files in os.walk(source):
+                # Skip subdirectories if not recursive
+                if not recursive and root != source:
+                    continue
+
+                for file in files:
+                    file_path = os.path.join(root, file)
+                    # Calculate relative path from the base directory
+                    relative_path = os.path.relpath(file_path, source)
+                    # Create store key by combining prefix with relative path
+                    store_key = os.path.join(destination, relative_path).replace(
+                        os.sep, "/"
+                    )
+                    await cls.upload_file(file_path, store_key, store_name)
+
+            logger.info(f"Completed uploading directory {source} to object store")
+        except Exception as e:
+            logger.error(
+                f"An unexpected error occurred while uploading directory: {str(e)}"
+            )
+            raise e
+
+    @classmethod
+    async def download_file(
+        cls,
+        source: str,
+        destination: str,
+        store_name: str = DEPLOYMENT_OBJECT_STORE_NAME,
+    ) -> None:
+        """Download a single file from the object store.
+
+        Args:
+            source (str): Object store key of the file to download.
+            destination (str): Local path where the file will be saved.
+            store_name (str, optional): Name of the Dapr object store binding to use.
+                Defaults to DEPLOYMENT_OBJECT_STORE_NAME.
+
+        Raises:
+            Exception: If there's an error downloading from the object store.
+
+        Note:
+            The destination directory will be created automatically if it doesn't exist.
+
+        Example:
+            >>> await ObjectStore.download_file(
+            ...     source="reports/2024/january/report.pdf",
+            ...     destination="/tmp/downloaded_report.pdf"
+            ... )
+        """
+        # Ensure directory exists
+
+        if not os.path.exists(os.path.dirname(destination)):
+            os.makedirs(os.path.dirname(destination), exist_ok=True)
+
+        try:
+            response_data = await cls.get_content(source, store_name)
+
+            with open(destination, "wb") as f:
+                f.write(response_data)
+
+            logger.info(f"Successfully downloaded file: {source}")
+        except Exception as e:
+            logger.warning(
+                f"Failed to download file {source} from object store: {str(e)}"
+            )
+            raise e
+
+    @classmethod
+    async def download_prefix(
+        cls,
+        source: str,
+        destination: str = TEMPORARY_PATH,
+        store_name: str = DEPLOYMENT_OBJECT_STORE_NAME,
+    ) -> None:
+        """Download all files from a store prefix to a local directory.
+
+        Args:
+            source: Object store prefix to download files from.
+            destination: Local directory where files will be saved.
+            store_name: Name of the Dapr object store binding to use.
+        """
+        try:
+            # List all files under the prefix
+            file_list = await cls.list_files(source, store_name)
+
+            logger.info(f"Found {len(file_list)} files to download from: {source}")
+
+            # Download each file
+            for file_path in file_list:
+                local_file_path = os.path.join(destination, file_path)
+                await cls.download_file(file_path, local_file_path, store_name)
+
+            logger.info(f"Successfully downloaded all files from: {source}")
+        except Exception as e:
+            logger.warning(f"Failed to download files from object store: {str(e)}")
+            raise
+
+    @classmethod
+    async def _invoke_dapr_binding(
+        cls,
+        operation: str,
+        metadata: dict,
+        data: Union[bytes, str] = "",
+        store_name: str = DEPLOYMENT_OBJECT_STORE_NAME,
+    ) -> bytes:
+        """Common method to invoke Dapr binding operations.
+
+        Args:
+            operation: The Dapr binding operation to perform.
+            metadata: Metadata for the binding operation.
+            data: Optional data to send with the request.
+            store_name: Name of the Dapr object store binding to use.
+
+        Returns:
+            Response data from the Dapr binding.
+
+        Raises:
+            Exception: If there's an error with the Dapr binding operation.
+        """
+        try:
+            with DaprClient(
+                max_grpc_message_length=DAPR_MAX_GRPC_MESSAGE_LENGTH
+            ) as client:
+                response = client.invoke_binding(
+                    binding_name=store_name,
+                    operation=operation,
+                    data=data,
+                    binding_metadata=metadata,
+                )
+                return response.data
+        except Exception as e:
+            logger.error(f"Error in Dapr binding operation '{operation}': {str(e)}")
+            raise
+
+    @classmethod
+    def _cleanup_local_path(cls, path: str) -> None:
+        """Remove a file or directory (recursively). Ignores if doesn't exist.
+
+        Args:
+            path: The path to the file or directory to remove.
+        """
+        try:
+            if os.path.isfile(path) or os.path.islink(path):
+                os.remove(path)
+            elif os.path.isdir(path):
+                shutil.rmtree(path)
+        except FileNotFoundError:
+            pass  # ignore if the file or directory doesn't exist
+        except Exception as e:
+            logger.warning(f"Error cleaning up {path}: {str(e)}")