konduktor-nightly 0.1.0.dev20250408104740__py3-none-any.whl → 0.1.0.dev20250410104738__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +2 -2
- konduktor/adaptors/aws.py +221 -0
- konduktor/cli.py +3 -4
- konduktor/data/aws/__init__.py +15 -0
- konduktor/data/aws/s3.py +1114 -0
- konduktor/data/data_utils.py +46 -1
- konduktor/data/registry.py +2 -1
- konduktor/data/storage.py +22 -8
- konduktor/task.py +13 -0
- konduktor/templates/pod.yaml.j2 +5 -1
- konduktor/utils/common_utils.py +29 -0
- {konduktor_nightly-0.1.0.dev20250408104740.dist-info → konduktor_nightly-0.1.0.dev20250410104738.dist-info}/METADATA +5 -1
- {konduktor_nightly-0.1.0.dev20250408104740.dist-info → konduktor_nightly-0.1.0.dev20250410104738.dist-info}/RECORD +16 -13
- {konduktor_nightly-0.1.0.dev20250408104740.dist-info → konduktor_nightly-0.1.0.dev20250410104738.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250408104740.dist-info → konduktor_nightly-0.1.0.dev20250410104738.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250408104740.dist-info → konduktor_nightly-0.1.0.dev20250410104738.dist-info}/entry_points.txt +0 -0
konduktor/__init__.py
CHANGED
@@ -14,7 +14,7 @@ __all__ = [
|
|
14
14
|
]
|
15
15
|
|
16
16
|
# Replaced with the current commit when building the wheels.
|
17
|
-
_KONDUKTOR_COMMIT_SHA = '
|
17
|
+
_KONDUKTOR_COMMIT_SHA = 'c7becbf4b64d992f56aad47b69770f3345a37297'
|
18
18
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
19
19
|
|
20
20
|
|
@@ -48,5 +48,5 @@ def _get_git_commit():
|
|
48
48
|
|
49
49
|
|
50
50
|
__commit__ = _get_git_commit()
|
51
|
-
__version__ = '1.0.0.dev0.1.0.
|
51
|
+
__version__ = '1.0.0.dev0.1.0.dev20250410104738'
|
52
52
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
@@ -0,0 +1,221 @@
|
|
1
|
+
# Proprietary Changes made for Trainy under the Trainy Software License
|
2
|
+
# Original source: skypilot: https://github.com/skypilot-org/skypilot
|
3
|
+
# which is Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
"""AWS cloud adaptors
|
14
|
+
|
15
|
+
Thread safety notes:
|
16
|
+
|
17
|
+
The result of session() is cached per thread in threading.local() storage.
|
18
|
+
This means using their results is completely thread-safe.
|
19
|
+
|
20
|
+
We do not cache the resource/client objects, because some credentials may be
|
21
|
+
automatically rotated, but the cached resource/client object may not refresh the
|
22
|
+
credentials quickly enough, which can cause unexpected NoCredentialsError. By
|
23
|
+
creating the resource/client object from the thread-local session() object every
|
24
|
+
time, the credentials will be explicitly refreshed.
|
25
|
+
|
26
|
+
Calling session(), resource(), and client() is thread-safe, since they use a
|
27
|
+
lock to protect each object's creation.
|
28
|
+
|
29
|
+
|
30
|
+
This is informed by the following boto3 docs:
|
31
|
+
- Unlike Resources and Sessions, clients are generally thread-safe.
|
32
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/clients.html
|
33
|
+
- Resource instances are not thread safe and should not be shared across
|
34
|
+
threads or processes
|
35
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/resources.html
|
36
|
+
- Similar to Resource objects, Session objects are not thread safe and
|
37
|
+
should not be shared across threads and processes.
|
38
|
+
https://boto3.amazonaws.com/v1/documentation/api/latest/guide/session.html
|
39
|
+
"""
|
40
|
+
|
41
|
+
# pylint: disable=import-outside-toplevel
|
42
|
+
|
43
|
+
import functools
|
44
|
+
import logging
|
45
|
+
import threading
|
46
|
+
import time
|
47
|
+
from typing import Any, Callable
|
48
|
+
|
49
|
+
from konduktor.adaptors import common
|
50
|
+
from konduktor.utils import annotations, common_utils
|
51
|
+
|
52
|
+
_IMPORT_ERROR_MESSAGE = (
|
53
|
+
'Failed to import dependencies for AWS. ' 'Try pip install konduktor-nightly[s3]'
|
54
|
+
)
|
55
|
+
boto3 = common.LazyImport('boto3', import_error_message=_IMPORT_ERROR_MESSAGE)
|
56
|
+
botocore = common.LazyImport('botocore', import_error_message=_IMPORT_ERROR_MESSAGE)
|
57
|
+
_LAZY_MODULES = (boto3, botocore)
|
58
|
+
|
59
|
+
logger = logging.getLogger(__name__)
|
60
|
+
_session_creation_lock = threading.RLock()
|
61
|
+
|
62
|
+
version = 1
|
63
|
+
|
64
|
+
# Make up to 5 attempts by default to ride out potential credential errors.
|
65
|
+
_MAX_ATTEMPT_FOR_CREATION = 5
|
66
|
+
|
67
|
+
|
68
|
+
class _ThreadLocalLRUCache(threading.local):
|
69
|
+
def __init__(self, maxsize=32):
|
70
|
+
super().__init__()
|
71
|
+
self.cache = annotations.lru_cache(scope='global', maxsize=maxsize)
|
72
|
+
|
73
|
+
|
74
|
+
def _thread_local_lru_cache(maxsize=32):
|
75
|
+
# Create thread-local storage for the LRU cache
|
76
|
+
local_cache = _ThreadLocalLRUCache(maxsize)
|
77
|
+
|
78
|
+
def decorator(func):
|
79
|
+
@functools.wraps(func)
|
80
|
+
def wrapper(*args, **kwargs):
|
81
|
+
# Use the thread-local LRU cache
|
82
|
+
return local_cache.cache(func)(*args, **kwargs)
|
83
|
+
|
84
|
+
return wrapper
|
85
|
+
|
86
|
+
return decorator
|
87
|
+
|
88
|
+
|
89
|
+
def _assert_kwargs_builtin_type(kwargs):
|
90
|
+
assert all(
|
91
|
+
isinstance(v, (int, float, str)) for v in kwargs.values()
|
92
|
+
), f'kwargs should not contain none built-in types: {kwargs}'
|
93
|
+
|
94
|
+
|
95
|
+
def _create_aws_object(creation_fn_or_cls: Callable[[], Any], object_name: str) -> Any:
|
96
|
+
"""Create an AWS object.
|
97
|
+
|
98
|
+
Args:
|
99
|
+
creation_fn_or_cls: The function or class called to create the AWS object.
|
100
|
+
|
101
|
+
Returns:
|
102
|
+
The created AWS object.
|
103
|
+
"""
|
104
|
+
attempt = 0
|
105
|
+
backoff = common_utils.Backoff()
|
106
|
+
while True:
|
107
|
+
try:
|
108
|
+
# Creating boto3 objects is not thread-safe,
|
109
|
+
# so we add a reentrant lock to synchronize the session creation.
|
110
|
+
# Reference: https://github.com/boto/boto3/issues/1592
|
111
|
+
|
112
|
+
# NOTE: we need the lock here to avoid thread-safety issues when
|
113
|
+
# creating the resource, because Python module is a shared object,
|
114
|
+
# and we are not sure if the code inside 'session()' or
|
115
|
+
# 'session().xx()' is thread-safe.
|
116
|
+
with _session_creation_lock:
|
117
|
+
return creation_fn_or_cls()
|
118
|
+
except (
|
119
|
+
botocore_exceptions().CredentialRetrievalError,
|
120
|
+
botocore_exceptions().NoCredentialsError,
|
121
|
+
) as e:
|
122
|
+
attempt += 1
|
123
|
+
if attempt >= _MAX_ATTEMPT_FOR_CREATION:
|
124
|
+
raise
|
125
|
+
time.sleep(backoff.current_backoff())
|
126
|
+
logger.info(
|
127
|
+
f'Retry creating AWS {object_name} due to '
|
128
|
+
f'{common_utils.format_exception(e)}.'
|
129
|
+
)
|
130
|
+
|
131
|
+
|
132
|
+
# The LRU cache needs to be thread-local to avoid multiple threads sharing the
|
133
|
+
# same session object, which is not guaranteed to be thread-safe.
|
134
|
+
@_thread_local_lru_cache()
|
135
|
+
def session(check_credentials: bool = True):
|
136
|
+
"""Create an AWS session."""
|
137
|
+
s = _create_aws_object(boto3.session.Session, 'session')
|
138
|
+
if check_credentials and s.get_credentials() is None:
|
139
|
+
# s.get_credentials() can be None if there are actually no credentials,
|
140
|
+
# or if we fail to get credentials from IMDS (e.g. due to throttling).
|
141
|
+
# Technically, it could be okay to have no credentials, as certain AWS
|
142
|
+
# APIs don't actually need them. But afaik everything we use AWS for
|
143
|
+
# needs credentials.
|
144
|
+
raise botocore_exceptions().NoCredentialsError()
|
145
|
+
return s
|
146
|
+
|
147
|
+
|
148
|
+
# Avoid caching the resource/client objects. If we are using the assumed role,
|
149
|
+
# the credentials will be automatically rotated, but the cached resource/client
|
150
|
+
# object will only refresh the credentials at a fixed 15-minute interval,
|
151
|
+
# which can cause unexpected NoCredentialsError. By creating the resource/client
|
152
|
+
# object every time, the credentials will be explicitly refreshed.
|
153
|
+
# The creation of the resource/client is relatively fast (around 0.3s), so the
|
154
|
+
# performance impact is negligible.
|
155
|
+
# Reference: https://github.com/skypilot-org/skypilot/issues/2697
|
156
|
+
def resource(service_name: str, **kwargs):
|
157
|
+
"""Create an AWS resource of a certain service.
|
158
|
+
|
159
|
+
Args:
|
160
|
+
service_name: AWS resource name (e.g., 's3').
|
161
|
+
kwargs: Other options. We add max_attempts to the kwargs instead of
|
162
|
+
using botocore.config.Config() because the latter will generate
|
163
|
+
different keys even if the config is the same
|
164
|
+
"""
|
165
|
+
_assert_kwargs_builtin_type(kwargs)
|
166
|
+
|
167
|
+
max_attempts = kwargs.pop('max_attempts', None)
|
168
|
+
if max_attempts is not None:
|
169
|
+
config = botocore_config().Config(retries={'max_attempts': max_attempts})
|
170
|
+
kwargs['config'] = config
|
171
|
+
|
172
|
+
check_credentials = kwargs.pop('check_credentials', True)
|
173
|
+
|
174
|
+
# Need to use the resource retrieved from the per-thread session to avoid
|
175
|
+
# thread-safety issues (directly creating the resource with boto3.resource()
|
176
|
+
# is not thread-safe). Reference: https://stackoverflow.com/a/59635814
|
177
|
+
return _create_aws_object(
|
178
|
+
lambda: session(check_credentials=check_credentials).resource(
|
179
|
+
service_name, **kwargs
|
180
|
+
),
|
181
|
+
'resource',
|
182
|
+
)
|
183
|
+
|
184
|
+
|
185
|
+
def client(service_name: str, **kwargs):
|
186
|
+
"""Create an AWS client of a certain service.
|
187
|
+
|
188
|
+
Args:
|
189
|
+
service_name: AWS service name (e.g., 's3', 'ec2').
|
190
|
+
kwargs: Other options.
|
191
|
+
"""
|
192
|
+
_assert_kwargs_builtin_type(kwargs)
|
193
|
+
|
194
|
+
check_credentials = kwargs.pop('check_credentials', True)
|
195
|
+
|
196
|
+
# Need to use the client retrieved from the per-thread session to avoid
|
197
|
+
# thread-safety issues (Directly creating the client with boto3.client() is
|
198
|
+
# not thread-safe). Reference: https://stackoverflow.com/a/59635814
|
199
|
+
|
200
|
+
return _create_aws_object(
|
201
|
+
lambda: session(check_credentials=check_credentials).client(
|
202
|
+
service_name, **kwargs
|
203
|
+
),
|
204
|
+
'client',
|
205
|
+
)
|
206
|
+
|
207
|
+
|
208
|
+
@common.load_lazy_modules(modules=_LAZY_MODULES)
|
209
|
+
def botocore_exceptions():
|
210
|
+
"""AWS botocore exception."""
|
211
|
+
from botocore import exceptions
|
212
|
+
|
213
|
+
return exceptions
|
214
|
+
|
215
|
+
|
216
|
+
@common.load_lazy_modules(modules=_LAZY_MODULES)
|
217
|
+
def botocore_config():
|
218
|
+
"""AWS botocore config."""
|
219
|
+
from botocore import config
|
220
|
+
|
221
|
+
return config
|
konduktor/cli.py
CHANGED
@@ -773,10 +773,9 @@ def check(clouds: Tuple[str]):
|
|
773
773
|
|
774
774
|
.. code-block:: bash
|
775
775
|
|
776
|
-
# Check
|
777
|
-
konduktor check
|
778
|
-
|
779
|
-
konduktor check gcp
|
776
|
+
# Check only specific clouds - gs, s3.
|
777
|
+
konduktor check gs
|
778
|
+
konduktor check s3
|
780
779
|
"""
|
781
780
|
clouds_arg = clouds if len(clouds) > 0 else None
|
782
781
|
konduktor_check.check(clouds=clouds_arg)
|
@@ -0,0 +1,15 @@
|
|
1
|
+
"""Data sync between workstation <--> blob (s3, gcs, etc.) <--> worker pods"""
|
2
|
+
|
3
|
+
from konduktor.data.aws.s3 import (
|
4
|
+
DEFAULT_AWS_CONFIG_PATH,
|
5
|
+
DEFAULT_AWS_CREDENTIAL_PATH,
|
6
|
+
S3CloudStorage,
|
7
|
+
S3Store,
|
8
|
+
)
|
9
|
+
|
10
|
+
__all__ = [
|
11
|
+
'S3Store',
|
12
|
+
'S3CloudStorage',
|
13
|
+
'DEFAULT_AWS_CREDENTIAL_PATH',
|
14
|
+
'DEFAULT_AWS_CONFIG_PATH',
|
15
|
+
]
|