konduktor-nightly 0.1.0.dev20250422104744__py3-none-any.whl → 0.1.0.dev20250424104814__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +2 -2
- konduktor/adaptors/common.py +2 -2
- konduktor/backends/jobset_utils.py +17 -9
- konduktor/data/aws/s3.py +8 -1
- konduktor/utils/kubernetes_utils.py +33 -27
- {konduktor_nightly-0.1.0.dev20250422104744.dist-info → konduktor_nightly-0.1.0.dev20250424104814.dist-info}/METADATA +4 -4
- {konduktor_nightly-0.1.0.dev20250422104744.dist-info → konduktor_nightly-0.1.0.dev20250424104814.dist-info}/RECORD +10 -10
- {konduktor_nightly-0.1.0.dev20250422104744.dist-info → konduktor_nightly-0.1.0.dev20250424104814.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250422104744.dist-info → konduktor_nightly-0.1.0.dev20250424104814.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250422104744.dist-info → konduktor_nightly-0.1.0.dev20250424104814.dist-info}/entry_points.txt +0 -0
konduktor/__init__.py
CHANGED
@@ -14,7 +14,7 @@ __all__ = [
|
|
14
14
|
]
|
15
15
|
|
16
16
|
# Replaced with the current commit when building the wheels.
|
17
|
-
_KONDUKTOR_COMMIT_SHA = '
|
17
|
+
_KONDUKTOR_COMMIT_SHA = 'd1d19dd0b1d1e1440aad10115f235e2b6ea95dd7'
|
18
18
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
19
19
|
|
20
20
|
|
@@ -48,5 +48,5 @@ def _get_git_commit():
|
|
48
48
|
|
49
49
|
|
50
50
|
__commit__ = _get_git_commit()
|
51
|
-
__version__ = '1.0.0.dev0.1.0.
|
51
|
+
__version__ = '1.0.0.dev0.1.0.dev20250424104814'
|
52
52
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
konduktor/adaptors/common.py
CHANGED
@@ -18,7 +18,7 @@ import os
|
|
18
18
|
import threading
|
19
19
|
from typing import Any, Callable, Optional, Tuple
|
20
20
|
|
21
|
-
|
21
|
+
import filelock
|
22
22
|
|
23
23
|
|
24
24
|
class LazyImport:
|
@@ -101,7 +101,7 @@ class LockedClientProxy:
|
|
101
101
|
timeout=10,
|
102
102
|
):
|
103
103
|
self._client = client
|
104
|
-
self._lock = FileLock(lock_path, timeout=timeout)
|
104
|
+
self._lock = filelock.FileLock(lock_path, timeout=timeout)
|
105
105
|
|
106
106
|
def __getattr__(self, attr):
|
107
107
|
target = getattr(self._client, attr)
|
konduktor/backends/jobset_utils.py
CHANGED
@@ -6,11 +6,14 @@ import os
|
|
6
6
|
import tempfile
|
7
7
|
import typing
|
8
8
|
from datetime import datetime, timezone
|
9
|
-
from typing import Any, Dict, Optional
|
9
|
+
from typing import Any, Dict, Optional, Tuple
|
10
10
|
from urllib.parse import urlparse
|
11
11
|
|
12
12
|
import colorama
|
13
13
|
|
14
|
+
if typing.TYPE_CHECKING:
|
15
|
+
from datetime import timedelta
|
16
|
+
|
14
17
|
import konduktor
|
15
18
|
from konduktor import constants, kube_client, logging
|
16
19
|
from konduktor.data import registry
|
@@ -387,16 +390,16 @@ def show_status_table(namespace: str, all_users: bool):
|
|
387
390
|
)
|
388
391
|
elif status['replicatedJobsStatus'][0]['suspended']:
|
389
392
|
return (
|
390
|
-
f'{colorama.Fore.
|
393
|
+
f'{colorama.Fore.BLUE}'
|
391
394
|
f'{JobStatus.SUSPENDED.name}{colorama.Style.RESET_ALL}'
|
392
395
|
)
|
393
396
|
else:
|
394
397
|
return (
|
395
|
-
f'{colorama.Fore.
|
398
|
+
f'{colorama.Fore.YELLOW}'
|
396
399
|
f'{JobStatus.PENDING.name}{colorama.Style.RESET_ALL}'
|
397
400
|
)
|
398
401
|
|
399
|
-
def _get_time_delta(timestamp: str):
|
402
|
+
def _get_time_delta(timestamp: str) -> Tuple[str, 'timedelta']:
|
400
403
|
delta = datetime.now(timezone.utc) - datetime.strptime(
|
401
404
|
timestamp, '%Y-%m-%dT%H:%M:%SZ'
|
402
405
|
).replace(tzinfo=timezone.utc)
|
@@ -410,7 +413,7 @@ def show_status_table(namespace: str, all_users: bool):
|
|
410
413
|
hours_str = f'{hours} hours, ' if hours > 0 else ''
|
411
414
|
minutes_str = f'{minutes} minutes' if minutes > 0 else ''
|
412
415
|
|
413
|
-
return f'{days_str}{hours_str}{minutes_str}'
|
416
|
+
return f'{days_str}{hours_str}{minutes_str}', delta
|
414
417
|
|
415
418
|
def _get_resources(job: Dict[str, Any]) -> str:
|
416
419
|
num_pods = int(
|
@@ -433,15 +436,16 @@ def show_status_table(namespace: str, all_users: bool):
|
|
433
436
|
job_table = log_utils.create_table(columns)
|
434
437
|
job_specs = list_jobset(namespace)
|
435
438
|
assert job_specs is not None, 'Retrieving jobs failed'
|
439
|
+
rows = []
|
436
440
|
for job in job_specs['items']:
|
437
441
|
if all_users:
|
438
|
-
|
442
|
+
rows.append(
|
439
443
|
[
|
440
444
|
job['metadata']['name'],
|
441
445
|
job['metadata']['labels'][JOBSET_USERID_LABEL],
|
442
446
|
_get_status_string_colorized(job['status']),
|
443
447
|
_get_resources(job),
|
444
|
-
_get_time_delta(job['metadata']['creationTimestamp']),
|
448
|
+
*_get_time_delta(job['metadata']['creationTimestamp']),
|
445
449
|
]
|
446
450
|
)
|
447
451
|
elif (
|
@@ -449,12 +453,16 @@ def show_status_table(namespace: str, all_users: bool):
|
|
449
453
|
and job['metadata']['labels'][JOBSET_USER_LABEL]
|
450
454
|
== common_utils.get_cleaned_username()
|
451
455
|
):
|
452
|
-
|
456
|
+
rows.append(
|
453
457
|
[
|
454
458
|
job['metadata']['name'],
|
455
459
|
_get_status_string_colorized(job['status']),
|
456
460
|
_get_resources(job),
|
457
|
-
_get_time_delta(job['metadata']['creationTimestamp']),
|
461
|
+
*_get_time_delta(job['metadata']['creationTimestamp']),
|
458
462
|
]
|
459
463
|
)
|
464
|
+
rows = [row[:-1] for row in sorted(rows, key=lambda x: x[-1])]
|
465
|
+
# have the most recently submitted jobs at the top
|
466
|
+
for row in rows:
|
467
|
+
job_table.add_row(row)
|
460
468
|
print(job_table)
|
konduktor/data/aws/s3.py
CHANGED
@@ -472,6 +472,13 @@ class S3Store(storage_utils.AbstractStore):
|
|
472
472
|
f'Bucket {self.name} does not exist.'
|
473
473
|
+ f' To debug, consider running `{command}`.'
|
474
474
|
) from e
|
475
|
+
# Bucket already exists but we tried to create it. Continue
|
476
|
+
elif error_code == '409':
|
477
|
+
command = f'aws s3 ls {self.name}'
|
478
|
+
logger.info(
|
479
|
+
f'Bucket {self.name} already exists. Skipping '
|
480
|
+
f'creation. To check, consider running `{command}`'
|
481
|
+
)
|
475
482
|
|
476
483
|
if isinstance(self.source, str) and self.source.startswith('s3://'):
|
477
484
|
with ux_utils.print_exception_no_traceback():
|
@@ -865,7 +872,7 @@ class S3Store(storage_utils.AbstractStore):
|
|
865
872
|
hints = 'AWS SSO is set.'
|
866
873
|
if static_credential_exists:
|
867
874
|
hints += (
|
868
|
-
' To ensure multiple clouds work correctly, please use
|
875
|
+
' To ensure multiple clouds work correctly, please use Konduktor '
|
869
876
|
'with static credentials (e.g., ~/.aws/credentials) by unsetting '
|
870
877
|
'the AWS_PROFILE environment variable.'
|
871
878
|
)
|
konduktor/utils/kubernetes_utils.py
CHANGED
@@ -19,6 +19,7 @@ import re
|
|
19
19
|
import typing
|
20
20
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
21
21
|
|
22
|
+
import filelock
|
22
23
|
import kubernetes
|
23
24
|
import yaml
|
24
25
|
|
@@ -53,6 +54,8 @@ NO_ACCELERATOR_HELP_MESSAGE = (
|
|
53
54
|
'(e.g. `nvidia.com/gpu` are setup correctly. '
|
54
55
|
)
|
55
56
|
|
57
|
+
_K8S_CLIENT_LOCK_PATH = '~/.konduktor/k8s_client.lock'
|
58
|
+
_K8s_CLIENT_LOCK = filelock.FileLock(_K8S_CLIENT_LOCK_PATH)
|
56
59
|
|
57
60
|
logger = logging.get_logger(__name__)
|
58
61
|
|
@@ -581,37 +584,40 @@ def set_secret(
|
|
581
584
|
"""
|
582
585
|
Create/update a secret in a namespace. Values are encoded to base64.
|
583
586
|
"""
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
587
|
+
with _K8s_CLIENT_LOCK:
|
588
|
+
secret_exists, response = check_secret_exists(
|
589
|
+
secret_name=secret_name,
|
590
|
+
namespace=namespace,
|
591
|
+
context=context,
|
592
|
+
)
|
589
593
|
|
590
|
-
|
591
|
-
|
592
|
-
|
594
|
+
secret_metadata = {'name': secret_name, 'labels': {'parent': 'konduktor'}}
|
595
|
+
custom_metadata = config.get_nested(('kubernetes', 'custom_metadata'), {})
|
596
|
+
config.merge_k8s_configs(secret_metadata, custom_metadata)
|
593
597
|
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
598
|
+
secret = kubernetes.client.V1Secret(
|
599
|
+
metadata=kubernetes.client.V1ObjectMeta(**secret_metadata),
|
600
|
+
type='Opaque',
|
601
|
+
data={secret_key: secret_value},
|
602
|
+
)
|
599
603
|
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
604
|
+
try:
|
605
|
+
if secret_exists:
|
606
|
+
kube_client.core_api(context).patch_namespaced_secret(
|
607
|
+
secret_name, namespace, secret
|
608
|
+
)
|
609
|
+
else:
|
610
|
+
kube_client.core_api(context).create_namespaced_secret(
|
611
|
+
namespace, secret
|
612
|
+
)
|
613
|
+
except kube_client.api_exception() as e:
|
614
|
+
return False, str(e)
|
605
615
|
else:
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
f'Secret {secret_name} in namespace {namespace} '
|
612
|
-
f'in context {context} created/updated'
|
613
|
-
)
|
614
|
-
return True, None
|
616
|
+
logger.debug(
|
617
|
+
f'Secret {secret_name} in namespace {namespace} '
|
618
|
+
f'in context {context} created/updated'
|
619
|
+
)
|
620
|
+
return True, None
|
615
621
|
|
616
622
|
|
617
623
|
def get_autoscaler_type() -> Optional[kubernetes_enums.KubernetesAutoscalerType]:
|
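{konduktor_nightly-0.1.0.dev20250422104744.dist-info → konduktor_nightly-0.1.0.dev20250424104814.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@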
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: konduktor-nightly
|
3
|
-
Version: 0.1.0.dev20250422104744
|
3
|
+
Version: 0.1.0.dev20250424104814
|
4
4
|
Summary: GPU Cluster Health Management
|
5
5
|
Author: Andrew Aikawa
|
6
6
|
Author-email: asai@berkeley.edu
|
@@ -12,9 +12,9 @@ Classifier: Programming Language :: Python :: 3.11
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.12
|
13
13
|
Classifier: Programming Language :: Python :: 3.13
|
14
14
|
Provides-Extra: s3
|
15
|
-
Requires-Dist: awscli (>=1.32.84,<2.0.0) ; extra == "s3"
|
16
|
-
Requires-Dist: boto3 (>=1.34.84,<2.0.0) ; extra == "s3"
|
17
|
-
Requires-Dist: botocore (>=1.34.84,<2.0.0) ; extra == "s3"
|
15
|
+
Requires-Dist: awscli[s3] (>=1.32.84,<2.0.0) ; extra == "s3"
|
16
|
+
Requires-Dist: boto3[s3] (>=1.34.84,<2.0.0) ; extra == "s3"
|
17
|
+
Requires-Dist: botocore[s3] (>=1.34.84,<2.0.0) ; extra == "s3"
|
18
18
|
Requires-Dist: click (>=8.1.7,<9.0.0)
|
19
19
|
Requires-Dist: colorama (>=0.4.6,<0.5.0)
|
20
20
|
Requires-Dist: filelock (>=3.18.0,<4.0.0)
|
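{konduktor_nightly-0.1.0.dev20250422104744.dist-info → konduktor_nightly-0.1.0.dev20250424104814.dist-info}/RECORD
CHANGED
@@ -1,12 +1,12 @@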
|
|
1
|
-
konduktor/__init__.py,sha256=
|
1
|
+
konduktor/__init__.py,sha256=05LRGeAUVsE5sdak6LyrRcGrzNeoSIyp1i9QlkEopRQ,1540
|
2
2
|
konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
|
4
|
-
konduktor/adaptors/common.py,sha256=
|
4
|
+
konduktor/adaptors/common.py,sha256=uTdpKvgBSwYMmynx9wR5kiZQyTrdaw9ZI4KH6Z2E5Hw,4296
|
5
5
|
konduktor/adaptors/gcp.py,sha256=ierTF4z7vwpJ9BsC7LSiwv4uLcjGXscwZOwQrddr2vM,4102
|
6
6
|
konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
|
7
7
|
konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
|
8
8
|
konduktor/backends/jobset.py,sha256=veptYGXtk-ugWxBsBV5SnqI4rGKOlGfm_N3wApvNhSQ,8326
|
9
|
-
konduktor/backends/jobset_utils.py,sha256=
|
9
|
+
konduktor/backends/jobset_utils.py,sha256=UJkDu6Y8u4N2AaNSJTOSgbGLyY25bzaP-I6esJ11jms,17578
|
10
10
|
konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
|
11
11
|
konduktor/cli.py,sha256=Ii9-2mrc-1f2ksLasA-xRb-JnEi_9ZeCXZ3lJ1GG8H8,23515
|
12
12
|
konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
|
@@ -50,7 +50,7 @@ konduktor/dashboard/frontend/server.js,sha256=jcp6_Ww9YJD3uKY07jR3KMlAM6n1QZdxZn
|
|
50
50
|
konduktor/dashboard/frontend/tailwind.config.js,sha256=fCnc48wvioIDOe5ldQ_6RE7F76cP7aU7pDrxBPJx-Fk,366
|
51
51
|
konduktor/data/__init__.py,sha256=KMR2i3E9YcIpiIuCxtRdS7BQ1w2vUAbbve7agziJrLo,213
|
52
52
|
konduktor/data/aws/__init__.py,sha256=_6zWfNNAK1QGgyKqg_yPYWcXlnffchyvIMErYa6tw_U,331
|
53
|
-
konduktor/data/aws/s3.py,sha256=
|
53
|
+
konduktor/data/aws/s3.py,sha256=2hvbgZ9NuwXY88blxfdjSbONSXcyWF0CtheDZkMYorQ,48296
|
54
54
|
konduktor/data/constants.py,sha256=yXVEoTI2we1xOjVSU-bjRCQCLpVvpEvJ0GedXvSwEfw,127
|
55
55
|
konduktor/data/data_utils.py,sha256=yrnu8_cY63TXqfWfFG3yqY2w_tE9UQK9jIQAFQCDVg0,9668
|
56
56
|
konduktor/data/gcp/__init__.py,sha256=rlQxACBC_Vu36mdgPyJgUy4mGc_6Nt_a96JAuaPz2pQ,489
|
@@ -82,7 +82,7 @@ konduktor/utils/constants.py,sha256=1DneiTR21lvKUcWdBGwC4I4fD4uPjbjLUilEnJS7rzA,
|
|
82
82
|
konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4xc,2258
|
83
83
|
konduktor/utils/exceptions.py,sha256=GBOFIkk9nikqWGR0FXGXOWVVImoH7nWnMl_L3Oux3fo,6581
|
84
84
|
konduktor/utils/kubernetes_enums.py,sha256=SabUueF6Bpzbpa57gyH5VB65xla2N9l8CZmAeYTfGmM,176
|
85
|
-
konduktor/utils/kubernetes_utils.py,sha256=
|
85
|
+
konduktor/utils/kubernetes_utils.py,sha256=ivFVh90Gez19_JD5U4bgCO5zNtQUflF0hJsM5nZLj8A,23864
|
86
86
|
konduktor/utils/log_utils.py,sha256=lgHCq4OdtJNfbpso-uYGONUCVNsUrUkUWjROarsHt6s,9897
|
87
87
|
konduktor/utils/loki_utils.py,sha256=ND1pbbbFhLhLKw3870j44LpR_9MB0EkDJSs5K7nWdY4,3473
|
88
88
|
konduktor/utils/rich_utils.py,sha256=kdjNe6S2LlpOxyzhFHqMzCz7g4ROC4e7TPWgcbRsrQE,3577
|
@@ -90,8 +90,8 @@ konduktor/utils/schemas.py,sha256=Gv7SEhFpv-eO5izqRz8d-eQ9z-lVmY05akm6HEXIIdc,17
|
|
90
90
|
konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
|
91
91
|
konduktor/utils/ux_utils.py,sha256=NPNu3Igu2Z9Oq77ghJhy_fIxQZTXWr9BtKyxN3Wslzo,7164
|
92
92
|
konduktor/utils/validator.py,sha256=tgBghVyedyzGx84-U2Qfoh_cJBE3oUk9gclMW90ORks,691
|
93
|
-
konduktor_nightly-0.1.0.
|
94
|
-
konduktor_nightly-0.1.0.
|
95
|
-
konduktor_nightly-0.1.0.
|
96
|
-
konduktor_nightly-0.1.0.
|
97
|
-
konduktor_nightly-0.1.0.
|
93
|
+
konduktor_nightly-0.1.0.dev20250424104814.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
|
94
|
+
konduktor_nightly-0.1.0.dev20250424104814.dist-info/METADATA,sha256=FyMj5AqQzOiHDSZqEi2BKlJO-IyuWDfXbIlYkBYRqG0,4366
|
95
|
+
konduktor_nightly-0.1.0.dev20250424104814.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
96
|
+
konduktor_nightly-0.1.0.dev20250424104814.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
|
97
|
+
konduktor_nightly-0.1.0.dev20250424104814.dist-info/RECORD,,
|
File without changes
|
File without changes
|