konduktor-nightly 0.1.0.dev20250422104744__py3-none-any.whl → 0.1.0.dev20250424104814__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
konduktor/__init__.py CHANGED
@@ -14,7 +14,7 @@ __all__ = [
14
14
  ]
15
15
 
16
16
  # Replaced with the current commit when building the wheels.
17
- _KONDUKTOR_COMMIT_SHA = 'aa8a6323371c604e3172983b46dc90bf1dc781b7'
17
+ _KONDUKTOR_COMMIT_SHA = 'd1d19dd0b1d1e1440aad10115f235e2b6ea95dd7'
18
18
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
19
19
 
20
20
 
@@ -48,5 +48,5 @@ def _get_git_commit():
48
48
 
49
49
 
50
50
  __commit__ = _get_git_commit()
51
- __version__ = '1.0.0.dev0.1.0.dev20250422104744'
51
+ __version__ = '1.0.0.dev0.1.0.dev20250424104814'
52
52
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -18,7 +18,7 @@ import os
18
18
  import threading
19
19
  from typing import Any, Callable, Optional, Tuple
20
20
 
21
- from filelock import FileLock
21
+ import filelock
22
22
 
23
23
 
24
24
  class LazyImport:
@@ -101,7 +101,7 @@ class LockedClientProxy:
101
101
  timeout=10,
102
102
  ):
103
103
  self._client = client
104
- self._lock = FileLock(lock_path, timeout=timeout)
104
+ self._lock = filelock.FileLock(lock_path, timeout=timeout)
105
105
 
106
106
  def __getattr__(self, attr):
107
107
  target = getattr(self._client, attr)
@@ -6,11 +6,14 @@ import os
6
6
  import tempfile
7
7
  import typing
8
8
  from datetime import datetime, timezone
9
- from typing import Any, Dict, Optional
9
+ from typing import Any, Dict, Optional, Tuple
10
10
  from urllib.parse import urlparse
11
11
 
12
12
  import colorama
13
13
 
14
+ if typing.TYPE_CHECKING:
15
+ from datetime import timedelta
16
+
14
17
  import konduktor
15
18
  from konduktor import constants, kube_client, logging
16
19
  from konduktor.data import registry
@@ -387,16 +390,16 @@ def show_status_table(namespace: str, all_users: bool):
387
390
  )
388
391
  elif status['replicatedJobsStatus'][0]['suspended']:
389
392
  return (
390
- f'{colorama.Fore.GREEN}'
393
+ f'{colorama.Fore.BLUE}'
391
394
  f'{JobStatus.SUSPENDED.name}{colorama.Style.RESET_ALL}'
392
395
  )
393
396
  else:
394
397
  return (
395
- f'{colorama.Fore.BLUE}'
398
+ f'{colorama.Fore.YELLOW}'
396
399
  f'{JobStatus.PENDING.name}{colorama.Style.RESET_ALL}'
397
400
  )
398
401
 
399
- def _get_time_delta(timestamp: str):
402
+ def _get_time_delta(timestamp: str) -> Tuple[str, 'timedelta']:
400
403
  delta = datetime.now(timezone.utc) - datetime.strptime(
401
404
  timestamp, '%Y-%m-%dT%H:%M:%SZ'
402
405
  ).replace(tzinfo=timezone.utc)
@@ -410,7 +413,7 @@ def show_status_table(namespace: str, all_users: bool):
410
413
  hours_str = f'{hours} hours, ' if hours > 0 else ''
411
414
  minutes_str = f'{minutes} minutes' if minutes > 0 else ''
412
415
 
413
- return f'{days_str}{hours_str}{minutes_str}'
416
+ return f'{days_str}{hours_str}{minutes_str}', delta
414
417
 
415
418
  def _get_resources(job: Dict[str, Any]) -> str:
416
419
  num_pods = int(
@@ -433,15 +436,16 @@ def show_status_table(namespace: str, all_users: bool):
433
436
  job_table = log_utils.create_table(columns)
434
437
  job_specs = list_jobset(namespace)
435
438
  assert job_specs is not None, 'Retrieving jobs failed'
439
+ rows = []
436
440
  for job in job_specs['items']:
437
441
  if all_users:
438
- job_table.add_row(
442
+ rows.append(
439
443
  [
440
444
  job['metadata']['name'],
441
445
  job['metadata']['labels'][JOBSET_USERID_LABEL],
442
446
  _get_status_string_colorized(job['status']),
443
447
  _get_resources(job),
444
- _get_time_delta(job['metadata']['creationTimestamp']),
448
+ *_get_time_delta(job['metadata']['creationTimestamp']),
445
449
  ]
446
450
  )
447
451
  elif (
@@ -449,12 +453,16 @@ def show_status_table(namespace: str, all_users: bool):
449
453
  and job['metadata']['labels'][JOBSET_USER_LABEL]
450
454
  == common_utils.get_cleaned_username()
451
455
  ):
452
- job_table.add_row(
456
+ rows.append(
453
457
  [
454
458
  job['metadata']['name'],
455
459
  _get_status_string_colorized(job['status']),
456
460
  _get_resources(job),
457
- _get_time_delta(job['metadata']['creationTimestamp']),
461
+ *_get_time_delta(job['metadata']['creationTimestamp']),
458
462
  ]
459
463
  )
464
+ rows = [row[:-1] for row in sorted(rows, key=lambda x: x[-1])]
465
+ # have the most recently submitted jobs at the top
466
+ for row in rows:
467
+ job_table.add_row(row)
460
468
  print(job_table)
konduktor/data/aws/s3.py CHANGED
@@ -472,6 +472,13 @@ class S3Store(storage_utils.AbstractStore):
472
472
  f'Bucket {self.name} does not exist.'
473
473
  + f' To debug, consider running `{command}`.'
474
474
  ) from e
475
+ # Bucket already exists but we tried to create it. Continue
476
+ elif error_code == '409':
477
+ command = f'aws s3 ls {self.name}'
478
+ logger.info(
479
+ f'Bucket {self.name} already exists. Skipping '
480
+ f'creation. To check, consider running `{command}`'
481
+ )
475
482
 
476
483
  if isinstance(self.source, str) and self.source.startswith('s3://'):
477
484
  with ux_utils.print_exception_no_traceback():
@@ -865,7 +872,7 @@ class S3Store(storage_utils.AbstractStore):
865
872
  hints = 'AWS SSO is set.'
866
873
  if static_credential_exists:
867
874
  hints += (
868
- ' To ensure multiple clouds work correctly, please use SkyPilot '
875
+ ' To ensure multiple clouds work correctly, please use Konduktor '
869
876
  'with static credentials (e.g., ~/.aws/credentials) by unsetting '
870
877
  'the AWS_PROFILE environment variable.'
871
878
  )
@@ -19,6 +19,7 @@ import re
19
19
  import typing
20
20
  from typing import Any, Dict, List, Optional, Tuple, Union
21
21
 
22
+ import filelock
22
23
  import kubernetes
23
24
  import yaml
24
25
 
@@ -53,6 +54,8 @@ NO_ACCELERATOR_HELP_MESSAGE = (
53
54
  '(e.g. `nvidia.com/gpu` are setup correctly. '
54
55
  )
55
56
 
57
+ _K8S_CLIENT_LOCK_PATH = '~/.konduktor/k8s_client.lock'
58
+ _K8s_CLIENT_LOCK = filelock.FileLock(_K8S_CLIENT_LOCK_PATH)
56
59
 
57
60
  logger = logging.get_logger(__name__)
58
61
 
@@ -581,37 +584,40 @@ def set_secret(
581
584
  """
582
585
  Create/update a secret in a namespace. Values are encoded to base64.
583
586
  """
584
- secret_exists, response = check_secret_exists(
585
- secret_name=secret_name,
586
- namespace=namespace,
587
- context=context,
588
- )
587
+ with _K8s_CLIENT_LOCK:
588
+ secret_exists, response = check_secret_exists(
589
+ secret_name=secret_name,
590
+ namespace=namespace,
591
+ context=context,
592
+ )
589
593
 
590
- secret_metadata = {'name': secret_name, 'labels': {'parent': 'konduktor'}}
591
- custom_metadata = config.get_nested(('kubernetes', 'custom_metadata'), {})
592
- config.merge_k8s_configs(secret_metadata, custom_metadata)
594
+ secret_metadata = {'name': secret_name, 'labels': {'parent': 'konduktor'}}
595
+ custom_metadata = config.get_nested(('kubernetes', 'custom_metadata'), {})
596
+ config.merge_k8s_configs(secret_metadata, custom_metadata)
593
597
 
594
- secret = kubernetes.client.V1Secret(
595
- metadata=kubernetes.client.V1ObjectMeta(**secret_metadata),
596
- type='Opaque',
597
- data={secret_key: secret_value},
598
- )
598
+ secret = kubernetes.client.V1Secret(
599
+ metadata=kubernetes.client.V1ObjectMeta(**secret_metadata),
600
+ type='Opaque',
601
+ data={secret_key: secret_value},
602
+ )
599
603
 
600
- try:
601
- if secret_exists:
602
- kube_client.core_api(context).patch_namespaced_secret(
603
- secret_name, namespace, secret
604
- )
604
+ try:
605
+ if secret_exists:
606
+ kube_client.core_api(context).patch_namespaced_secret(
607
+ secret_name, namespace, secret
608
+ )
609
+ else:
610
+ kube_client.core_api(context).create_namespaced_secret(
611
+ namespace, secret
612
+ )
613
+ except kube_client.api_exception() as e:
614
+ return False, str(e)
605
615
  else:
606
- kube_client.core_api(context).create_namespaced_secret(namespace, secret)
607
- except kube_client.api_exception() as e:
608
- return False, str(e)
609
- else:
610
- logger.debug(
611
- f'Secret {secret_name} in namespace {namespace} '
612
- f'in context {context} created/updated'
613
- )
614
- return True, None
616
+ logger.debug(
617
+ f'Secret {secret_name} in namespace {namespace} '
618
+ f'in context {context} created/updated'
619
+ )
620
+ return True, None
615
621
 
616
622
 
617
623
  def get_autoscaler_type() -> Optional[kubernetes_enums.KubernetesAutoscalerType]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250422104744
3
+ Version: 0.1.0.dev20250424104814
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -12,9 +12,9 @@ Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
13
  Classifier: Programming Language :: Python :: 3.13
14
14
  Provides-Extra: s3
15
- Requires-Dist: awscli (>=1.32.84,<2.0.0) ; extra == "s3"
16
- Requires-Dist: boto3 (>=1.34.84,<2.0.0) ; extra == "s3"
17
- Requires-Dist: botocore (>=1.34.84,<2.0.0) ; extra == "s3"
15
+ Requires-Dist: awscli[s3] (>=1.32.84,<2.0.0) ; extra == "s3"
16
+ Requires-Dist: boto3[s3] (>=1.34.84,<2.0.0) ; extra == "s3"
17
+ Requires-Dist: botocore[s3] (>=1.34.84,<2.0.0) ; extra == "s3"
18
18
  Requires-Dist: click (>=8.1.7,<9.0.0)
19
19
  Requires-Dist: colorama (>=0.4.6,<0.5.0)
20
20
  Requires-Dist: filelock (>=3.18.0,<4.0.0)
@@ -1,12 +1,12 @@
1
- konduktor/__init__.py,sha256=fEbyFdqXRcQAk3deY_jEzyYiFv2UV-rfpUEZNviQ7k4,1540
1
+ konduktor/__init__.py,sha256=05LRGeAUVsE5sdak6LyrRcGrzNeoSIyp1i9QlkEopRQ,1540
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
- konduktor/adaptors/common.py,sha256=lbsLs5ldUN2nWAWmiUwvy_RFNL_s5KHl4lDYavYpkmY,4301
4
+ konduktor/adaptors/common.py,sha256=uTdpKvgBSwYMmynx9wR5kiZQyTrdaw9ZI4KH6Z2E5Hw,4296
5
5
  konduktor/adaptors/gcp.py,sha256=ierTF4z7vwpJ9BsC7LSiwv4uLcjGXscwZOwQrddr2vM,4102
6
6
  konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
7
7
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
8
8
  konduktor/backends/jobset.py,sha256=veptYGXtk-ugWxBsBV5SnqI4rGKOlGfm_N3wApvNhSQ,8326
9
- konduktor/backends/jobset_utils.py,sha256=NIwTvJdGhbDnXEceabiuUm9aHZ29LK3jVHfQzutB_ec,17297
9
+ konduktor/backends/jobset_utils.py,sha256=UJkDu6Y8u4N2AaNSJTOSgbGLyY25bzaP-I6esJ11jms,17578
10
10
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
11
11
  konduktor/cli.py,sha256=Ii9-2mrc-1f2ksLasA-xRb-JnEi_9ZeCXZ3lJ1GG8H8,23515
12
12
  konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
@@ -50,7 +50,7 @@ konduktor/dashboard/frontend/server.js,sha256=jcp6_Ww9YJD3uKY07jR3KMlAM6n1QZdxZn
50
50
  konduktor/dashboard/frontend/tailwind.config.js,sha256=fCnc48wvioIDOe5ldQ_6RE7F76cP7aU7pDrxBPJx-Fk,366
51
51
  konduktor/data/__init__.py,sha256=KMR2i3E9YcIpiIuCxtRdS7BQ1w2vUAbbve7agziJrLo,213
52
52
  konduktor/data/aws/__init__.py,sha256=_6zWfNNAK1QGgyKqg_yPYWcXlnffchyvIMErYa6tw_U,331
53
- konduktor/data/aws/s3.py,sha256=O2RDWlO3rwVZVIKXHiT_tqt3_ll-fa8KbAdocKKGj_8,47947
53
+ konduktor/data/aws/s3.py,sha256=2hvbgZ9NuwXY88blxfdjSbONSXcyWF0CtheDZkMYorQ,48296
54
54
  konduktor/data/constants.py,sha256=yXVEoTI2we1xOjVSU-bjRCQCLpVvpEvJ0GedXvSwEfw,127
55
55
  konduktor/data/data_utils.py,sha256=yrnu8_cY63TXqfWfFG3yqY2w_tE9UQK9jIQAFQCDVg0,9668
56
56
  konduktor/data/gcp/__init__.py,sha256=rlQxACBC_Vu36mdgPyJgUy4mGc_6Nt_a96JAuaPz2pQ,489
@@ -82,7 +82,7 @@ konduktor/utils/constants.py,sha256=1DneiTR21lvKUcWdBGwC4I4fD4uPjbjLUilEnJS7rzA,
82
82
  konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4xc,2258
83
83
  konduktor/utils/exceptions.py,sha256=GBOFIkk9nikqWGR0FXGXOWVVImoH7nWnMl_L3Oux3fo,6581
84
84
  konduktor/utils/kubernetes_enums.py,sha256=SabUueF6Bpzbpa57gyH5VB65xla2N9l8CZmAeYTfGmM,176
85
- konduktor/utils/kubernetes_utils.py,sha256=NGBredKPWpZC8VNlwTfWLhHnc-p68d5xlxT-0e92738,23556
85
+ konduktor/utils/kubernetes_utils.py,sha256=ivFVh90Gez19_JD5U4bgCO5zNtQUflF0hJsM5nZLj8A,23864
86
86
  konduktor/utils/log_utils.py,sha256=lgHCq4OdtJNfbpso-uYGONUCVNsUrUkUWjROarsHt6s,9897
87
87
  konduktor/utils/loki_utils.py,sha256=ND1pbbbFhLhLKw3870j44LpR_9MB0EkDJSs5K7nWdY4,3473
88
88
  konduktor/utils/rich_utils.py,sha256=kdjNe6S2LlpOxyzhFHqMzCz7g4ROC4e7TPWgcbRsrQE,3577
@@ -90,8 +90,8 @@ konduktor/utils/schemas.py,sha256=Gv7SEhFpv-eO5izqRz8d-eQ9z-lVmY05akm6HEXIIdc,17
90
90
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
91
91
  konduktor/utils/ux_utils.py,sha256=NPNu3Igu2Z9Oq77ghJhy_fIxQZTXWr9BtKyxN3Wslzo,7164
92
92
  konduktor/utils/validator.py,sha256=tgBghVyedyzGx84-U2Qfoh_cJBE3oUk9gclMW90ORks,691
93
- konduktor_nightly-0.1.0.dev20250422104744.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
94
- konduktor_nightly-0.1.0.dev20250422104744.dist-info/METADATA,sha256=0v9QX7mbQv7JhNJCsIYqHHa9nw4DGF3uNKPmwnIon2k,4354
95
- konduktor_nightly-0.1.0.dev20250422104744.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
96
- konduktor_nightly-0.1.0.dev20250422104744.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
97
- konduktor_nightly-0.1.0.dev20250422104744.dist-info/RECORD,,
93
+ konduktor_nightly-0.1.0.dev20250424104814.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
94
+ konduktor_nightly-0.1.0.dev20250424104814.dist-info/METADATA,sha256=FyMj5AqQzOiHDSZqEi2BKlJO-IyuWDfXbIlYkBYRqG0,4366
95
+ konduktor_nightly-0.1.0.dev20250424104814.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
96
+ konduktor_nightly-0.1.0.dev20250424104814.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
97
+ konduktor_nightly-0.1.0.dev20250424104814.dist-info/RECORD,,