konduktor-nightly 0.1.0.dev20250327104656__py3-none-any.whl → 0.1.0.dev20250328104606__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
konduktor/__init__.py CHANGED
@@ -14,7 +14,8 @@ __all__ = [
14
14
  ]
15
15
 
16
16
  # Replaced with the current commit when building the wheels.
17
- _KONDUKTOR_COMMIT_SHA = 'ac5a9936d3c1b6a76bffd3f660d68ac80634bda5'
17
+ _KONDUKTOR_COMMIT_SHA = '139a1d21db826f4beb908a929f2a02c514e1adef'
18
+ os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
18
19
 
19
20
 
20
21
  def _get_git_commit():
@@ -47,5 +48,5 @@ def _get_git_commit():
47
48
 
48
49
 
49
50
  __commit__ = _get_git_commit()
50
- __version__ = '1.0.0.dev0.1.0.dev20250327104656'
51
+ __version__ = '1.0.0.dev0.1.0.dev20250328104606'
51
52
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
konduktor/adaptors/gcp.py CHANGED
@@ -12,13 +12,16 @@
12
12
 
13
13
  """GCP cloud adaptors"""
14
14
 
15
- # pylint: disable=import-outside-toplevel
16
15
  import json
16
+ import os
17
+ from functools import wraps
18
+
19
+ from filelock import FileLock
17
20
 
18
21
  from konduktor.adaptors import common
19
22
 
20
23
  _IMPORT_ERROR_MESSAGE = (
21
- 'Failed to import dependencies for GCP. ' 'Try pip install "skypilot[gcp]"'
24
+ 'Failed to import dependencies for GCP. ' 'Try pip install "konduktor[gcp]"'
22
25
  )
23
26
  googleapiclient = common.LazyImport(
24
27
  'googleapiclient', import_error_message=_IMPORT_ERROR_MESSAGE
@@ -27,6 +30,34 @@ google = common.LazyImport('google', import_error_message=_IMPORT_ERROR_MESSAGE)
27
30
  _LAZY_MODULES = (google, googleapiclient)
28
31
 
29
32
 
33
+ class LockedClientProxy:
34
+ """Proxy for GCP client that locks access to the client."""
35
+
36
+ def __init__(
37
+ self,
38
+ client,
39
+ lock_path=os.path.expanduser('~/.konduktor/gcs_storage.lock'),
40
+ timeout=10,
41
+ ):
42
+ self._client = client
43
+ self._lock = FileLock(lock_path, timeout=timeout)
44
+
45
+ def __getattr__(self, attr):
46
+ target = getattr(self._client, attr)
47
+
48
+ if callable(target):
49
+
50
+ @wraps(target)
51
+ def locked_method(*args, **kwargs):
52
+ with self._lock:
53
+ return target(*args, **kwargs)
54
+
55
+ return locked_method
56
+ else:
57
+ # Attribute (not method) access just passes through
58
+ return target
59
+
60
+
30
61
  @common.load_lazy_modules(_LAZY_MODULES)
31
62
  def build(service_name: str, version: str, *args, **kwargs):
32
63
  """Build a GCP service.
@@ -44,7 +75,7 @@ def storage_client():
44
75
  """Helper that connects to GCS Storage Client for GCS Bucket"""
45
76
  from google.cloud import storage
46
77
 
47
- return storage.Client()
78
+ return LockedClientProxy(storage.Client())
48
79
 
49
80
 
50
81
  @common.load_lazy_modules(_LAZY_MODULES)
@@ -52,7 +83,7 @@ def anonymous_storage_client():
52
83
  """Helper that connects to GCS Storage Client for Public GCS Buckets"""
53
84
  from google.cloud import storage
54
85
 
55
- return storage.Client.create_anonymous_client()
86
+ return LockedClientProxy(storage.Client.create_anonymous_client())
56
87
 
57
88
 
58
89
  @common.load_lazy_modules(_LAZY_MODULES)
@@ -71,6 +102,14 @@ def forbidden_exception():
71
102
  return gcs_exceptions.Forbidden
72
103
 
73
104
 
105
+ @common.load_lazy_modules(_LAZY_MODULES)
106
+ def conflict_exception():
107
+ """Conflict exception."""
108
+ from google.api_core import exceptions as gcs_exceptions
109
+
110
+ return gcs_exceptions.Conflict
111
+
112
+
74
113
  @common.load_lazy_modules(_LAZY_MODULES)
75
114
  def http_error_exception():
76
115
  """HttpError exception."""
@@ -12,7 +12,8 @@ from urllib.parse import urlparse
12
12
  import colorama
13
13
 
14
14
  import konduktor
15
- from konduktor import cloud_stores, constants, kube_client, logging
15
+ from konduktor import constants, kube_client, logging
16
+ from konduktor.data import registry
16
17
  from konduktor.utils import common_utils, kubernetes_utils, log_utils
17
18
 
18
19
  if typing.TYPE_CHECKING:
@@ -102,7 +103,7 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
102
103
  if '/tmp/konduktor-job-filemounts-files' in dst:
103
104
  continue
104
105
  # should implement an accessor method here instead of raw dict access
105
- cloud_store = cloud_stores._REGISTRY[store_scheme]
106
+ cloud_store = registry._REGISTRY[store_scheme]
106
107
  storage_secrets[store_scheme] = cloud_store._STORE.get_k8s_credential_name()
107
108
  mkdir_commands.append(
108
109
  f'cd {constants.KONDUKTOR_REMOTE_WORKDIR};' f'mkdir -p {dst}'
@@ -118,7 +119,7 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
118
119
  assert task.file_mounts is not None
119
120
  for dst, src in task.file_mounts.items():
120
121
  store_scheme = str(urlparse(store.source).scheme)
121
- cloud_store = cloud_stores._REGISTRY[store_scheme]
122
+ cloud_store = registry._REGISTRY[store_scheme]
122
123
  mkdir_commands.append(
123
124
  f'cd {constants.KONDUKTOR_REMOTE_WORKDIR};'
124
125
  f'mkdir -p {os.path.dirname(dst)}'
konduktor/check.py CHANGED
@@ -34,12 +34,13 @@ from typing import Iterable, List, Optional, Tuple
34
34
  import click
35
35
  import colorama
36
36
 
37
- from konduktor import cloud_stores, logging
38
37
  from konduktor import config as konduktor_config
38
+ from konduktor import logging
39
+ from konduktor.data import registry
39
40
  from konduktor.utils import rich_utils
40
41
 
41
42
  if typing.TYPE_CHECKING:
42
- pass
43
+ from konduktor.data import storage_utils
43
44
 
44
45
  logger = logging.get_logger(__name__)
45
46
 
@@ -58,7 +59,7 @@ def check(
58
59
  disabled_clouds = []
59
60
 
60
61
  def check_one_cloud(
61
- cloud_tuple: Tuple[str, 'cloud_stores.CloudStorage'],
62
+ cloud_tuple: Tuple[str, 'storage_utils.CloudStorage'],
62
63
  ) -> None:
63
64
  cloud_repr, cloud = cloud_tuple
64
65
  with rich_utils.safe_status(f'Checking {cloud_repr}...'):
@@ -80,15 +81,15 @@ def check(
80
81
  disabled_clouds.append(cloud_repr)
81
82
  echo(f' Reason: {reason}')
82
83
 
83
- def get_cloud_tuple(cloud_name: str) -> Tuple[str, 'cloud_stores.CloudStorage']:
84
+ def get_cloud_tuple(cloud_name: str) -> Tuple[str, 'storage_utils.CloudStorage']:
84
85
  # Validates cloud_name and returns a tuple of the cloud's name and
85
86
  # the cloud object.
86
- cloud_obj = cloud_stores._REGISTRY.get(cloud_name, None)
87
+ cloud_obj = registry._REGISTRY.get(cloud_name, None)
87
88
  assert cloud_obj is not None, f'Cloud {cloud_name!r} not found'
88
89
  return cloud_name, cloud_obj
89
90
 
90
91
  def get_all_clouds():
91
- return tuple([c for c in cloud_stores._REGISTRY.keys()])
92
+ return tuple([c for c in registry._REGISTRY.keys()])
92
93
 
93
94
  if clouds is not None:
94
95
  cloud_list = clouds
@@ -158,7 +159,7 @@ def check(
158
159
  + '\n'
159
160
  + click.style(
160
161
  'If any problems remain, refer to detailed docs at: '
161
- 'https://konduktor.readthedocs.io/en/latest/admin/installation.html', # pylint: disable=line-too-long
162
+ 'https://trainy.mintlify.app', # pylint: disable=line-too-long
162
163
  dim=True,
163
164
  )
164
165
  )
@@ -179,14 +180,5 @@ def check(
179
180
  return enabled_clouds
180
181
 
181
182
 
182
- # === Helper functions ===
183
- def storage_in_iterable(
184
- cloud: 'cloud_stores.GcsCloudStorage',
185
- cloud_list: Iterable['cloud_stores.GcsCloudStorage'],
186
- ) -> bool:
187
- """Returns whether the cloud is in the given cloud list."""
188
- return any(cloud == c for c in cloud_list)
189
-
190
-
191
183
  def _format_enabled_storage(cloud_name: str) -> str:
192
184
  return f'{colorama.Fore.GREEN}{cloud_name}{colorama.Style.RESET_ALL}'
@@ -4,9 +4,4 @@ Path = str
4
4
  SourceType = Union[Path]
5
5
  StorageHandle = Any
6
6
 
7
- # TODO(asaiacai) This should match the cloud store
8
- # classes in cloud_stores.py,
9
- # should honestly just use one or the other instead of both
10
- STORE_ENABLED_CLOUDS = ['gs']
11
-
12
7
  _STORAGE_LOG_FILE_NAME = 'storage.log'
@@ -1,18 +1,18 @@
1
1
  """Data sync between workstation <--> blob (s3, gcs, etc.) <--> worker pods"""
2
2
 
3
3
  from konduktor.data.gcp.constants import (
4
- DEFAULT_SERVICE_ACCOUNT_ROLES,
5
4
  STORAGE_MINIMAL_PERMISSIONS,
6
5
  )
7
6
  from konduktor.data.gcp.gcs import (
8
7
  DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH,
9
8
  GOOGLE_SDK_INSTALLATION_COMMAND,
9
+ GcsCloudStorage,
10
10
  GcsStore,
11
11
  )
12
12
 
13
13
  __all__ = [
14
14
  'GcsStore',
15
- 'DEFAULT_SERVICE_ACCOUNT_ROLES',
15
+ 'GcsCloudStorage',
16
16
  'STORAGE_MINIMAL_PERMISSIONS',
17
17
  'GOOGLE_SDK_INSTALLATION_COMMAND',
18
18
  'DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH',
konduktor/data/gcp/gcs.py CHANGED
@@ -578,6 +578,10 @@ class GcsStore(storage_utils.AbstractStore):
578
578
  bucket = self.client.bucket(bucket_name)
579
579
  bucket.storage_class = 'STANDARD'
580
580
  new_bucket = self.client.create_bucket(bucket, location=region)
581
+ except gcp.conflict_exception():
582
+ # it's fine to pass this exception since
583
+ # this means the bucket already exists
584
+ pass
581
585
  except Exception as e: # pylint: disable=broad-except
582
586
  with ux_utils.print_exception_no_traceback():
583
587
  raise exceptions.StorageBucketCreateError(
@@ -904,3 +908,79 @@ class GcsStore(storage_utils.AbstractStore):
904
908
  @classmethod
905
909
  def get_k8s_credential_name(cls) -> str:
906
910
  return cls._GCP_SECRET_NAME
911
+
912
+
913
+ class GcsCloudStorage(storage_utils.CloudStorage):
914
+ """Google Cloud Storage."""
915
+
916
+ # We use gsutil as a basic implementation. One pro is that its -m
917
+ # multi-threaded download is nice, which frees us from implementing
918
+ # parallel workers on our end.
919
+ # The gsutil command is part of the Google Cloud SDK, and we reuse
920
+ # the installation logic here.
921
+ _INSTALL_GSUTIL = GOOGLE_SDK_INSTALLATION_COMMAND
922
+ _STORE: typing.Type[storage_utils.AbstractStore] = GcsStore
923
+
924
+ @property
925
+ def _gsutil_command(self):
926
+ gsutil_alias, alias_gen = data_utils.get_gsutil_command()
927
+ return (
928
+ f'{alias_gen}; GOOGLE_APPLICATION_CREDENTIALS='
929
+ f'{DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH}; '
930
+ # Explicitly activate service account. Unlike the gcp packages
931
+ # and other GCP commands, gsutil does not automatically pick up
932
+ # the default credential keys when it is a service account.
933
+ 'gcloud auth activate-service-account '
934
+ '--key-file=$GOOGLE_APPLICATION_CREDENTIALS '
935
+ '2> /dev/null || true; '
936
+ f'{gsutil_alias}'
937
+ )
938
+
939
+ def is_directory(self, url: str) -> bool:
940
+ """Returns whether 'url' is a directory.
941
+ In cloud object stores, a "directory" refers to a regular object whose
942
+ name is a prefix of other objects.
943
+ """
944
+ commands = [self._INSTALL_GSUTIL]
945
+ commands.append(f'{self._gsutil_command} ls -d {url}')
946
+ command = ' && '.join(commands)
947
+ p = subprocess.run(
948
+ command,
949
+ stdout=subprocess.PIPE,
950
+ shell=True,
951
+ check=True,
952
+ executable='/bin/bash',
953
+ )
954
+ out = p.stdout.decode().strip()
955
+ # Edge Case: Gcloud command is run for first time #437
956
+ out = out.split('\n')[-1]
957
+ # If <url> is a bucket root, then we only need `gsutil` to succeed
958
+ # to make sure the bucket exists. It is already a directory.
959
+ _, key = data_utils.split_gcs_path(url)
960
+ if not key:
961
+ return True
962
+ # Otherwise, gsutil ls -d url will return:
963
+ # --> url.rstrip('/') if url is not a directory
964
+ # --> url with an ending '/' if url is a directory
965
+ if not out.endswith('/'):
966
+ assert out == url.rstrip('/'), (out, url)
967
+ return False
968
+ url = url if url.endswith('/') else (url + '/')
969
+ assert out == url, (out, url)
970
+ return True
971
+
972
+ def make_sync_dir_command(self, source: str, destination: str) -> str:
973
+ """Downloads a directory using gsutil."""
974
+ download_via_gsutil = (
975
+ f'{self._gsutil_command} ' f'rsync -e -r {source} {destination}'
976
+ )
977
+ all_commands = [self._INSTALL_GSUTIL]
978
+ all_commands.append(download_via_gsutil)
979
+ return ' && '.join(all_commands)
980
+
981
+ def make_sync_file_command(self, source: str, destination: str) -> str:
982
+ """Downloads a file using gsutil."""
983
+ download_via_gsutil = f'{self._gsutil_command} ' f'cp {source} {destination}'
984
+ all_commands = [self._INSTALL_GSUTIL]
985
+ all_commands.append(download_via_gsutil)
986
+ return ' && '.join(all_commands)
@@ -0,0 +1,18 @@
1
+ from konduktor.data.gcp import GcsCloudStorage
2
+
3
+ # Maps a bucket URI's prefix (scheme) to its corresponding storage class
4
+
5
+ _REGISTRY = {
6
+ 'gs': GcsCloudStorage(),
7
+ # TODO(asaiacai): Add other cloud stores here
8
+ # 's3': S3CloudStorage(),
9
+ # 'r2': R2CloudStorage(),
10
+ # 'cos': IBMCosCloudStorage(),
11
+ # 'oci': OciCloudStorage(),
12
+ # # TODO: This is a hack, as Azure URL starts with https://, we should
13
+ # # refactor the registry to be able to take regex, so that Azure blob can
14
+ # # be identified with `https://(.*?)\.blob\.core\.windows\.net`
15
+ # 'https': AzureBlobCloudStorage()
16
+ }
17
+
18
+ _STORE_ENABLED_CLOUDS = list(_REGISTRY.keys())
konduktor/data/storage.py CHANGED
@@ -28,7 +28,7 @@ import urllib.parse
28
28
  from typing import Any, Dict, List, Literal, Optional, Tuple, Type, Union
29
29
 
30
30
  from konduktor import check, config, logging
31
- from konduktor.data import constants, data_utils, gcp, storage_utils
31
+ from konduktor.data import constants, data_utils, gcp, registry, storage_utils
32
32
  from konduktor.utils import annotations, common_utils, exceptions, schemas, ux_utils
33
33
 
34
34
  logger = logging.get_logger(__file__)
@@ -40,13 +40,13 @@ def get_cached_enabled_storage_clouds_or_refresh(
40
40
  ) -> List[str]:
41
41
  # This is a temporary solution until https://github.com/skypilot-org/skypilot/issues/1943 # noqa: E501
42
42
  # (asaiacai): This function does not do any actual checking right now.
43
- # this is temporary.In the future, we can cache to disk.
43
+ # this is temporary. In the future, we can cache to disk.
44
44
  # For now, we just print a warning to the user saying what
45
45
  # clouds are enabled and if the task fails to run `konduktor check`
46
46
  # to update the credentials.
47
47
  enabled_clouds = config.get_nested(('allowed_clouds',), [])
48
48
  if len(enabled_clouds) == 0:
49
- enabled_clouds = constants.STORE_ENABLED_CLOUDS
49
+ enabled_clouds = registry._STORE_ENABLED_CLOUDS
50
50
  else:
51
51
  enabled_clouds = [str(cloud) for cloud in enabled_clouds]
52
52
  logger.warning(
@@ -498,3 +498,38 @@ class AbstractStore:
498
498
  def get_k8s_credential_name(cls) -> str:
499
499
  """Returns the name of the k8s secret storing the credentials for the store."""
500
500
  raise NotImplementedError
501
+
502
+
503
+ class CloudStorage:
504
+ """Interface for a cloud object store."""
505
+
506
+ # this needs to be overridden by the subclass
507
+ _STORE: typing.Type[AbstractStore]
508
+
509
+ def is_directory(self, url: str) -> bool:
510
+ """Returns whether 'url' is a directory.
511
+
512
+ In cloud object stores, a "directory" refers to a regular object whose
513
+ name is a prefix of other objects.
514
+ """
515
+ raise NotImplementedError
516
+
517
+ def make_sync_dir_command(self, source: str, destination: str) -> str:
518
+ """Makes a runnable bash command to sync a 'directory'."""
519
+ raise NotImplementedError
520
+
521
+ def make_sync_file_command(self, source: str, destination: str) -> str:
522
+ """Makes a runnable bash command to sync a file."""
523
+ raise NotImplementedError
524
+
525
+ def check_credentials(self):
526
+ """Checks if the user has access credentials to this cloud."""
527
+ return self._STORE.check_credentials()
528
+
529
+ def check_credentials_from_secret(self):
530
+ """Checks if the user has access credentials to this cloud."""
531
+ return self._STORE.check_credentials_from_secret()
532
+
533
+ def set_secret_credentials(self):
534
+ """Set the credentials from the secret"""
535
+ return self._STORE.set_secret_credentials()
konduktor/execution.py CHANGED
@@ -13,8 +13,8 @@ if typing.TYPE_CHECKING:
13
13
  from konduktor import config, constants
14
14
  from konduktor import logging as konduktor_logging
15
15
  from konduktor.backends import JobsetBackend
16
- from konduktor.data import constants as storage_constants
17
16
  from konduktor.data import data_utils
17
+ from konduktor.data import registry as storage_registry
18
18
  from konduktor.data import storage as storage_lib
19
19
  from konduktor.utils import common_utils, exceptions, rich_utils, ux_utils
20
20
 
@@ -345,7 +345,7 @@ def maybe_translate_local_file_mounts_and_sync_up(
345
345
  data_src += ' and workdir'
346
346
  else:
347
347
  data_src = 'workdir'
348
- store_enabled_clouds = ', '.join(storage_constants.STORE_ENABLED_CLOUDS)
348
+ store_enabled_clouds = ', '.join(storage_registry._STORE_ENABLED_CLOUDS)
349
349
  with ux_utils.print_exception_no_traceback():
350
350
  raise exceptions.NotSupportedError(
351
351
  f'Unable to use {data_src} - no cloud with object '
konduktor/kube_client.py CHANGED
@@ -18,7 +18,7 @@ DEFAULT_IN_CLUSTER_REGION = 'in-cluster'
18
18
  # for Kubernetes clusters. This is used to associate a name with the current
19
19
  # context when running with in-cluster auth. If not set, the context name is
20
20
  # set to DEFAULT_IN_CLUSTER_REGION.
21
- IN_CLUSTER_CONTEXT_NAME_ENV_VAR = 'SKYPILOT_IN_CLUSTER_CONTEXT_NAME'
21
+ IN_CLUSTER_CONTEXT_NAME_ENV_VAR = 'KONDUKTOR_IN_CLUSTER_CONTEXT_NAME'
22
22
 
23
23
 
24
24
  def _decorate_methods(obj: Any, decorator: Callable, decoration_type: str):
@@ -85,7 +85,7 @@ kubernetes:
85
85
  PACKAGES="$PACKAGES git";
86
86
  {% endif %}
87
87
 
88
- if [ -z "${PACKAGES}" ]; then
88
+ if [ ! -z "${PACKAGES}" ]; then
89
89
  # Run apt update, install missing packages
90
90
  DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt-get update > ~/.konduktor/tmp/apt-update.log 2>&1 || \
91
91
  $(prefix_cmd) echo "Warning: apt-get update failed. Continuing anyway..." >> ~/.konduktor/tmp/apt-update.log
@@ -454,7 +454,8 @@ _REMOTE_IDENTITY_SCHEMA_KUBERNETES = {
454
454
 
455
455
  def get_storage_schema():
456
456
  # pylint: disable=import-outside-toplevel
457
- from konduktor import cloud_stores
457
+ from konduktor.data import registry
458
+
458
459
  from konduktor.data import storage
459
460
 
460
461
  return {
@@ -476,7 +477,7 @@ def get_storage_schema():
476
477
  },
477
478
  'store': {
478
479
  'type': 'string',
479
- 'case_insensitive_enum': [type for type in cloud_stores._REGISTRY],
480
+ 'case_insensitive_enum': [type for type in registry._REGISTRY],
480
481
  },
481
482
  'persistent': {
482
483
  'type': 'boolean',
@@ -497,7 +498,7 @@ def get_storage_schema():
497
498
 
498
499
  def get_config_schema():
499
500
  # pylint: disable=import-outside-toplevel
500
- from konduktor import cloud_stores
501
+ from konduktor.data import registry
501
502
  from konduktor.utils import kubernetes_enums
502
503
 
503
504
  cloud_configs = {
@@ -547,7 +548,7 @@ def get_config_schema():
547
548
  'required': ['items'],
548
549
  'items': {
549
550
  'type': 'string',
550
- 'case_insensitive_enum': (list(cloud_stores._REGISTRY.keys())),
551
+ 'case_insensitive_enum': (list(registry._REGISTRY.keys())),
551
552
  },
552
553
  }
553
554
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250327104656
3
+ Version: 0.1.0.dev20250328104606
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -12,6 +12,7 @@ Classifier: Programming Language :: Python :: 3.11
12
12
  Classifier: Programming Language :: Python :: 3.12
13
13
  Requires-Dist: click (>=8.1.7,<9.0.0)
14
14
  Requires-Dist: colorama (>=0.4.6,<0.5.0)
15
+ Requires-Dist: filelock (>=3.18.0,<4.0.0)
15
16
  Requires-Dist: google-api-python-client[gcp] (>=2.161.0,<3.0.0)
16
17
  Requires-Dist: google-cloud-storage[gcp] (>=3.0.0,<4.0.0)
17
18
  Requires-Dist: jinja2 (>=3.1.5,<4.0.0)
@@ -1,14 +1,13 @@
1
- konduktor/__init__.py,sha256=CWG3yvw6TSNGuQHtxiuStEVcTZ-0X7So3cCPi0XyJks,1477
1
+ konduktor/__init__.py,sha256=2U2pj_Uu1x76859aRUOI6UapnthXIO_rYCUUhV4K8oQ,1540
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/common.py,sha256=mYb_6c3u5MghtiFfiW5OO-EH6t7cIR5npbkgUmz6FYE,3517
4
- konduktor/adaptors/gcp.py,sha256=liCm4_D_qSci0DZA2t5bckLIoGDkJ8qx31EO_hSBzo0,3751
4
+ konduktor/adaptors/gcp.py,sha256=pOQA2q8fFyr97Htn8EqvNM0XT-Ao8UwvExviiLaDats,4746
5
5
  konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
6
6
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
7
7
  konduktor/backends/jobset.py,sha256=lh_PihQgM0tmVryCpjSsZjWug8hBnJr7ua9lqk0qEAM,8251
8
- konduktor/backends/jobset_utils.py,sha256=1LOiP-UVtezbLP0jbtQR7pwBQH3B5b5sqptVVzri-48,17222
9
- konduktor/check.py,sha256=hIrxDMKaGX2eZP-Pj9TCymGUHQAp93m48Gj3XMiqadA,7833
8
+ konduktor/backends/jobset_utils.py,sha256=5xeFPNN724SX6M4p4Koump5yCcdRJdiSUs03TexSwMQ,17236
9
+ konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
10
10
  konduktor/cli.py,sha256=90bnh3nIobfBkzqS_SXgw9Z8Zqh4ouwpLDj0kx_6kL8,23562
11
- konduktor/cloud_stores.py,sha256=KX3u5YlXGslMCe_q8zYtFy62_KGCmmLTrYuK7Y9jFIM,6277
12
11
  konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
13
12
  konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
14
13
  konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -49,16 +48,17 @@ konduktor/dashboard/frontend/postcss.config.mjs,sha256=rDHiqV72T-J860Ek4QFnUnMQe
49
48
  konduktor/dashboard/frontend/server.js,sha256=jcp6_Ww9YJD3uKY07jR3KMlAM6n1QZdxZnVY6Kh-J6A,1789
50
49
  konduktor/dashboard/frontend/tailwind.config.js,sha256=fCnc48wvioIDOe5ldQ_6RE7F76cP7aU7pDrxBPJx-Fk,366
51
50
  konduktor/data/__init__.py,sha256=KMR2i3E9YcIpiIuCxtRdS7BQ1w2vUAbbve7agziJrLo,213
52
- konduktor/data/constants.py,sha256=tAYHrmzPCI2lwK_Fy7AfwE1RTcb38AYW99Zkw8fEtDM,299
51
+ konduktor/data/constants.py,sha256=yXVEoTI2we1xOjVSU-bjRCQCLpVvpEvJ0GedXvSwEfw,127
53
52
  konduktor/data/data_utils.py,sha256=aIv3q2H1GSiN2w8WNjZgVaglm-hoiHSb4KR-MAiKKXs,8383
54
- konduktor/data/gcp/__init__.py,sha256=m40OpIx2UDGnREokqfQ52OsAfo9WXC748hF98YWyG-A,517
53
+ konduktor/data/gcp/__init__.py,sha256=rlQxACBC_Vu36mdgPyJgUy4mGc_6Nt_a96JAuaPz2pQ,489
55
54
  konduktor/data/gcp/constants.py,sha256=dMfOiFccM8O6rUi9kClJcbvw1K1VnS1JzzQk3apq8ho,1483
56
- konduktor/data/gcp/gcs.py,sha256=wJri7wG7FJBQvEkAqWQsNHJMLWqMH2n10d8vyy0M_5o,38650
55
+ konduktor/data/gcp/gcs.py,sha256=kDbUzf8ALYzsw_G3sBRn_enQ8fjI-UKV0jeWuFZiULA,42018
57
56
  konduktor/data/gcp/utils.py,sha256=FJQcMXZqtMIzjZ98b3lTTc0UbdPUKTDLsOsfJaaH5-s,214
58
- konduktor/data/storage.py,sha256=KppuO1bYSyMA7RSkmBbJnw8xwBPjKYQ8IbtxYv3abz4,34731
59
- konduktor/data/storage_utils.py,sha256=v_OZyWEb9DmNeLxn_OEJLCzKa0Y3MxcKI5CmPL3jQ6I,19566
60
- konduktor/execution.py,sha256=wwJUQJO3Rc0u0TiF78ilwsEw3a7gGa6uEwBULfeIzAQ,18403
61
- konduktor/kube_client.py,sha256=Dhza1605wmS4TaFCrw63Y7nh3oSc2P3ekUE2-RI-Qlw,6155
57
+ konduktor/data/registry.py,sha256=eLs8Wr5ugwOfXGPtg1utTvGIqdbVLsCf-a3PFS1NELc,652
58
+ konduktor/data/storage.py,sha256=sm0ZfGZUZRiChza_jMRQY1xDIWtZpFQqwPuVOF8PM_Y,34742
59
+ konduktor/data/storage_utils.py,sha256=n4GivkN0KMqmyOTDznF0Z-hzsJvm7KCEh5i5HgFAT-4,20806
60
+ konduktor/execution.py,sha256=UaHUdBmDaIYgiAXkRKJQOHniYPVIR4sr4yUbIqpgMrQ,18401
61
+ konduktor/kube_client.py,sha256=aqwjDfNSneB5NOxV6CtqhkBeNl0UQNUt730R3ujG9Ow,6156
62
62
  konduktor/logging.py,sha256=mBCoCTNhDEkUxd4tsse4mw-aVzSGohhXYf16ViR0ch4,2722
63
63
  konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4bo3lrigRmhf8NXBHE,1730
64
64
  konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
@@ -67,7 +67,7 @@ konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw
67
67
  konduktor/resource.py,sha256=68z8gC8Ivqktwv0R6ylMn9ZNocgkcRT0yIRGGKOdwcM,18491
68
68
  konduktor/task.py,sha256=edHgMLYECGux6WLCilqsNZNYr3dEcw_miWvu4FYpu5U,34713
69
69
  konduktor/templates/jobset.yaml.j2,sha256=onYiHtXAgk-XBtji994hPu_g0hxnLzvmfxwjbdKdeZc,960
70
- konduktor/templates/pod.yaml.j2,sha256=XLQ2dD7jq9yeF-eKtweXaMrHWj5cFI-DIZwcJ3qkANQ,8433
70
+ konduktor/templates/pod.yaml.j2,sha256=rO5rDfM2XYCRbc5tD0bXzIK_ulZGlVISZdQdJzr-Gfk,8435
71
71
  konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
72
72
  konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
73
73
  konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -83,12 +83,12 @@ konduktor/utils/kubernetes_utils.py,sha256=NGBredKPWpZC8VNlwTfWLhHnc-p68d5xlxT-0
83
83
  konduktor/utils/log_utils.py,sha256=lgHCq4OdtJNfbpso-uYGONUCVNsUrUkUWjROarsHt6s,9897
84
84
  konduktor/utils/loki_utils.py,sha256=ND1pbbbFhLhLKw3870j44LpR_9MB0EkDJSs5K7nWdY4,3473
85
85
  konduktor/utils/rich_utils.py,sha256=kdjNe6S2LlpOxyzhFHqMzCz7g4ROC4e7TPWgcbRsrQE,3577
86
- konduktor/utils/schemas.py,sha256=4Goihc-NpFQpiJ7RSiKirAIPNWqw_DV_TRqVwejqTDY,17479
86
+ konduktor/utils/schemas.py,sha256=Gv7SEhFpv-eO5izqRz8d-eQ9z-lVmY05akm6HEXIIdc,17478
87
87
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
88
88
  konduktor/utils/ux_utils.py,sha256=NPNu3Igu2Z9Oq77ghJhy_fIxQZTXWr9BtKyxN3Wslzo,7164
89
89
  konduktor/utils/validator.py,sha256=tgBghVyedyzGx84-U2Qfoh_cJBE3oUk9gclMW90ORks,691
90
- konduktor_nightly-0.1.0.dev20250327104656.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
91
- konduktor_nightly-0.1.0.dev20250327104656.dist-info/METADATA,sha256=EX_G_loycMXJbLC5OQc05ikBf0RbbGbA2zMGia3xNyc,4070
92
- konduktor_nightly-0.1.0.dev20250327104656.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
93
- konduktor_nightly-0.1.0.dev20250327104656.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
94
- konduktor_nightly-0.1.0.dev20250327104656.dist-info/RECORD,,
90
+ konduktor_nightly-0.1.0.dev20250328104606.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
91
+ konduktor_nightly-0.1.0.dev20250328104606.dist-info/METADATA,sha256=AEritN7-smRecfPlFGJTxTtmqRImheYS4fW-KsGCBBo,4112
92
+ konduktor_nightly-0.1.0.dev20250328104606.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
93
+ konduktor_nightly-0.1.0.dev20250328104606.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
94
+ konduktor_nightly-0.1.0.dev20250328104606.dist-info/RECORD,,
konduktor/cloud_stores.py DELETED
@@ -1,158 +0,0 @@
1
- # Proprietary Changes made for Trainy under the Trainy Software License
2
- # Original source: skypilot: https://github.com/skypilot-org/skypilot
3
- # which is Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- # http://www.apache.org/licenses/LICENSE-2.0
7
- # Unless required by applicable law or agreed to in writing, software
8
- # distributed under the License is distributed on an "AS IS" BASIS,
9
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
- # See the License for the specific language governing permissions and
11
- # limitations under the License.
12
-
13
- """Cloud object stores.
14
-
15
- Currently, used for transferring data in bulk. Thus, this module does not
16
- offer file-level calls (e.g., open, reading, writing).
17
-
18
- TODO:
19
- * Better interface.
20
- * Better implementation (e.g., fsspec, smart_open, using each cloud's SDK).
21
- """
22
-
23
- import subprocess
24
- import typing
25
-
26
- from konduktor import logging
27
- from konduktor.data import data_utils, gcp, storage_utils
28
-
29
- logger = logging.get_logger(__name__)
30
-
31
- # TODO(asaiacai): this internal API is shit and should just be unified with
32
- # the storage_utils.AbstractStore class. Shit Berkeley EECS as usual.
33
-
34
-
35
- class CloudStorage:
36
- """Interface for a cloud object store."""
37
-
38
- # this needs to be overridden by the subclass
39
- _STORE: typing.Type[storage_utils.AbstractStore]
40
-
41
- def is_directory(self, url: str) -> bool:
42
- """Returns whether 'url' is a directory.
43
-
44
- In cloud object stores, a "directory" refers to a regular object whose
45
- name is a prefix of other objects.
46
- """
47
- raise NotImplementedError
48
-
49
- def make_sync_dir_command(self, source: str, destination: str) -> str:
50
- """Makes a runnable bash command to sync a 'directory'."""
51
- raise NotImplementedError
52
-
53
- def make_sync_file_command(self, source: str, destination: str) -> str:
54
- """Makes a runnable bash command to sync a file."""
55
- raise NotImplementedError
56
-
57
- def check_credentials(self):
58
- """Checks if the user has access credentials to this cloud."""
59
- return self._STORE.check_credentials()
60
-
61
- def check_credentials_from_secret(self):
62
- """Checks if the user has access credentials to this cloud."""
63
- return self._STORE.check_credentials_from_secret()
64
-
65
- def set_secret_credentials(self):
66
- """Set the credentials from the secret"""
67
- return self._STORE.set_secret_credentials()
68
-
69
-
70
- class GcsCloudStorage(CloudStorage):
71
- """Google Cloud Storage."""
72
-
73
- # We use gsutil as a basic implementation. One pro is that its -m
74
- # multi-threaded download is nice, which frees us from implementing
75
- # parellel workers on our end.
76
- # The gsutil command is part of the Google Cloud SDK, and we reuse
77
- # the installation logic here.
78
- _INSTALL_GSUTIL = gcp.GOOGLE_SDK_INSTALLATION_COMMAND
79
- _STORE: typing.Type[storage_utils.AbstractStore] = gcp.GcsStore
80
-
81
- @property
82
- def _gsutil_command(self):
83
- gsutil_alias, alias_gen = data_utils.get_gsutil_command()
84
- return (
85
- f'{alias_gen}; GOOGLE_APPLICATION_CREDENTIALS='
86
- f'{gcp.DEFAULT_GCP_APPLICATION_CREDENTIAL_PATH}; '
87
- # Explicitly activate service account. Unlike the gcp packages
88
- # and other GCP commands, gsutil does not automatically pick up
89
- # the default credential keys when it is a service account.
90
- 'gcloud auth activate-service-account '
91
- '--key-file=$GOOGLE_APPLICATION_CREDENTIALS '
92
- '2> /dev/null || true; '
93
- f'{gsutil_alias}'
94
- )
95
-
96
- def is_directory(self, url: str) -> bool:
97
- """Returns whether 'url' is a directory.
98
- In cloud object stores, a "directory" refers to a regular object whose
99
- name is a prefix of other objects.
100
- """
101
- commands = [self._INSTALL_GSUTIL]
102
- commands.append(f'{self._gsutil_command} ls -d {url}')
103
- command = ' && '.join(commands)
104
- p = subprocess.run(
105
- command,
106
- stdout=subprocess.PIPE,
107
- shell=True,
108
- check=True,
109
- executable='/bin/bash',
110
- )
111
- out = p.stdout.decode().strip()
112
- # Edge Case: Gcloud command is run for first time #437
113
- out = out.split('\n')[-1]
114
- # If <url> is a bucket root, then we only need `gsutil` to succeed
115
- # to make sure the bucket exists. It is already a directory.
116
- _, key = data_utils.split_gcs_path(url)
117
- if not key:
118
- return True
119
- # Otherwise, gsutil ls -d url will return:
120
- # --> url.rstrip('/') if url is not a directory
121
- # --> url with an ending '/' if url is a directory
122
- if not out.endswith('/'):
123
- assert out == url.rstrip('/'), (out, url)
124
- return False
125
- url = url if url.endswith('/') else (url + '/')
126
- assert out == url, (out, url)
127
- return True
128
-
129
- def make_sync_dir_command(self, source: str, destination: str) -> str:
130
- """Downloads a directory using gsutil."""
131
- download_via_gsutil = (
132
- f'{self._gsutil_command} ' f'rsync -e -r {source} {destination}'
133
- )
134
- all_commands = [self._INSTALL_GSUTIL]
135
- all_commands.append(download_via_gsutil)
136
- return ' && '.join(all_commands)
137
-
138
- def make_sync_file_command(self, source: str, destination: str) -> str:
139
- """Downloads a file using gsutil."""
140
- download_via_gsutil = f'{self._gsutil_command} ' f'cp {source} {destination}'
141
- all_commands = [self._INSTALL_GSUTIL]
142
- all_commands.append(download_via_gsutil)
143
- return ' && '.join(all_commands)
144
-
145
-
146
- # Maps bucket's URIs prefix(scheme) to its corresponding storage class
147
- _REGISTRY = {
148
- 'gs': GcsCloudStorage(),
149
- # TODO(asaiacai): Add other cloud stores here
150
- # 's3': S3CloudStorage(),
151
- # 'r2': R2CloudStorage(),
152
- # 'cos': IBMCosCloudStorage(),
153
- # 'oci': OciCloudStorage(),
154
- # # TODO: This is a hack, as Azure URL starts with https://, we should
155
- # # refactor the registry to be able to take regex, so that Azure blob can
156
- # # be identified with `https://(.*?)\.blob\.core\.windows\.net`
157
- # 'https': AzureBlobCloudStorage()
158
- }