konduktor-nightly 0.1.0.dev20250514104854__py3-none-any.whl → 0.1.0.dev20250515104942__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
konduktor/__init__.py CHANGED
@@ -14,7 +14,7 @@ __all__ = [
14
14
  ]
15
15
 
16
16
  # Replaced with the current commit when building the wheels.
17
- _KONDUKTOR_COMMIT_SHA = '05c7d9e243ae23c6e9abb0a4a034bfc0815fd587'
17
+ _KONDUKTOR_COMMIT_SHA = 'c0bd8e8774fab8042721b43a8cb8c35a624f8299'
18
18
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
19
19
 
20
20
 
@@ -48,5 +48,5 @@ def _get_git_commit():
48
48
 
49
49
 
50
50
  __commit__ = _get_git_commit()
51
- __version__ = '1.0.0.dev0.1.0.dev20250514104854'
51
+ __version__ = '1.0.0.dev0.1.0.dev20250515104942'
52
52
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -26,8 +26,7 @@ class LazyImport:
26
26
 
27
27
  We use this for pandas and networkx, as they can be time-consuming to import
28
28
  (0.1-0.2 seconds). With this class, we can avoid the unnecessary import time
29
- when the module is not used (e.g., `networkx` should not be imported for
30
- `sky status and `pandas` should not be imported for `sky exec`).
29
+ when the module is not used.
31
30
 
32
31
  We also use this for cloud adaptors, because we do not want to import the
33
32
  cloud dependencies when it is not enabled.
@@ -109,7 +109,7 @@ def get_or_generate_keys() -> Tuple[str, str]:
109
109
  lock_path = os.path.expanduser(lock_path)
110
110
 
111
111
  lock_dir = os.path.dirname(lock_path)
112
- # We should have the folder ~/.sky/generated/ssh to have 0o700 permission,
112
+ # The folder ~/.konduktor/generated/ssh should have 0o700 permission,
113
113
  # as the ssh configs will be written to this folder as well in
114
114
  # backend_utils.SSHConfigHelper
115
115
  os.makedirs(lock_dir, exist_ok=True, mode=0o700)
konduktor/cli.py CHANGED
@@ -105,7 +105,7 @@ def _make_task_with_overrides(
105
105
  env: Optional[List[Tuple[str, str]]] = None,
106
106
  field_to_ignore: Optional[List[str]] = None,
107
107
  ) -> konduktor.Task:
108
- """Creates a task or a dag from an entrypoint with overrides.
108
+ """Creates a task from an entrypoint with overrides.
109
109
 
110
110
  Returns:
111
111
  konduktor.Task
@@ -271,8 +271,7 @@ _EXTRA_RESOURCES_OPTIONS = [
271
271
  type=str,
272
272
  help=(
273
273
  'Type and number of GPUs to use. Example values: '
274
- '"V100:8", "V100" (short for a count of 1), or "V100:0.5" '
275
- '(fractional counts are supported by the scheduling framework). '
274
+ '"V100:8", "V100" (short for a count of 1). '
276
275
  'If a new cluster is being launched by this command, this is the '
277
276
  'resources to provision. If an existing cluster is being reused, this'
278
277
  " is seen as the task demand, which must fit the cluster's total "
@@ -219,10 +219,10 @@ def get_gsutil_command() -> Tuple[str, str]:
219
219
  cmd_to_run = f'{alias_gen}; {gsutil_alias} cp ...'
220
220
  ```
221
221
  """
222
- gsutil_alias = 'skypilot_gsutil'
222
+ gsutil_alias = 'konduktor_gsutil'
223
223
  disable_multiprocessing_flag = '-o "GSUtil:parallel_process_count=1"'
224
224
 
225
- # Define skypilot_gsutil as a shell function instead of an alias.
225
+ # Define konduktor_gsutil as a shell function instead of an alias.
226
226
  # This function will behave just like alias, but can be called immediately
227
227
  # after its definition on the same line
228
228
  alias_gen = (
konduktor/data/storage.py CHANGED
@@ -271,15 +271,14 @@ class Storage(object):
271
271
  Can be a single local path, a list of local paths, or a cloud URI
272
272
  (s3://, gs://, etc.). Local paths do not need to be absolute.
273
273
  stores: Optional; Specify pre-initialized stores (S3Store, GcsStore).
274
- persistent: bool; Whether to persist across sky launches.
274
+ persistent: bool; Whether to persist across konduktor launches.
275
275
  mode: StorageMode; Specify how the storage object is manifested on
276
276
  the remote VM. Can be either MOUNT or COPY. Defaults to MOUNT.
277
- sync_on_reconstruction: bool; Whether to sync the data if the storage
278
- object is found in the global_user_state and reconstructed from
279
- there. This is set to false when the Storage object is created not
280
- for direct use, e.g. for 'sky storage delete', or the storage is
281
- being re-used, e.g., for `sky start` on a stopped cluster.
282
- _is_sky_managed: Optional[bool]; Indicates if the storage is managed
277
+ sync_on_reconstruction: bool; [defunct] Whether to sync the
278
+ data if the storage object is found in the global_user_state
279
+ and reconstructed from there. This is set to
280
+ false when the Storage object is created not for direct use.
281
+ _is_sky_managed: Optional[bool]; [defunct] Indicates if the storage is managed
283
282
  by Sky. Without this argument, the controller's behavior differs
284
283
  from the local machine. For example, if a bucket does not exist:
285
284
  Local Machine (is_sky_managed=True) →
konduktor/execution.py CHANGED
@@ -149,10 +149,10 @@ def maybe_translate_local_file_mounts_and_sync_up(
149
149
  msg = 'workdir'
150
150
  if msg:
151
151
  logger.info(
152
- ux_utils.starting_message(f'Translating {msg} to ' 'SkyPilot Storage...')
152
+ ux_utils.starting_message(f'Translating {msg} to ' 'cloud Storage...')
153
153
  )
154
154
  rich_utils.force_update_status(
155
- ux_utils.spinner_message(f'Translating {msg} to SkyPilot Storage...')
155
+ ux_utils.spinner_message(f'Translating {msg} to cloud Storage...')
156
156
  )
157
157
 
158
158
  # Get the bucket name for the workdir and file mounts,
konduktor/kube_client.py CHANGED
@@ -63,8 +63,6 @@ def _load_config(context: Optional[str] = None):
63
63
  err_str = (
64
64
  f'Failed to load Kubernetes configuration for {context!r}. '
65
65
  'Kubeconfig does not contain any valid context(s).\n'
66
- ' If you were running a local Kubernetes '
67
- 'cluster, run `sky local up` to start the cluster.'
68
66
  )
69
67
  else:
70
68
  err_str = (
@@ -72,7 +70,6 @@ def _load_config(context: Optional[str] = None):
72
70
  'Please check if your kubeconfig file exists at '
73
71
  f'~/.kube/config and is valid.'
74
72
  )
75
- err_str += '\nTo disable Kubernetes for SkyPilot: run `sky check`.'
76
73
  with ux_utils.print_exception_no_traceback():
77
74
  raise ValueError(err_str) from None
78
75
 
konduktor/task.py CHANGED
@@ -695,7 +695,7 @@ class Task:
695
695
  This should be called before provisioning in order to take effect.
696
696
 
697
697
  Args:
698
- storage_mounts: an optional dict of ``{mount_path: sky.Storage
698
+ storage_mounts: an optional dict of ``{mount_path: konduktor.data.Storage
699
699
  object}``, where mount_path is the path inside the remote VM(s)
700
700
  where the Storage object will be mounted on.
701
701
 
@@ -191,8 +191,8 @@ kubernetes:
191
191
  if ! command -v sshd &> /dev/null; then
192
192
  $(prefix_cmd) echo "OpenSSH server is not installed. Installing..."
193
193
 
194
- DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt update
195
- DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt install -y openssh-server
194
+ DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt update 2>&1 >> ~/.konduktor/tmp/apt-install.log;
195
+ DEBIAN_FRONTEND=noninteractive $(prefix_cmd) apt install -y openssh-server >> ~/.konduktor/tmp/apt-install.log;
196
196
 
197
197
  $(prefix_cmd) echo "OpenSSH server installation complete."
198
198
  else
@@ -264,14 +264,16 @@ kubernetes:
264
264
 
265
265
  InstallSSH
266
266
  {% endif %}
267
-
268
267
  {% if tailscale_secret %}
269
268
  function InstallTailscale {
270
269
  if ! command -v tailscale >/dev/null 2>&1; then
271
270
  export TS_HOSTNAME=$(echo "$POD_NAME" | sed 's/-[^-]*$//')
272
- $(prefix_cmd) curl -fsSL https://tailscale.com/install.sh | DEBIAN_FRONTEND=noninteractive $(prefix_cmd) sh 2>&1 | tee -a ~/.konduktor/tmp/tailscale-install.log
271
+ $(prefix_cmd) curl -fsSL https://tailscale.com/install.sh | DEBIAN_FRONTEND=noninteractive $(prefix_cmd) sh > ~/.konduktor/tmp/tailscale-install.log 2>&1
273
272
  $(prefix_cmd) tailscaled --tun=userspace-networking >/dev/null 2>&1 &
274
- $(prefix_cmd) tailscale up --auth-key=${TS_AUTHKEY} --ssh --hostname=${TS_HOSTNAME} >/dev/null 2>&1
273
+ while ! tailscale status >/dev/null 2>&1; do
274
+ $(prefix_cmd) timeout 5 tailscale up --auth-key=${TS_AUTHKEY} --ssh --hostname=${TS_HOSTNAME}
275
+ sleep 1
276
+ done
275
277
  fi
276
278
  }
277
279
  InstallTailscale &
@@ -43,7 +43,7 @@ def is_safe_exception(exc: Exception) -> bool:
43
43
  return True
44
44
 
45
45
  # Konduktor's own exceptions
46
- if module.startswith('sky.'):
46
+ if module.startswith('konduktor.'):
47
47
  return True
48
48
 
49
49
  return False
@@ -100,8 +100,6 @@ def _handle_io_stream(io_stream, out_stream, args: _ProcessingArgs):
100
100
  start_streaming_flag = True
101
101
  if args.end_streaming_at is not None and args.end_streaming_at in line:
102
102
  # Keep executing the loop, only stop streaming.
103
- # E.g., this is used for `sky bench` to hide the
104
- # redundant messages of `sky launch` while
105
103
  # saving them in log files.
106
104
  end_streaming_flag = True
107
105
  if args.stream_logs and start_streaming_flag and not end_streaming_flag:
@@ -103,7 +103,7 @@ class RichSafeStreamHandler(logging.StreamHandler):
103
103
 
104
104
 
105
105
  def force_update_status(msg: str):
106
- """Update the status message even if sky_logging.is_silent() is true."""
106
+ """Update the status message even if konduktor_logging.is_silent() is true."""
107
107
  if threading.current_thread() is threading.main_thread() and _status is not None:
108
108
  _status.update(msg)
109
109
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250514104854
3
+ Version: 0.1.0.dev20250515104942
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -1,15 +1,15 @@
1
- konduktor/__init__.py,sha256=ODIjRocI7dlxyMFYh5S2VYJai-4MWm98MSyiSzaGDbA,1540
1
+ konduktor/__init__.py,sha256=Dz34neDq8Q0MFGTtlBPBW9OnytuxHIBA6KHubrKBi2g,1540
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
- konduktor/adaptors/common.py,sha256=uTdpKvgBSwYMmynx9wR5kiZQyTrdaw9ZI4KH6Z2E5Hw,4296
4
+ konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
5
5
  konduktor/adaptors/gcp.py,sha256=ierTF4z7vwpJ9BsC7LSiwv4uLcjGXscwZOwQrddr2vM,4102
6
- konduktor/authentication.py,sha256=jhw_virbyvrY_9WZqOXX3LyOP_HkpfmTssWUMxJVyBg,4564
6
+ konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4570
7
7
  konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
8
8
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
9
9
  konduktor/backends/jobset.py,sha256=UdhwAuZODLMbLY51Y2zOBsh6wg4Pb84oHVvUKzx3Z2w,8434
10
10
  konduktor/backends/jobset_utils.py,sha256=4vMYOhTENfBL9khzFuj69-Vy4g0sBkUpXX-1bfPnVys,20054
11
11
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
12
- konduktor/cli.py,sha256=Ii9-2mrc-1f2ksLasA-xRb-JnEi_9ZeCXZ3lJ1GG8H8,23515
12
+ konduktor/cli.py,sha256=Fl1dwNB5T-kDQAlAoOJetzl6RYt9FYUlowKjbNhVjkQ,23412
13
13
  konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
14
14
  konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
15
15
  konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -53,25 +53,25 @@ konduktor/data/__init__.py,sha256=KMR2i3E9YcIpiIuCxtRdS7BQ1w2vUAbbve7agziJrLo,21
53
53
  konduktor/data/aws/__init__.py,sha256=_6zWfNNAK1QGgyKqg_yPYWcXlnffchyvIMErYa6tw_U,331
54
54
  konduktor/data/aws/s3.py,sha256=T4FnCxilNp35bsgmE7j5O3j15FVbgWRdUH8YFXCiwSw,48335
55
55
  konduktor/data/constants.py,sha256=yXVEoTI2we1xOjVSU-bjRCQCLpVvpEvJ0GedXvSwEfw,127
56
- konduktor/data/data_utils.py,sha256=yrnu8_cY63TXqfWfFG3yqY2w_tE9UQK9jIQAFQCDVg0,9668
56
+ konduktor/data/data_utils.py,sha256=IG1jgb_La997wi90xCvxYYsHQRlmm8Aooq04ZSf8EDI,9670
57
57
  konduktor/data/gcp/__init__.py,sha256=rlQxACBC_Vu36mdgPyJgUy4mGc_6Nt_a96JAuaPz2pQ,489
58
58
  konduktor/data/gcp/constants.py,sha256=dMfOiFccM8O6rUi9kClJcbvw1K1VnS1JzzQk3apq8ho,1483
59
59
  konduktor/data/gcp/gcs.py,sha256=nqhCvQuGpHFPoxT5SKgxL25KtZuSg377Nh1bICiQwlc,42057
60
60
  konduktor/data/gcp/utils.py,sha256=FJQcMXZqtMIzjZ98b3lTTc0UbdPUKTDLsOsfJaaH5-s,214
61
61
  konduktor/data/registry.py,sha256=CUbMsN_Q17Pf4wRHkqZrycErEjTP7cLEdgcfwVGcEpc,696
62
- konduktor/data/storage.py,sha256=SDKRWDd7PCT9ytuz4cH0CejZj5QmWG_EZhUMVoTzWsc,35308
62
+ konduktor/data/storage.py,sha256=o2So-bY9glvgbGdoN7AQNYmNnvGf1AUDPpImtadRL90,35213
63
63
  konduktor/data/storage_utils.py,sha256=n4GivkN0KMqmyOTDznF0Z-hzsJvm7KCEh5i5HgFAT-4,20806
64
- konduktor/execution.py,sha256=UaHUdBmDaIYgiAXkRKJQOHniYPVIR4sr4yUbIqpgMrQ,18401
65
- konduktor/kube_client.py,sha256=aqwjDfNSneB5NOxV6CtqhkBeNl0UQNUt730R3ujG9Ow,6156
64
+ konduktor/execution.py,sha256=NCl2bgo5p1ZZl8HLaXT-juAe9PXr-iCJv0md2sT7A20,18395
65
+ konduktor/kube_client.py,sha256=lC-U_1hLRG3mDN8tBxYc4VZ3BS5BzKm8hlt-lE3505A,5938
66
66
  konduktor/logging.py,sha256=mBCoCTNhDEkUxd4tsse4mw-aVzSGohhXYf16ViR0ch4,2722
67
67
  konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4bo3lrigRmhf8NXBHE,1730
68
68
  konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
69
69
  konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
70
70
  konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
71
71
  konduktor/resource.py,sha256=w2PdIrmQaJWA-GLSmVBcg4lxwuxvPulz35_YSKa5o24,19254
72
- konduktor/task.py,sha256=2JOHRS4JE2FdN-M3qZKhII1hkUvWHbreNtkf30Mo2lo,35196
72
+ konduktor/task.py,sha256=ofwd8WIhfD6C3ThLcv6X3GUzQHyZ6ddjUagE-umF4K0,35207
73
73
  konduktor/templates/jobset.yaml.j2,sha256=onYiHtXAgk-XBtji994hPu_g0hxnLzvmfxwjbdKdeZc,960
74
- konduktor/templates/pod.yaml.j2,sha256=AobmCpvXRnZuQjfT000vN72Nuk380CCmWPHC_BVrUhM,17161
74
+ konduktor/templates/pod.yaml.j2,sha256=xg0BiATrBtL7A5TkU_ndh26WZqnnkf-jtXadJ1BILHc,17343
75
75
  konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
76
  konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
77
77
  konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -81,18 +81,18 @@ konduktor/utils/base64_utils.py,sha256=mF-Tw98mFRG70YE4w6s9feuQSCYZHOb8YatBZwMug
81
81
  konduktor/utils/common_utils.py,sha256=F5x7k4AdBB44u8PYRkaugORnZKnK3JLqGn1jHOKgUYo,14960
82
82
  konduktor/utils/constants.py,sha256=1DneiTR21lvKUcWdBGwC4I4fD4uPjbjLUilEnJS7rzA,216
83
83
  konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4xc,2258
84
- konduktor/utils/exceptions.py,sha256=bOYHk3SHR3XO__p9bPwVPz8g9k6weIRxGRFNkyzgZOA,6676
84
+ konduktor/utils/exceptions.py,sha256=5IFnN5bIUSBJv4KRRrCepk5jyY9EG5vWWQqbjCmP3NU,6682
85
85
  konduktor/utils/kubernetes_enums.py,sha256=SabUueF6Bpzbpa57gyH5VB65xla2N9l8CZmAeYTfGmM,176
86
86
  konduktor/utils/kubernetes_utils.py,sha256=1MZHwU4vy-exA4TA5_oTiV-zm1A2ayfeA0T_75DMFM8,23937
87
- konduktor/utils/log_utils.py,sha256=lgHCq4OdtJNfbpso-uYGONUCVNsUrUkUWjROarsHt6s,9897
87
+ konduktor/utils/log_utils.py,sha256=oFCKkYKCS_e_GRw_-0F7WsiIZNqJL1RZ4cD5-zh59Q4,9765
88
88
  konduktor/utils/loki_utils.py,sha256=h2ZvZQr1nE_wXXsKsGMjhG2s2MXknNd4icydTR_ruKU,3539
89
- konduktor/utils/rich_utils.py,sha256=kdjNe6S2LlpOxyzhFHqMzCz7g4ROC4e7TPWgcbRsrQE,3577
89
+ konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo,3583
90
90
  konduktor/utils/schemas.py,sha256=2fHsTi3t9q3LXqOPrcpkmPsMbaoJBnuJstd6ULmDiUo,16455
91
91
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
92
92
  konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
93
93
  konduktor/utils/validator.py,sha256=tgBghVyedyzGx84-U2Qfoh_cJBE3oUk9gclMW90ORks,691
94
- konduktor_nightly-0.1.0.dev20250514104854.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
95
- konduktor_nightly-0.1.0.dev20250514104854.dist-info/METADATA,sha256=ErMUfOWxJPkbM0by718uNtBgUv-2w7m5sqFzJ_cHc64,4366
96
- konduktor_nightly-0.1.0.dev20250514104854.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
97
- konduktor_nightly-0.1.0.dev20250514104854.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
98
- konduktor_nightly-0.1.0.dev20250514104854.dist-info/RECORD,,
94
+ konduktor_nightly-0.1.0.dev20250515104942.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
95
+ konduktor_nightly-0.1.0.dev20250515104942.dist-info/METADATA,sha256=ikZ6zhXDw6OHLbu8sswo0tQh_J7S1kRDMJV0cwc4aI4,4366
96
+ konduktor_nightly-0.1.0.dev20250515104942.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
97
+ konduktor_nightly-0.1.0.dev20250515104942.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
98
+ konduktor_nightly-0.1.0.dev20250515104942.dist-info/RECORD,,