konduktor-nightly 0.1.0.dev20250809104842__py3-none-any.whl → 0.1.0.dev20250811105223__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

konduktor/__init__.py CHANGED
@@ -11,7 +11,7 @@ from konduktor.task import Task
11
11
  __all__ = ['launch', 'Resources', 'Task', 'Serving']
12
12
 
13
13
  # Replaced with the current commit when building the wheels.
14
- _KONDUKTOR_COMMIT_SHA = '0f0b36c3a67aa7c60d6cb33240631b7c8ccaed03'
14
+ _KONDUKTOR_COMMIT_SHA = '92fe69bd3f29e7b191de663c598dfcf10738f87a'
15
15
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
16
16
 
17
17
 
@@ -45,5 +45,5 @@ def _get_git_commit():
45
45
 
46
46
 
47
47
  __commit__ = _get_git_commit()
48
- __version__ = '1.0.0.dev0.1.0.dev20250809104842'
48
+ __version__ = '1.0.0.dev0.1.0.dev20250811105223'
49
49
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
konduktor/backends/jobset_utils.py CHANGED
@@ -232,8 +232,12 @@ def stop_jobset(namespace: str, job_name: str) -> Optional[Dict[str, Any]]:
232
232
  'spec': {'suspend': True},
233
233
  'metadata': {
234
234
  'annotations': {
235
- backend_constants.STOP_USERID_LABEL: common_utils.user_and_hostname_hash(),
236
- backend_constants.STOP_USERNAME_LABEL: common_utils.get_cleaned_username(),
235
+ backend_constants.STOP_USERID_LABEL: (
236
+ common_utils.user_and_hostname_hash()
237
+ ),
238
+ backend_constants.STOP_USERNAME_LABEL: (
239
+ common_utils.get_cleaned_username()
240
+ ),
237
241
  }
238
242
  },
239
243
  }
konduktor/cli.py CHANGED
@@ -855,16 +855,23 @@ def launch(
855
855
  'Serving information detected. Use '
856
856
  '`konduktor serve launch` instead for serving.'
857
857
  )
858
+ try:
859
+ _launch_with_confirm(
860
+ task,
861
+ dryrun=dryrun,
862
+ detach_run=detach_run,
863
+ no_confirm=yes,
864
+ serving=bool(task.serving),
865
+ )
866
+ except KeyboardInterrupt:
867
+ click.secho(
868
+ f'Detaching... manage your job {task.name} with the following commands:',
869
+ fg='yellow',
870
+ bold=True,
871
+ )
858
872
 
859
- job_name = _launch_with_confirm(
860
- task,
861
- dryrun=dryrun,
862
- detach_run=detach_run,
863
- no_confirm=yes,
864
- serving=bool(task.serving),
865
- )
866
873
  click.secho(
867
- ux_utils.command_hint_messages(ux_utils.CommandHintType.JOB, job_name),
874
+ ux_utils.command_hint_messages(ux_utils.CommandHintType.JOB, task.name),
868
875
  fg='green',
869
876
  bold=True,
870
877
  )
@@ -1139,6 +1146,12 @@ def stop(
1139
1146
  for job in track(jobs, description='Suspending job(s)...'):
1140
1147
  jobset_utils.stop_jobset(namespace, job)
1141
1148
 
1149
+ click.secho(
1150
+ ux_utils.command_hint_messages(ux_utils.CommandHintType.JOB_STOP, jobs),
1151
+ fg='green',
1152
+ bold=True,
1153
+ )
1154
+
1142
1155
 
1143
1156
  @cli.command(cls=_DocumentedCodeCommand)
1144
1157
  @click.argument(
@@ -1836,7 +1849,7 @@ def main():
1836
1849
  return cli(standalone_mode=False)
1837
1850
  except click.exceptions.Abort:
1838
1851
  click.secho('Detaching...', fg='yellow', bold=True)
1839
- return
1852
+ return None
1840
1853
 
1841
1854
 
1842
1855
  if __name__ == '__main__':
konduktor/logging.py CHANGED
@@ -75,12 +75,14 @@ def get_logger(name: str):
75
75
  fh.setFormatter(FORMATTER)
76
76
  logger.addHandler(fh)
77
77
 
78
- # --- Console logging: DEBUG level only if KONDUKTOR_DEBUG=1 ---
78
+ # --- Console logging: INFO level by default, DEBUG if KONDUKTOR_DEBUG=1 ---
79
+ ch = logging.StreamHandler()
79
80
  if os.environ.get('KONDUKTOR_DEBUG') == '1':
80
- ch = logging.StreamHandler()
81
81
  ch.setLevel(logging.DEBUG)
82
- ch.setFormatter(FORMATTER)
83
- logger.addHandler(ch)
82
+ else:
83
+ ch.setLevel(logging.INFO)
84
+ ch.setFormatter(FORMATTER)
85
+ logger.addHandler(ch)
84
86
 
85
87
  logger.propagate = False
86
88
  return logger
konduktor/utils/ux_utils.py CHANGED
@@ -6,7 +6,7 @@ import os
6
6
  import sys
7
7
  import traceback
8
8
  import typing
9
- from typing import Callable, Optional, Union
9
+ from typing import Callable, List, Optional, Union
10
10
 
11
11
  import colorama
12
12
  import rich.console as rich_console
@@ -196,27 +196,41 @@ def spinner_message(
196
196
 
197
197
  class CommandHintType(enum.Enum):
198
198
  JOB = 'JOB'
199
+ JOB_STOP = 'JOB_STOP'
199
200
 
200
201
 
201
- def command_hint_messages(hint_type: CommandHintType, job_id: str) -> str:
202
+ def command_hint_messages(
203
+ hint_type: CommandHintType,
204
+ job_id: Union[str, List[str]],
205
+ ) -> str:
202
206
  """Gets the command hint messages for the given job id."""
207
+ hint_str = '\n📋 Useful Commands'
203
208
  if hint_type == CommandHintType.JOB:
204
209
  job_hint_str = (
205
210
  f'\nJob ID: {job_id}'
206
- f'\n{INDENT_SYMBOL}To cancel the job:\t\t'
207
- f'{BOLD}konduktor down {job_id} {RESET_BOLD}'
208
211
  f'\n{INDENT_SYMBOL}To stream job logs:\t\t'
209
212
  f'{BOLD}konduktor logs {job_id} {RESET_BOLD}'
210
213
  f'\n{INDENT_SYMBOL}To list all jobs:\t\t'
211
214
  f'{BOLD}konduktor status{RESET_BOLD}'
215
+ f'\n{INDENT_SYMBOL}To suspend the job:\t\t'
216
+ f'{BOLD}konduktor stop {job_id} {RESET_BOLD}'
217
+ f'\n{INDENT_SYMBOL}{colorama.Fore.RED}To delete the job:\t\t'
218
+ f'{BOLD}konduktor down {job_id} {RESET_BOLD}{colorama.Style.RESET_ALL}'
212
219
  )
213
- hint_str = '\n📋 Useful Commands'
214
220
  hint_str += f'{job_hint_str}'
215
- if config.get_nested(('tailscale', 'secret_name'), None) is not None:
216
- hint_str += (
217
- f'\n{INDENT_SYMBOL}To tailscale ssh:\t\t'
218
- f'{BOLD}ssh root@{job_id}-workers-0-0 {RESET_BOLD}'
219
- )
220
- return hint_str
221
+ elif hint_type == CommandHintType.JOB_STOP:
222
+ assert isinstance(job_id, list), 'job_id must be a list of strings'
223
+ job_ids_str = ' '.join(job_id)
224
+ hint_str += (
225
+ f'\n{INDENT_SYMBOL}To resume the following jobs:\t\t'
226
+ f'{BOLD}konduktor start {job_ids_str} {RESET_BOLD}'
227
+ )
221
228
  else:
222
229
  raise ValueError(f'Invalid hint type: {hint_type}')
230
+
231
+ if config.get_nested(('tailscale', 'secret_name'), None) is not None:
232
+ hint_str += (
233
+ f'\n{INDENT_SYMBOL}To tailscale ssh:\t\t'
234
+ f'{BOLD}ssh root@{job_id}-workers-0-0 {RESET_BOLD}'
235
+ )
236
+ return hint_str
konduktor_nightly-0.1.0.dev20250811105223.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250809104842
3
+ Version: 0.1.0.dev20250811105223
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
konduktor_nightly-0.1.0.dev20250811105223.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- konduktor/__init__.py,sha256=bTZfonclg2KPH9idiGwsQys1xaoPFVgSruIPRdy8GSQ,1574
1
+ konduktor/__init__.py,sha256=DYiQ-TfOdYUqIzT8psdjnWcjtjMI4sbldAghArAY5e0,1574
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
4
  konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
@@ -10,10 +10,10 @@ konduktor/backends/constants.py,sha256=NfdhY1PQnewvDCjgRKXj6EZDcVH8k_0GGxnMo7w6H
10
10
  konduktor/backends/deployment.py,sha256=EHfB2uLeKFQ3maek9tx6XL4_sjQ-ax59DZA79Q3EkVs,5519
11
11
  konduktor/backends/deployment_utils.py,sha256=VGuL01rKe7p7PoVRI_cP4tiZRxHZ13nnTMG-bmDf7P0,28975
12
12
  konduktor/backends/jobset.py,sha256=OwgDog9nH-FoUmNU_H--C3U5jx70reTKL1l849M1k5A,8430
13
- konduktor/backends/jobset_utils.py,sha256=YPbGxbM9FIPNLlvu3_189iGDopWrGLqL_kJO17McRUU,24567
13
+ konduktor/backends/jobset_utils.py,sha256=O983a78D411go_F0K2mijZAE1dXAFF7i6aQ7rOrfH7A,24663
14
14
  konduktor/backends/pod_utils.py,sha256=Jfv_CY8suF0e7QEaeQiNRRxRnOueLgPR8SfLEO7lnwc,15260
15
15
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
16
- konduktor/cli.py,sha256=lEZmfrswuxMAyU5hmndMHqk4GkJyohk_TOHBx-0h90M,56316
16
+ konduktor/cli.py,sha256=9S6DEsK_qlD34UM6CwFah0FmJgQ4lVaV-LViKp9fJ6o,56687
17
17
  konduktor/config.py,sha256=9upqgCCYvcu6fKw7tovEYC1MWTkAAir0_WHPdayylbI,15536
18
18
  konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
19
19
  konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -67,7 +67,7 @@ konduktor/data/storage.py,sha256=o2So-bY9glvgbGdoN7AQNYmNnvGf1AUDPpImtadRL90,352
67
67
  konduktor/data/storage_utils.py,sha256=n4GivkN0KMqmyOTDznF0Z-hzsJvm7KCEh5i5HgFAT-4,20806
68
68
  konduktor/execution.py,sha256=d0EP79iSrW2uFsoqn0YV_4kgIupPIqpMOParXx0y3kg,18519
69
69
  konduktor/kube_client.py,sha256=WELs9jClRW9r-imNJF3gJi3Z7ygkFDnYDmMXu5nJhEM,6213
70
- konduktor/logging.py,sha256=mA1JCCWPCqQMRqEpE4l6D6vOYdbtbQXr0BuEk9RR790,3177
70
+ konduktor/logging.py,sha256=xtcCdnecmC3rqMTyunK-klQRINojI7NI4Apag78i9jM,3221
71
71
  konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4bo3lrigRmhf8NXBHE,1730
72
72
  konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
73
73
  konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
@@ -95,10 +95,10 @@ konduktor/utils/loki_utils.py,sha256=h2ZvZQr1nE_wXXsKsGMjhG2s2MXknNd4icydTR_ruKU
95
95
  konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo,3583
96
96
  konduktor/utils/schemas.py,sha256=tBrKhnkfn9uKDYdlb4L2KgooW-muuhww7U8fu9zX-ms,18336
97
97
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
98
- konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
98
+ konduktor/utils/ux_utils.py,sha256=7-Lt3QbDVvBQUli5_U9lOdXKeC-ip8rZBpO9gQ6vPJw,7955
99
99
  konduktor/utils/validator.py,sha256=5C1kE57Eyj1OPnAbvojqMNHHtf5fnl47FK_vEttd8aw,4331
100
- konduktor_nightly-0.1.0.dev20250809104842.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
101
- konduktor_nightly-0.1.0.dev20250809104842.dist-info/METADATA,sha256=cOmfOuxpidR03qJMivceD6nkUWRwmNzm_D3OVKGF2q8,4247
102
- konduktor_nightly-0.1.0.dev20250809104842.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
103
- konduktor_nightly-0.1.0.dev20250809104842.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
104
- konduktor_nightly-0.1.0.dev20250809104842.dist-info/RECORD,,
100
+ konduktor_nightly-0.1.0.dev20250811105223.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
101
+ konduktor_nightly-0.1.0.dev20250811105223.dist-info/METADATA,sha256=Gp9W_UVyGtpg-hU24Hm1DU-RKr-Hkzx10JQmBsrJMdQ,4247
102
+ konduktor_nightly-0.1.0.dev20250811105223.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
103
+ konduktor_nightly-0.1.0.dev20250811105223.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
104
+ konduktor_nightly-0.1.0.dev20250811105223.dist-info/RECORD,,