konduktor-nightly 0.1.0.dev20250807105334__py3-none-any.whl → 0.1.0.dev20250807224131__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

konduktor/__init__.py CHANGED
@@ -11,7 +11,7 @@ from konduktor.task import Task
11
11
  __all__ = ['launch', 'Resources', 'Task', 'Serving']
12
12
 
13
13
  # Replaced with the current commit when building the wheels.
14
- _KONDUKTOR_COMMIT_SHA = '4f9623300ec72edc8ddf6680cd6149cbfb038f47'
14
+ _KONDUKTOR_COMMIT_SHA = '45add2e516f1b7bb1f16ed063e5af87b8e5609cf'
15
15
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
16
16
 
17
17
 
@@ -45,5 +45,5 @@ def _get_git_commit():
45
45
 
46
46
 
47
47
  __commit__ = _get_git_commit()
48
- __version__ = '1.0.0.dev0.1.0.dev20250807105334'
48
+ __version__ = '1.0.0.dev0.1.0.dev20250807224131'
49
49
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
konduktor/cli.py CHANGED
@@ -653,6 +653,14 @@ def status(all_users: bool):
653
653
  'Default is 1000.'
654
654
  ),
655
655
  )
656
+ @click.option(
657
+ '--node-rank',
658
+ '--node_rank',
659
+ '-N',
660
+ default=0,
661
+ type=int,
662
+ help='The node rank to tail logs from.',
663
+ )
656
664
  @click.argument('job_id', type=str, nargs=1)
657
665
  # TODO(zhwu): support logs by job name
658
666
  def logs(
@@ -660,6 +668,7 @@ def logs(
660
668
  job_id: str,
661
669
  follow: bool,
662
670
  num_lines: int,
671
+ node_rank: int,
663
672
  ):
664
673
  # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
665
674
  """Retrieve/tail the log of a job."""
@@ -689,7 +698,12 @@ def logs(
689
698
  'Logs are tailed from 1 hour ago, ' 'to see more logs, check Grafana.',
690
699
  fg='yellow',
691
700
  )
692
- log_utils.tail_logs(job_id, follow=follow, num_logs=num_lines)
701
+ log_utils.tail_logs(
702
+ job_id,
703
+ worker_id=node_rank,
704
+ follow=follow,
705
+ num_logs=num_lines,
706
+ )
693
707
 
694
708
 
695
709
  @cli.command(cls=_DocumentedCodeCommand)
@@ -1483,7 +1497,11 @@ def serve_status(all_users: bool):
1483
1497
 
1484
1498
 
1485
1499
  def main():
1486
- return cli()
1500
+ try:
1501
+ return cli(standalone_mode=False)
1502
+ except click.exceptions.Abort:
1503
+ click.secho('Detaching...', fg='yellow', bold=True)
1504
+ return
1487
1505
 
1488
1506
 
1489
1507
  if __name__ == '__main__':
@@ -30,7 +30,7 @@ import requests
30
30
  import websockets
31
31
 
32
32
  from konduktor import config, logging
33
- from konduktor.utils import subprocess_utils
33
+ from konduktor.utils import kubernetes_utils, subprocess_utils
34
34
 
35
35
  logger = logging.get_logger(__name__)
36
36
 
@@ -337,8 +337,13 @@ def tail_loki_logs_ws(
337
337
 
338
338
 
339
339
  def tail_vicky_logs(
340
- job_name: str, worker_id: int = 0, num_logs: int = 1000, follow: bool = True
340
+ job_name: str,
341
+ worker_id: int = 0,
342
+ num_logs: int = 1000,
343
+ follow: bool = True,
341
344
  ):
345
+ context = kubernetes_utils.get_current_kube_config_context_name()
346
+ namespace = kubernetes_utils.get_kube_config_context_namespace(context)
342
347
  query: Dict[str, Any] = {}
343
348
  if num_logs > 5000:
344
349
  # TODO(asaiacai): we should not have a limit on the number of logs, but rather
@@ -365,8 +370,9 @@ def tail_vicky_logs(
365
370
  logger.debug(f'Vicky URL: {vicky_url}')
366
371
 
367
372
  query['query'] = (
368
- 'k8s.namespace.name: "default" AND '
369
- f'batch.kubernetes.io/job-name: "{job_name}-workers-{worker_id}"'
373
+ f'k8s.namespace.name: "{namespace}" AND '
374
+ f'batch.kubernetes.io/job-name: "{job_name}-workers-0" AND '
375
+ f'batch.kubernetes.io/job-completion-index: "{worker_id}"'
370
376
  )
371
377
  query['start_offset'] = '1h'
372
378
 
@@ -406,7 +412,10 @@ def tail_vicky_logs(
406
412
 
407
413
 
408
414
  def tail_logs(
409
- job_name: str, worker_id: int = 0, num_logs: int = 1000, follow: bool = True
415
+ job_name: str,
416
+ worker_id: int = 0,
417
+ num_logs: int = 1000,
418
+ follow: bool = True,
410
419
  ):
411
420
  logs_backend = config.get_nested(('logs', 'backend'), None)
412
421
  if logs_backend == LogBackend.VICTORIA:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250807105334
3
+ Version: 0.1.0.dev20250807224131
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -1,4 +1,4 @@
1
- konduktor/__init__.py,sha256=q3tcNX556xQdRe2M42K9PxHx1wCiqql_aGIr3DTKpAk,1574
1
+ konduktor/__init__.py,sha256=I3R6OdNwpyQj5WQSleQEfTTQL932GIcrZmm7F26vZcs,1574
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
4
  konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
@@ -13,7 +13,7 @@ konduktor/backends/jobset.py,sha256=OwgDog9nH-FoUmNU_H--C3U5jx70reTKL1l849M1k5A,
13
13
  konduktor/backends/jobset_utils.py,sha256=MRZf-Wcn084lnig0SfgXlF9Q3RyC7m7THeJQQlpdSw8,12931
14
14
  konduktor/backends/pod_utils.py,sha256=K0y2kRTzrmIWFbmyJSEMhw7gueiIFF7VAlXtaBMNHkM,15237
15
15
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
16
- konduktor/cli.py,sha256=PRMOseG1v1qXj0-KbW7Hzsg5rVRfNZw1oykSJWHO_qA,45481
16
+ konduktor/cli.py,sha256=NOJGSa7iakFpIcC6ny2uhGCugGNyku-U02rlvjgri5Q,45844
17
17
  konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
18
18
  konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
19
19
  konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -90,15 +90,15 @@ konduktor/utils/env_options.py,sha256=T41Slzf4Mzl-n45CGXXqdy2fCrYhPNZQ7RP5vmnN4x
90
90
  konduktor/utils/exceptions.py,sha256=5IFnN5bIUSBJv4KRRrCepk5jyY9EG5vWWQqbjCmP3NU,6682
91
91
  konduktor/utils/kubernetes_enums.py,sha256=SabUueF6Bpzbpa57gyH5VB65xla2N9l8CZmAeYTfGmM,176
92
92
  konduktor/utils/kubernetes_utils.py,sha256=VG7qatUFyWHY-PCQ8fYWh2kn2TMwfg84cn-VkXdCwI8,26077
93
- konduktor/utils/log_utils.py,sha256=CC6McrURGIcM8R4ICBCmn_Y7oXbWh1fwl4__la6Zotw,16677
93
+ konduktor/utils/log_utils.py,sha256=k4Qo0OlUZYQmLcbSD9tDWe6_Q5XcsLO_K8uVWjlTEU0,16938
94
94
  konduktor/utils/loki_utils.py,sha256=h2ZvZQr1nE_wXXsKsGMjhG2s2MXknNd4icydTR_ruKU,3539
95
95
  konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo,3583
96
96
  konduktor/utils/schemas.py,sha256=tBrKhnkfn9uKDYdlb4L2KgooW-muuhww7U8fu9zX-ms,18336
97
97
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
98
98
  konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
99
99
  konduktor/utils/validator.py,sha256=5C1kE57Eyj1OPnAbvojqMNHHtf5fnl47FK_vEttd8aw,4331
100
- konduktor_nightly-0.1.0.dev20250807105334.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
101
- konduktor_nightly-0.1.0.dev20250807105334.dist-info/METADATA,sha256=agt8lbfGconzOtVJBStu3_Q3AI1Pf5dANidlxA4TICQ,4247
102
- konduktor_nightly-0.1.0.dev20250807105334.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
103
- konduktor_nightly-0.1.0.dev20250807105334.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
104
- konduktor_nightly-0.1.0.dev20250807105334.dist-info/RECORD,,
100
+ konduktor_nightly-0.1.0.dev20250807224131.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
101
+ konduktor_nightly-0.1.0.dev20250807224131.dist-info/METADATA,sha256=yICy9FoogRekN2wyrPif9xoTxRTC_XrkPQEylY8kn5c,4247
102
+ konduktor_nightly-0.1.0.dev20250807224131.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
103
+ konduktor_nightly-0.1.0.dev20250807224131.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
104
+ konduktor_nightly-0.1.0.dev20250807224131.dist-info/RECORD,,