konduktor-nightly 0.1.0.dev20250820104812__tar.gz → 0.1.0.dev20250821104804__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

Files changed (103)
  1. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/PKG-INFO +1 -1
  2. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/__init__.py +2 -2
  3. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/backends/jobset_utils.py +53 -28
  4. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/backends/pod_utils.py +2 -1
  5. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/templates/pod.yaml.j2 +17 -8
  6. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/log_utils.py +2 -0
  7. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/pyproject.toml +1 -1
  8. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/LICENSE +0 -0
  9. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/README.md +0 -0
  10. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/adaptors/__init__.py +0 -0
  11. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/adaptors/aws.py +0 -0
  12. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/adaptors/common.py +0 -0
  13. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/adaptors/gcp.py +0 -0
  14. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/authentication.py +0 -0
  15. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/backends/__init__.py +0 -0
  16. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/backends/backend.py +0 -0
  17. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/backends/constants.py +0 -0
  18. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/backends/deployment.py +0 -0
  19. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/backends/deployment_utils.py +0 -0
  20. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/backends/jobset.py +0 -0
  21. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/check.py +0 -0
  22. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/cli.py +0 -0
  23. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/config.py +0 -0
  24. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/constants.py +0 -0
  25. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/controller/__init__.py +0 -0
  26. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/controller/constants.py +0 -0
  27. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/controller/launch.py +0 -0
  28. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/controller/node.py +0 -0
  29. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/controller/parse.py +0 -0
  30. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/README.md +0 -0
  31. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/backend/main.py +0 -0
  32. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/backend/sockets.py +0 -0
  33. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/.eslintrc.json +0 -0
  34. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/.gitignore +0 -0
  35. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/api/jobs/route.js +0 -0
  36. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/api/namespaces/route.js +0 -0
  37. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/Grafana.jsx +0 -0
  38. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/JobsData.jsx +0 -0
  39. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/LogsData.jsx +0 -0
  40. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/NavMenu.jsx +0 -0
  41. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/NavTabs.jsx +0 -0
  42. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/NavTabs2.jsx +0 -0
  43. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/SelectBtn.jsx +0 -0
  44. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/lib/utils.js +0 -0
  45. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +0 -0
  46. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/ui/input.jsx +0 -0
  47. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +0 -0
  48. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/components/ui/select.jsx +0 -0
  49. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/favicon.ico +0 -0
  50. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/globals.css +0 -0
  51. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/jobs/page.js +0 -0
  52. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/layout.js +0 -0
  53. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/logs/page.js +0 -0
  54. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/app/page.js +0 -0
  55. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/jsconfig.json +0 -0
  56. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/next.config.mjs +0 -0
  57. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/package-lock.json +0 -0
  58. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/package.json +0 -0
  59. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/postcss.config.mjs +0 -0
  60. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/server.js +0 -0
  61. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/dashboard/frontend/tailwind.config.js +0 -0
  62. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/__init__.py +0 -0
  63. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/aws/__init__.py +0 -0
  64. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/aws/s3.py +0 -0
  65. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/constants.py +0 -0
  66. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/data_utils.py +0 -0
  67. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/gcp/__init__.py +0 -0
  68. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/gcp/constants.py +0 -0
  69. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/gcp/gcs.py +0 -0
  70. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/gcp/utils.py +0 -0
  71. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/registry.py +0 -0
  72. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/storage.py +0 -0
  73. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/data/storage_utils.py +0 -0
  74. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/execution.py +0 -0
  75. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/kube_client.py +0 -0
  76. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/logging.py +0 -0
  77. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/manifests/controller_deployment.yaml +0 -0
  78. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/manifests/dashboard_deployment.yaml +0 -0
  79. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/manifests/dmesg_daemonset.yaml +0 -0
  80. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/manifests/pod_cleanup_controller.yaml +0 -0
  81. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/resource.py +0 -0
  82. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/serving.py +0 -0
  83. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/task.py +0 -0
  84. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/templates/deployment.yaml.j2 +0 -0
  85. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/templates/jobset.yaml.j2 +0 -0
  86. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/usage/__init__.py +0 -0
  87. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/usage/constants.py +0 -0
  88. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/__init__.py +0 -0
  89. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/accelerator_registry.py +0 -0
  90. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/annotations.py +0 -0
  91. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/base64_utils.py +0 -0
  92. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/common_utils.py +0 -0
  93. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/constants.py +0 -0
  94. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/env_options.py +0 -0
  95. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/exceptions.py +0 -0
  96. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/kubernetes_enums.py +0 -0
  97. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/kubernetes_utils.py +0 -0
  98. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/loki_utils.py +0 -0
  99. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/rich_utils.py +0 -0
  100. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/schemas.py +0 -0
  101. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/subprocess_utils.py +0 -0
  102. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/ux_utils.py +0 -0
  103. {konduktor_nightly-0.1.0.dev20250820104812 → konduktor_nightly-0.1.0.dev20250821104804}/konduktor/utils/validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250820104812
3
+ Version: 0.1.0.dev20250821104804
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -11,7 +11,7 @@ from konduktor.task import Task
11
11
  __all__ = ['launch', 'Resources', 'Task', 'Serving']
12
12
 
13
13
  # Replaced with the current commit when building the wheels.
14
- _KONDUKTOR_COMMIT_SHA = '108d7fe47b1bd5db50d555510714d2e204fb7b6f'
14
+ _KONDUKTOR_COMMIT_SHA = 'eee38d922bf4c7cb8a2e6e730092dde0ae372500'
15
15
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
16
16
 
17
17
 
@@ -45,5 +45,5 @@ def _get_git_commit():
45
45
 
46
46
 
47
47
  __commit__ = _get_git_commit()
48
- __version__ = '1.0.0.dev0.1.0.dev20250820104812'
48
+ __version__ = '1.0.0.dev0.1.0.dev20250821104804'
49
49
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -464,6 +464,41 @@ def _get_end_time_from_conditions(job: Dict[str, Any]) -> str:
464
464
  return '-'
465
465
 
466
466
 
467
+ def _get_time_delta(delta: 'timedelta') -> Tuple[str, 'timedelta']:
468
+ total_seconds = int(delta.total_seconds())
469
+
470
+ days, remainder = divmod(total_seconds, 86400) # 86400 seconds in a day
471
+ hours, remainder = divmod(remainder, 3600) # 3600 seconds in an hour
472
+ minutes, seconds = divmod(remainder, 60) # 60 seconds in a minute
473
+
474
+ days_str = f'{days} day{"s" if days != 1 else ""}, ' if days > 0 else ''
475
+ hours_str = f'{hours} hr{"s" if hours != 1 else ""}, ' if hours > 0 else ''
476
+ minutes_str = (
477
+ f'{minutes} min{"s" if minutes != 1 else ""}'
478
+ if minutes > 0 and days == 0
479
+ else ''
480
+ )
481
+
482
+ seconds_str = (
483
+ f'{seconds} sec{"s" if seconds != 1 else ""}'
484
+ if seconds > 0 and days == 0 and hours == 0 and minutes == 0
485
+ else ''
486
+ )
487
+
488
+ result = f'{days_str}{hours_str}{minutes_str}{seconds_str}'
489
+ return result if result else '<1 minute', delta
490
+
491
+
492
+ def _get_job_length(start_time: str, end_time: str) -> str:
493
+ if start_time == '-' or end_time == '-':
494
+ return '-'
495
+ else:
496
+ start = datetime.strptime(start_time, '%m/%d/%y %I:%M%p')
497
+ end = datetime.strptime(end_time, '%m/%d/%y %I:%M%p')
498
+ delta, _ = _get_time_delta(end - start)
499
+ return delta
500
+
501
+
467
502
  def show_status_table(
468
503
  namespace: str,
469
504
  all_users: bool,
@@ -531,32 +566,6 @@ def show_status_table(
531
566
  f'{JobStatus.PENDING.name}{colorama.Style.RESET_ALL}'
532
567
  )
533
568
 
534
- def _get_time_delta(timestamp: str) -> Tuple[str, 'timedelta']:
535
- delta = datetime.now(timezone.utc) - datetime.strptime(
536
- timestamp, '%Y-%m-%dT%H:%M:%SZ'
537
- ).replace(tzinfo=timezone.utc)
538
- total_seconds = int(delta.total_seconds())
539
-
540
- days, remainder = divmod(total_seconds, 86400) # 86400 seconds in a day
541
- hours, remainder = divmod(remainder, 3600) # 3600 seconds in an hour
542
- minutes, seconds = divmod(remainder, 60) # 60 seconds in a minute
543
-
544
- days_str = f'{days} day{"s" if days != 1 else ""}, ' if days > 0 else ''
545
- hours_str = f'{hours} hr{"s" if hours != 1 else ""}, ' if hours > 0 else ''
546
- minutes_str = (
547
- f'{minutes} min{"s" if minutes != 1 else ""}'
548
- if minutes > 0 and days == 0
549
- else ''
550
- )
551
- seconds_str = (
552
- f'{seconds} sec{"s" if seconds != 1 else ""}'
553
- if seconds > 0 and days == 0 and hours == 0 and minutes == 0
554
- else ''
555
- )
556
-
557
- result = f'{days_str}{hours_str}{minutes_str}{seconds_str}'
558
- return result if result else '<1 minute', delta
559
-
560
569
  def _get_resources(job: Dict[str, Any]) -> str:
561
570
  num_pods = int(
562
571
  job['spec']['replicatedJobs'][0]['template']['spec']['parallelism']
@@ -580,9 +589,18 @@ def show_status_table(
580
589
  'SUBMITTED',
581
590
  'START TIME',
582
591
  'END TIME',
592
+ 'DURATION',
583
593
  ]
584
594
  else:
585
- columns = ['NAME', 'STATUS', 'RESOURCES', 'SUBMITTED', 'START TIME', 'END TIME']
595
+ columns = [
596
+ 'NAME',
597
+ 'STATUS',
598
+ 'RESOURCES',
599
+ 'SUBMITTED',
600
+ 'START TIME',
601
+ 'END TIME',
602
+ 'DURATION',
603
+ ]
586
604
  job_table = log_utils.create_table(columns)
587
605
  job_specs = list_jobset(namespace)
588
606
  assert job_specs is not None, 'Retrieving jobs failed'
@@ -621,13 +639,18 @@ def show_status_table(
621
639
  start_time = _format_timestamp(start_time)
622
640
 
623
641
  # Get submitted time (how long ago)
624
- submitted_time, _ = _get_time_delta(job['metadata']['creationTimestamp'])
642
+ time_delta = datetime.now(timezone.utc) - datetime.strptime(
643
+ job['metadata']['creationTimestamp'], '%Y-%m-%dT%H:%M:%SZ'
644
+ ).replace(tzinfo=timezone.utc)
645
+ submitted_time, _ = _get_time_delta(time_delta)
625
646
 
626
647
  # Get end time (from JobSet conditions)
627
648
  end_time = _get_end_time_from_conditions(job)
628
649
  if end_time != '-':
629
650
  end_time = _format_timestamp(end_time)
630
651
 
652
+ job_length = _get_job_length(start_time, end_time)
653
+
631
654
  if all_users:
632
655
  rows.append(
633
656
  [
@@ -638,6 +661,7 @@ def show_status_table(
638
661
  submitted_time,
639
662
  start_time,
640
663
  end_time,
664
+ job_length,
641
665
  job['metadata']['creationTimestamp'],
642
666
  ]
643
667
  )
@@ -654,6 +678,7 @@ def show_status_table(
654
678
  submitted_time,
655
679
  start_time,
656
680
  end_time,
681
+ job_length,
657
682
  job['metadata']['creationTimestamp'],
658
683
  ]
659
684
  )
@@ -1,6 +1,7 @@
1
1
  """Pod utils: handles pod spec creation and manipulation"""
2
2
 
3
3
  import base64
4
+ import json
4
5
  import os
5
6
  import tempfile
6
7
  import typing
@@ -284,7 +285,7 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
284
285
  pod_config['kubernetes']['pod_config']['spec']['containers'][0]['env'] = list(
285
286
  env_map.values()
286
287
  )
287
- logger.debug(f'rendered pod spec: \n\t{pod_config}')
288
+ logger.debug(f'rendered pod spec: \n\t{json.dumps(pod_config, indent=2)}')
288
289
 
289
290
  # validate pod spec using json schema
290
291
  try:
@@ -161,6 +161,10 @@ kubernetes:
161
161
  - name: git-ssh-secret
162
162
  mountPath: /run/konduktor/git-ssh-secret
163
163
  {% endif %}
164
+ {% if tailscale_secret %}
165
+ - name: tailscale-state
166
+ mountPath: /var/lib/tailscale
167
+ {% endif %}
164
168
  command: ["bash", "-c"]
165
169
  args:
166
170
  - |
@@ -317,18 +321,19 @@ kubernetes:
317
321
  export TS_HOSTNAME=$(echo "$POD_NAME" | sed 's/-[^-]*$//')
318
322
  $(prefix_cmd) echo "TS_HOSTNAME=${TS_HOSTNAME}" >> /etc/environment
319
323
  function InstallTailscale {
320
- while ! tailscale status >/dev/null 2>&1; do
324
+ if ! command -v tailscale >/dev/null 2>&1; then
325
+ $(prefix_cmd) curl -fsSL https://tailscale.com/install.sh | DEBIAN_FRONTEND=noninteractive $(prefix_cmd) sh > ~/.konduktor/tmp/tailscale-install.log 2>&1
326
+ fi
327
+ if ! tailscale status >/dev/null 2>&1; then
321
328
  $(prefix_cmd) mkdir -p /var/run/tailscale /var/cache/tailscale /var/lib/tailscale
322
- if ! command -v tailscale >/dev/null 2>&1; then
323
- $(prefix_cmd) curl -fsSL https://tailscale.com/install.sh | DEBIAN_FRONTEND=noninteractive $(prefix_cmd) sh > ~/.konduktor/tmp/tailscale-install.log 2>&1
324
- fi
325
- $(prefix_cmd) tailscaled --tun=userspace-networking --state=mem: >~/.konduktor/tmp/tailscaled.log 2>&1 &
326
- $(prefix_cmd) sleep 2
327
- $(prefix_cmd) timeout 5 tailscale up --auth-key=${TS_AUTHKEY} --ssh --hostname=${TS_HOSTNAME} || echo "tailscale up failed retrying"
328
- $(prefix_cmd) sleep 2
329
+ $(prefix_cmd) nohup tailscaled --tun=userspace-networking >~/.konduktor/tmp/tailscaled.log 2>&1 &
330
+ fi
331
+ until tailscale status >/dev/null 2>&1; do
332
+ $(prefix_cmd) tailscale up --auth-key=${TS_AUTHKEY} --ssh --hostname=${TS_HOSTNAME} --accept-dns=false || echo "tailscale up failed retrying"
329
333
  done
330
334
  $(prefix_cmd) echo "Tailscale is up"
331
335
  $(prefix_cmd) tailscale status
336
+ $(prefix_cmd) tailscale netcheck
332
337
  }
333
338
  InstallTailscale | tee ~/.konduktor/tmp/tailscale-out.log
334
339
  {% if konduktor_debug %}
@@ -413,6 +418,10 @@ kubernetes:
413
418
  emptyDir:
414
419
  medium: "Memory"
415
420
  sizeLimit: 4Gi
421
+ {% if tailscale_secret %}
422
+ - name: tailscale-state
423
+ emptyDir: {}
424
+ {% endif %}
416
425
  - name: sync
417
426
  emptyDir: {}
418
427
  {% for secret_type, secret_name in mount_secrets.items() %}
@@ -397,6 +397,8 @@ def tail_vicky_logs(
397
397
  for line in response.iter_lines(decode_unicode=True):
398
398
  if line:
399
399
  payload = json.loads(line)
400
+ if 'missing _msg field' in payload['_msg']:
401
+ payload['_msg'] = ''
400
402
  print(
401
403
  f"{colorama.Fore.CYAN}{colorama.Style.BRIGHT} "
402
404
  f"(job_name={job_name} worker_id={worker_id})"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "konduktor-nightly"
3
- version = "0.1.0.dev20250820104812"
3
+ version = "0.1.0.dev20250821104804"
4
4
  description = "GPU Cluster Health Management"
5
5
  packages = [
6
6
  {include = "konduktor"}