konduktor-nightly 0.1.0.dev20250624105122__py3-none-any.whl → 0.1.0.dev20250626105144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
konduktor/__init__.py CHANGED
@@ -14,7 +14,7 @@ __all__ = [
14
14
  ]
15
15
 
16
16
  # Replaced with the current commit when building the wheels.
17
- _KONDUKTOR_COMMIT_SHA = 'd7cf0201d5498c86113628b414d2112670a356d6'
17
+ _KONDUKTOR_COMMIT_SHA = '249f48b15859d1a210ca9b28e6e9cd85ac19ac68'
18
18
  os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
19
19
 
20
20
 
@@ -48,5 +48,5 @@ def _get_git_commit():
48
48
 
49
49
 
50
50
  __commit__ = _get_git_commit()
51
- __version__ = '1.0.0.dev0.1.0.dev20250624105122'
51
+ __version__ = '1.0.0.dev0.1.0.dev20250626105144'
52
52
  __root_dir__ = os.path.dirname(os.path.abspath(__file__))
@@ -10,6 +10,7 @@ from datetime import datetime, timezone
10
10
  from typing import Any, Dict, Optional, Tuple
11
11
  from urllib.parse import urlparse
12
12
 
13
+ import click
13
14
  import colorama
14
15
 
15
16
  if typing.TYPE_CHECKING:
@@ -272,8 +273,10 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
272
273
  )
273
274
 
274
275
  # validate pod spec using json schema
275
- # schema: https://github.com/instrumenta/kubernetes-json-schema/blob/master/v1.9.8-standalone-strict/podspec.json
276
- validator.validate_pod_spec(pod_config['kubernetes']['pod_config']['spec'])
276
+ try:
277
+ validator.validate_pod_spec(pod_config['kubernetes']['pod_config']['spec'])
278
+ except ValueError as e:
279
+ raise click.UsageError(str(e))
277
280
 
278
281
  return pod_config
279
282
 
konduktor/logging.py CHANGED
@@ -17,6 +17,7 @@ _FORMAT = '[%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
17
17
  _DATE_FORMAT = '%m-%d %H:%M:%S'
18
18
 
19
19
  _logging_config = threading.local()
20
+ _log_path = None
20
21
 
21
22
 
22
23
  class NewLineFormatter(logging.Formatter):
@@ -51,19 +52,36 @@ def set_logging_level(logger: str, level: int):
51
52
 
52
53
 
53
54
  def get_logger(name: str):
54
- # Determine the logging level based on the KONDUKTOR_DEBUG environment variable
55
- log_level = logging.INFO
56
- if os.environ.get('KONDUKTOR_DEBUG', None) == '1':
57
- log_level = logging.DEBUG
55
+ global _log_path
58
56
 
59
- # Configure the logger
60
57
  logger = logging.getLogger(name)
61
- if not logger.hasHandlers(): # Check if the logger already has handlers
62
- logger.setLevel(log_level)
58
+
59
+ # Avoid duplicate handlers
60
+ if logger.hasHandlers():
61
+ return logger
62
+
63
+ logger.setLevel(logging.DEBUG) # Always capture all levels internally
64
+
65
+ # --- File logging: Always enabled ---
66
+ if not _log_path:
67
+ log_dir = os.path.expanduser('~/.konduktor/logs')
68
+ os.makedirs(log_dir, exist_ok=True)
69
+ timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
70
+ _log_path = os.path.join(log_dir, f'konduktor-logs-{timestamp}.log')
71
+ print(f'Log file: {_log_path}')
72
+
73
+ fh = logging.FileHandler(_log_path)
74
+ fh.setLevel(logging.DEBUG)
75
+ fh.setFormatter(FORMATTER)
76
+ logger.addHandler(fh)
77
+
78
+ # --- Console logging: DEBUG level only if KONDUKTOR_DEBUG=1 ---
79
+ if os.environ.get('KONDUKTOR_DEBUG') == '1':
63
80
  ch = logging.StreamHandler()
64
- ch.setLevel(log_level)
81
+ ch.setLevel(logging.DEBUG)
65
82
  ch.setFormatter(FORMATTER)
66
83
  logger.addHandler(ch)
84
+
67
85
  logger.propagate = False
68
86
  return logger
69
87
 
konduktor/resource.py CHANGED
@@ -426,7 +426,7 @@ class Resources:
426
426
  common_utils.validate_schema(
427
427
  config['job_config'],
428
428
  schemas.get_job_schema(),
429
- 'Invalid job config YAML',
429
+ 'Invalid job config YAML: ',
430
430
  )
431
431
 
432
432
  def _override_resources(
@@ -14,12 +14,16 @@ import requests
14
14
  from colorama import Fore, Style
15
15
  from filelock import FileLock
16
16
 
17
+ from konduktor import logging
18
+
17
19
  SCHEMA_VERSION = 'v1.32.0-standalone-strict'
18
20
  SCHEMA_URL = f'https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{SCHEMA_VERSION}/podspec.json'
19
21
  SCHEMA_CACHE_PATH = Path.home() / '.konduktor/schemas/podspec.json'
20
22
  SCHEMA_LOCK_PATH = SCHEMA_CACHE_PATH.with_suffix('.lock')
21
23
  CACHE_MAX_AGE_SECONDS = 86400 # 24 hours
22
24
 
25
+ logger = logging.get_logger(__name__)
26
+
23
27
 
24
28
  def case_insensitive_enum(validator, enums, instance, schema):
25
29
  del validator, schema # Unused.
@@ -65,6 +69,16 @@ def validate_pod_spec(pod_spec: dict) -> None:
65
69
  return
66
70
 
67
71
  formatted = [
72
+ f'- {error.message}'
73
+ + (f" at path: {' → '.join(str(p) for p in error.path)}" if error.path else '')
74
+ for error in errors
75
+ ]
76
+
77
+ # Clean log
78
+ logger.debug('Invalid k8s pod spec/config:\n%s', '\n'.join(formatted))
79
+
80
+ # Color only in CLI
81
+ formatted_colored = [
68
82
  f'{Fore.RED}- {error.message}'
69
83
  + (f" at path: {' → '.join(str(p) for p in error.path)}" if error.path else '')
70
84
  + Style.RESET_ALL
@@ -72,7 +86,6 @@ def validate_pod_spec(pod_spec: dict) -> None:
72
86
  ]
73
87
 
74
88
  raise ValueError(
75
- f'\n{Fore.RED}Invalid k8s pod spec/config: \
76
- {Style.RESET_ALL}\n'
77
- + '\n'.join(formatted)
89
+ f'\n{Fore.RED}Invalid k8s pod spec/config: {Style.RESET_ALL}\n'
90
+ + '\n'.join(formatted_colored)
78
91
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250624105122
3
+ Version: 0.1.0.dev20250626105144
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -68,9 +68,6 @@ resources:
68
68
  kueue.x-k8s.io/queue-name: user-queue
69
69
  kueue.x-k8s.io/priority-class: low-priority
70
70
 
71
- setup: |
72
- pip install torch torchvision
73
-
74
71
  run: |
75
72
  torchrun \
76
73
  --nproc_per_node 8 \
@@ -1,4 +1,4 @@
1
- konduktor/__init__.py,sha256=OxmrDGwoENVNb3qVf4cymtyq9QO64bZW6N3SXYi9Ygs,1540
1
+ konduktor/__init__.py,sha256=byGYtA7z96XhkUiAZZrm_ZqesFcaVhRpP7t3rOkRlnI,1540
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
4
  konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
@@ -7,7 +7,7 @@ konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4
7
7
  konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
8
8
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
9
9
  konduktor/backends/jobset.py,sha256=UdhwAuZODLMbLY51Y2zOBsh6wg4Pb84oHVvUKzx3Z2w,8434
10
- konduktor/backends/jobset_utils.py,sha256=mOjK3oFgmNacpg956r0qtR7cdZ0PPzXYVVuIr3QqKuI,22170
10
+ konduktor/backends/jobset_utils.py,sha256=esudKceD7iFjzYokRGEkAQd21GxsHvgQHTEBMU0rsdM,22145
11
11
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
12
12
  konduktor/cli.py,sha256=qtktD8N17IRC5MYEdaE0o3pv8EI36cvyyQkYUFi5_nQ,35590
13
13
  konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
@@ -63,12 +63,12 @@ konduktor/data/storage.py,sha256=o2So-bY9glvgbGdoN7AQNYmNnvGf1AUDPpImtadRL90,352
63
63
  konduktor/data/storage_utils.py,sha256=n4GivkN0KMqmyOTDznF0Z-hzsJvm7KCEh5i5HgFAT-4,20806
64
64
  konduktor/execution.py,sha256=NCl2bgo5p1ZZl8HLaXT-juAe9PXr-iCJv0md2sT7A20,18395
65
65
  konduktor/kube_client.py,sha256=lC-U_1hLRG3mDN8tBxYc4VZ3BS5BzKm8hlt-lE3505A,5938
66
- konduktor/logging.py,sha256=mBCoCTNhDEkUxd4tsse4mw-aVzSGohhXYf16ViR0ch4,2722
66
+ konduktor/logging.py,sha256=mA1JCCWPCqQMRqEpE4l6D6vOYdbtbQXr0BuEk9RR790,3177
67
67
  konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4bo3lrigRmhf8NXBHE,1730
68
68
  konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
69
69
  konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
70
70
  konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
71
- konduktor/resource.py,sha256=Fg4kon7jQ9xDo9Iz8Q0J8doIRmTkSwIhYXLH6jbtRO8,19610
71
+ konduktor/resource.py,sha256=nHgPWXCbWj5sWyslNngrFypMN1K0Dksb0yHbJqWaei8,19612
72
72
  konduktor/task.py,sha256=ofwd8WIhfD6C3ThLcv6X3GUzQHyZ6ddjUagE-umF4K0,35207
73
73
  konduktor/templates/jobset.yaml.j2,sha256=rdURknodtgLp4zoA2PX86Nn4wPpi3tr5l4IG55aWBRg,1059
74
74
  konduktor/templates/pod.yaml.j2,sha256=SlK6XKSwjuFJtBimlrUiFTcx7G_00XDtEopIKXBg5SI,16635
@@ -90,9 +90,9 @@ konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo
90
90
  konduktor/utils/schemas.py,sha256=VGPERAso2G4sVAznsJ80qT2Q-I_EFxXw6Rfcw-vkYgQ,16535
91
91
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
92
92
  konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
93
- konduktor/utils/validator.py,sha256=K-eEmwq4qgYcOhMv6SqgIPcrpWqDusH0f8EBkzv827Q,2429
94
- konduktor_nightly-0.1.0.dev20250624105122.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
95
- konduktor_nightly-0.1.0.dev20250624105122.dist-info/METADATA,sha256=DuJtRE293dmNcL7tu-Evzfx08EczgoxOyXAJkz3CalI,4289
96
- konduktor_nightly-0.1.0.dev20250624105122.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
97
- konduktor_nightly-0.1.0.dev20250624105122.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
98
- konduktor_nightly-0.1.0.dev20250624105122.dist-info/RECORD,,
93
+ konduktor/utils/validator.py,sha256=uCRlScO1NYxsbTNKY9dkoqvlO8S0ISIIB8XmX2ItcO8,2793
94
+ konduktor_nightly-0.1.0.dev20250626105144.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
95
+ konduktor_nightly-0.1.0.dev20250626105144.dist-info/METADATA,sha256=xv1mwXgJ8q9jP3vLjxJp9M6x7QwdHJ5dQpEthUW8cnQ,4247
96
+ konduktor_nightly-0.1.0.dev20250626105144.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
97
+ konduktor_nightly-0.1.0.dev20250626105144.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
98
+ konduktor_nightly-0.1.0.dev20250626105144.dist-info/RECORD,,