konduktor-nightly 0.1.0.dev20250623105113__py3-none-any.whl → 0.1.0.dev20250625105100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +2 -2
- konduktor/backends/jobset_utils.py +5 -2
- konduktor/logging.py +26 -8
- konduktor/resource.py +1 -1
- konduktor/utils/validator.py +16 -3
- {konduktor_nightly-0.1.0.dev20250623105113.dist-info → konduktor_nightly-0.1.0.dev20250625105100.dist-info}/METADATA +1 -4
- {konduktor_nightly-0.1.0.dev20250623105113.dist-info → konduktor_nightly-0.1.0.dev20250625105100.dist-info}/RECORD +10 -10
- {konduktor_nightly-0.1.0.dev20250623105113.dist-info → konduktor_nightly-0.1.0.dev20250625105100.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250623105113.dist-info → konduktor_nightly-0.1.0.dev20250625105100.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250623105113.dist-info → konduktor_nightly-0.1.0.dev20250625105100.dist-info}/entry_points.txt +0 -0
konduktor/__init__.py
CHANGED
@@ -14,7 +14,7 @@ __all__ = [
|
|
14
14
|
]
|
15
15
|
|
16
16
|
# Replaced with the current commit when building the wheels.
|
17
|
-
_KONDUKTOR_COMMIT_SHA = '
|
17
|
+
_KONDUKTOR_COMMIT_SHA = '249f48b15859d1a210ca9b28e6e9cd85ac19ac68'
|
18
18
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
19
19
|
|
20
20
|
|
@@ -48,5 +48,5 @@ def _get_git_commit():
|
|
48
48
|
|
49
49
|
|
50
50
|
__commit__ = _get_git_commit()
|
51
|
-
__version__ = '1.0.0.dev0.1.0.dev20250623105113'
|
51
|
+
__version__ = '1.0.0.dev0.1.0.dev20250625105100'
|
52
52
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
konduktor/backends/jobset_utils.py
CHANGED
@@ -10,6 +10,7 @@ from datetime import datetime, timezone
|
|
10
10
|
from typing import Any, Dict, Optional, Tuple
|
11
11
|
from urllib.parse import urlparse
|
12
12
|
|
13
|
+
import click
|
13
14
|
import colorama
|
14
15
|
|
15
16
|
if typing.TYPE_CHECKING:
|
@@ -272,8 +273,10 @@ def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
|
|
272
273
|
)
|
273
274
|
|
274
275
|
# validate pod spec using json schema
|
275
|
-
|
276
|
-
|
276
|
+
try:
|
277
|
+
validator.validate_pod_spec(pod_config['kubernetes']['pod_config']['spec'])
|
278
|
+
except ValueError as e:
|
279
|
+
raise click.UsageError(str(e))
|
277
280
|
|
278
281
|
return pod_config
|
279
282
|
|
konduktor/logging.py
CHANGED
@@ -17,6 +17,7 @@ _FORMAT = '[%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
|
|
17
17
|
_DATE_FORMAT = '%m-%d %H:%M:%S'
|
18
18
|
|
19
19
|
_logging_config = threading.local()
|
20
|
+
_log_path = None
|
20
21
|
|
21
22
|
|
22
23
|
class NewLineFormatter(logging.Formatter):
|
@@ -51,19 +52,36 @@ def set_logging_level(logger: str, level: int):
|
|
51
52
|
|
52
53
|
|
53
54
|
def get_logger(name: str):
|
54
|
-
|
55
|
-
log_level = logging.INFO
|
56
|
-
if os.environ.get('KONDUKTOR_DEBUG', None) == '1':
|
57
|
-
log_level = logging.DEBUG
|
55
|
+
global _log_path
|
58
56
|
|
59
|
-
# Configure the logger
|
60
57
|
logger = logging.getLogger(name)
|
61
|
-
|
62
|
-
|
58
|
+
|
59
|
+
# Avoid duplicate handlers
|
60
|
+
if logger.hasHandlers():
|
61
|
+
return logger
|
62
|
+
|
63
|
+
logger.setLevel(logging.DEBUG) # Always capture all levels internally
|
64
|
+
|
65
|
+
# --- File logging: Always enabled ---
|
66
|
+
if not _log_path:
|
67
|
+
log_dir = os.path.expanduser('~/.konduktor/logs')
|
68
|
+
os.makedirs(log_dir, exist_ok=True)
|
69
|
+
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
|
70
|
+
_log_path = os.path.join(log_dir, f'konduktor-logs-{timestamp}.log')
|
71
|
+
print(f'Log file: {_log_path}')
|
72
|
+
|
73
|
+
fh = logging.FileHandler(_log_path)
|
74
|
+
fh.setLevel(logging.DEBUG)
|
75
|
+
fh.setFormatter(FORMATTER)
|
76
|
+
logger.addHandler(fh)
|
77
|
+
|
78
|
+
# --- Console logging: DEBUG level only if KONDUKTOR_DEBUG=1 ---
|
79
|
+
if os.environ.get('KONDUKTOR_DEBUG') == '1':
|
63
80
|
ch = logging.StreamHandler()
|
64
|
-
ch.setLevel(
|
81
|
+
ch.setLevel(logging.DEBUG)
|
65
82
|
ch.setFormatter(FORMATTER)
|
66
83
|
logger.addHandler(ch)
|
84
|
+
|
67
85
|
logger.propagate = False
|
68
86
|
return logger
|
69
87
|
|
konduktor/resource.py
CHANGED
konduktor/utils/validator.py
CHANGED
@@ -14,12 +14,16 @@ import requests
|
|
14
14
|
from colorama import Fore, Style
|
15
15
|
from filelock import FileLock
|
16
16
|
|
17
|
+
from konduktor import logging
|
18
|
+
|
17
19
|
SCHEMA_VERSION = 'v1.32.0-standalone-strict'
|
18
20
|
SCHEMA_URL = f'https://raw.githubusercontent.com/yannh/kubernetes-json-schema/master/{SCHEMA_VERSION}/podspec.json'
|
19
21
|
SCHEMA_CACHE_PATH = Path.home() / '.konduktor/schemas/podspec.json'
|
20
22
|
SCHEMA_LOCK_PATH = SCHEMA_CACHE_PATH.with_suffix('.lock')
|
21
23
|
CACHE_MAX_AGE_SECONDS = 86400 # 24 hours
|
22
24
|
|
25
|
+
logger = logging.get_logger(__name__)
|
26
|
+
|
23
27
|
|
24
28
|
def case_insensitive_enum(validator, enums, instance, schema):
|
25
29
|
del validator, schema # Unused.
|
@@ -65,6 +69,16 @@ def validate_pod_spec(pod_spec: dict) -> None:
|
|
65
69
|
return
|
66
70
|
|
67
71
|
formatted = [
|
72
|
+
f'- {error.message}'
|
73
|
+
+ (f" at path: {' → '.join(str(p) for p in error.path)}" if error.path else '')
|
74
|
+
for error in errors
|
75
|
+
]
|
76
|
+
|
77
|
+
# Clean log
|
78
|
+
logger.debug('Invalid k8s pod spec/config:\n%s', '\n'.join(formatted))
|
79
|
+
|
80
|
+
# Color only in CLI
|
81
|
+
formatted_colored = [
|
68
82
|
f'{Fore.RED}- {error.message}'
|
69
83
|
+ (f" at path: {' → '.join(str(p) for p in error.path)}" if error.path else '')
|
70
84
|
+ Style.RESET_ALL
|
@@ -72,7 +86,6 @@ def validate_pod_spec(pod_spec: dict) -> None:
|
|
72
86
|
]
|
73
87
|
|
74
88
|
raise ValueError(
|
75
|
-
f'\n{Fore.RED}Invalid k8s pod spec/config: \
|
76
|
-
|
77
|
-
+ '\n'.join(formatted)
|
89
|
+
f'\n{Fore.RED}Invalid k8s pod spec/config: {Style.RESET_ALL}\n'
|
90
|
+
+ '\n'.join(formatted_colored)
|
78
91
|
)
|
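{konduktor_nightly-0.1.0.dev20250623105113.dist-info → konduktor_nightly-0.1.0.dev20250625105100.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@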
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: konduktor-nightly
|
3
|
-
Version: 0.1.0.dev20250623105113
|
3
|
+
Version: 0.1.0.dev20250625105100
|
4
4
|
Summary: GPU Cluster Health Management
|
5
5
|
Author: Andrew Aikawa
|
6
6
|
Author-email: asai@berkeley.edu
|
@@ -68,9 +68,6 @@ resources:
|
|
68
68
|
kueue.x-k8s.io/queue-name: user-queue
|
69
69
|
kueue.x-k8s.io/priority-class: low-priority
|
70
70
|
|
71
|
-
setup: |
|
72
|
-
pip install torch torchvision
|
73
|
-
|
74
71
|
run: |
|
75
72
|
torchrun \
|
76
73
|
--nproc_per_node 8 \
|
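{konduktor_nightly-0.1.0.dev20250623105113.dist-info → konduktor_nightly-0.1.0.dev20250625105100.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@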
|
|
1
|
-
konduktor/__init__.py,sha256=
|
1
|
+
konduktor/__init__.py,sha256=uc7vr7keZUYeHbZs1BjiGOtnD8DknRUw5AdryM8F9lg,1540
|
2
2
|
konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
|
4
4
|
konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
|
@@ -7,7 +7,7 @@ konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4
|
|
7
7
|
konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
|
8
8
|
konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
|
9
9
|
konduktor/backends/jobset.py,sha256=UdhwAuZODLMbLY51Y2zOBsh6wg4Pb84oHVvUKzx3Z2w,8434
|
10
|
-
konduktor/backends/jobset_utils.py,sha256=
|
10
|
+
konduktor/backends/jobset_utils.py,sha256=esudKceD7iFjzYokRGEkAQd21GxsHvgQHTEBMU0rsdM,22145
|
11
11
|
konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
|
12
12
|
konduktor/cli.py,sha256=qtktD8N17IRC5MYEdaE0o3pv8EI36cvyyQkYUFi5_nQ,35590
|
13
13
|
konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
|
@@ -63,12 +63,12 @@ konduktor/data/storage.py,sha256=o2So-bY9glvgbGdoN7AQNYmNnvGf1AUDPpImtadRL90,352
|
|
63
63
|
konduktor/data/storage_utils.py,sha256=n4GivkN0KMqmyOTDznF0Z-hzsJvm7KCEh5i5HgFAT-4,20806
|
64
64
|
konduktor/execution.py,sha256=NCl2bgo5p1ZZl8HLaXT-juAe9PXr-iCJv0md2sT7A20,18395
|
65
65
|
konduktor/kube_client.py,sha256=lC-U_1hLRG3mDN8tBxYc4VZ3BS5BzKm8hlt-lE3505A,5938
|
66
|
-
konduktor/logging.py,sha256=
|
66
|
+
konduktor/logging.py,sha256=mA1JCCWPCqQMRqEpE4l6D6vOYdbtbQXr0BuEk9RR790,3177
|
67
67
|
konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4bo3lrigRmhf8NXBHE,1730
|
68
68
|
konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
|
69
69
|
konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
|
70
70
|
konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
|
71
|
-
konduktor/resource.py,sha256=
|
71
|
+
konduktor/resource.py,sha256=nHgPWXCbWj5sWyslNngrFypMN1K0Dksb0yHbJqWaei8,19612
|
72
72
|
konduktor/task.py,sha256=ofwd8WIhfD6C3ThLcv6X3GUzQHyZ6ddjUagE-umF4K0,35207
|
73
73
|
konduktor/templates/jobset.yaml.j2,sha256=rdURknodtgLp4zoA2PX86Nn4wPpi3tr5l4IG55aWBRg,1059
|
74
74
|
konduktor/templates/pod.yaml.j2,sha256=SlK6XKSwjuFJtBimlrUiFTcx7G_00XDtEopIKXBg5SI,16635
|
@@ -90,9 +90,9 @@ konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo
|
|
90
90
|
konduktor/utils/schemas.py,sha256=VGPERAso2G4sVAznsJ80qT2Q-I_EFxXw6Rfcw-vkYgQ,16535
|
91
91
|
konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
|
92
92
|
konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
|
93
|
-
konduktor/utils/validator.py,sha256=
|
94
|
-
konduktor_nightly-0.1.0.
|
95
|
-
konduktor_nightly-0.1.0.
|
96
|
-
konduktor_nightly-0.1.0.
|
97
|
-
konduktor_nightly-0.1.0.
|
98
|
-
konduktor_nightly-0.1.0.
|
93
|
+
konduktor/utils/validator.py,sha256=uCRlScO1NYxsbTNKY9dkoqvlO8S0ISIIB8XmX2ItcO8,2793
|
94
|
+
konduktor_nightly-0.1.0.dev20250625105100.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
|
95
|
+
konduktor_nightly-0.1.0.dev20250625105100.dist-info/METADATA,sha256=9rBtjMCrltjV0GGyTPOpp_bwVQdC1uO3RvgScWUNlLo,4247
|
96
|
+
konduktor_nightly-0.1.0.dev20250625105100.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
97
|
+
konduktor_nightly-0.1.0.dev20250625105100.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
|
98
|
+
konduktor_nightly-0.1.0.dev20250625105100.dist-info/RECORD,,
|
File without changes
|
File without changes
|