konduktor-nightly 0.1.0.dev20250603105033__py3-none-any.whl → 0.1.0.dev20250605105049__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +2 -2
- konduktor/backends/jobset_utils.py +1 -0
- konduktor/cli.py +18 -1
- konduktor/resource.py +12 -3
- konduktor/templates/jobset.yaml.j2 +4 -0
- konduktor/templates/pod.yaml.j2 +2 -2
- konduktor/utils/accelerator_registry.py +1 -8
- konduktor/utils/schemas.py +3 -0
- {konduktor_nightly-0.1.0.dev20250603105033.dist-info → konduktor_nightly-0.1.0.dev20250605105049.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250603105033.dist-info → konduktor_nightly-0.1.0.dev20250605105049.dist-info}/RECORD +13 -13
- {konduktor_nightly-0.1.0.dev20250603105033.dist-info → konduktor_nightly-0.1.0.dev20250605105049.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250603105033.dist-info → konduktor_nightly-0.1.0.dev20250605105049.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250603105033.dist-info → konduktor_nightly-0.1.0.dev20250605105049.dist-info}/entry_points.txt +0 -0
konduktor/__init__.py
CHANGED
@@ -14,7 +14,7 @@ __all__ = [
|
|
14
14
|
]
|
15
15
|
|
16
16
|
# Replaced with the current commit when building the wheels.
|
17
|
-
_KONDUKTOR_COMMIT_SHA = '
|
17
|
+
_KONDUKTOR_COMMIT_SHA = 'b5ac51935edc296ff721f11d112e64438b366608'
|
18
18
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
19
19
|
|
20
20
|
|
@@ -48,5 +48,5 @@ def _get_git_commit():
|
|
48
48
|
|
49
49
|
|
50
50
|
__commit__ = _get_git_commit()
|
51
|
-
__version__ = '1.0.0.dev0.1.0.
|
51
|
+
__version__ = '1.0.0.dev0.1.0.dev20250605105049'
|
52
52
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
konduktor/backends/jobset_utils.py
CHANGED
@@ -300,6 +300,7 @@ def create_jobset(
|
|
300
300
|
'accelerator_type': accelerator_type,
|
301
301
|
'num_accelerators': num_accelerators,
|
302
302
|
'completions': task.resources.get_completions(),
|
303
|
+
'max_restarts': task.resources.get_max_restarts(),
|
303
304
|
**_JOBSET_METADATA_LABELS,
|
304
305
|
},
|
305
306
|
temp.name,
|
konduktor/cli.py
CHANGED
@@ -102,6 +102,7 @@ def _make_task_with_overrides(
|
|
102
102
|
memory: Optional[str] = None,
|
103
103
|
instance_type: Optional[str] = None,
|
104
104
|
num_nodes: Optional[int] = None,
|
105
|
+
max_restarts: Optional[int] = None,
|
105
106
|
image_id: Optional[str] = None,
|
106
107
|
disk_size: Optional[int] = None,
|
107
108
|
env: Optional[List[Tuple[str, str]]] = None,
|
@@ -147,6 +148,10 @@ def _make_task_with_overrides(
|
|
147
148
|
|
148
149
|
task.set_resources_override(override_params)
|
149
150
|
|
151
|
+
if max_restarts is not None:
|
152
|
+
assert task.resources is not None
|
153
|
+
task.resources.job_config['max_restarts'] = max_restarts
|
154
|
+
|
150
155
|
if num_nodes is not None:
|
151
156
|
task.num_nodes = num_nodes
|
152
157
|
if name is not None:
|
@@ -185,6 +190,16 @@ _TASK_OPTIONS = [
|
|
185
190
|
'supplied.'
|
186
191
|
),
|
187
192
|
),
|
193
|
+
click.option(
|
194
|
+
'--max-restarts',
|
195
|
+
required=False,
|
196
|
+
type=int,
|
197
|
+
help=(
|
198
|
+
'Maximum number of jobset restarts allowed. Overrides YAML.'
|
199
|
+
'Overrides the "max_restarts" config in the YAML if both are '
|
200
|
+
'supplied.'
|
201
|
+
),
|
202
|
+
),
|
188
203
|
click.option(
|
189
204
|
'--cpus',
|
190
205
|
default=None,
|
@@ -631,6 +646,7 @@ def launch(
|
|
631
646
|
cpus: Optional[str],
|
632
647
|
memory: Optional[str],
|
633
648
|
num_nodes: Optional[int],
|
649
|
+
max_restarts: Optional[int],
|
634
650
|
image_id: Optional[str],
|
635
651
|
env_file: Optional[Dict[str, str]],
|
636
652
|
env: List[Tuple[str, str]],
|
@@ -654,6 +670,7 @@ def launch(
|
|
654
670
|
cpus=cpus,
|
655
671
|
memory=memory,
|
656
672
|
num_nodes=num_nodes,
|
673
|
+
max_restarts=max_restarts,
|
657
674
|
image_id=image_id,
|
658
675
|
env=env,
|
659
676
|
disk_size=disk_size,
|
@@ -962,7 +979,7 @@ def create(kind, from_file, from_directory, inline, name):
|
|
962
979
|
old_name = s.metadata.name
|
963
980
|
click.echo(f'Found existing git-ssh secret: {old_name}, deleting it.')
|
964
981
|
kubernetes_utils.delete_secret(
|
965
|
-
|
982
|
+
name=old_name, namespace=namespace, context=context
|
966
983
|
)
|
967
984
|
break
|
968
985
|
|
konduktor/resource.py
CHANGED
@@ -124,7 +124,7 @@ class Resources:
|
|
124
124
|
self._set_cpus(cpus)
|
125
125
|
self._set_memory(memory)
|
126
126
|
self._set_accelerators(accelerators)
|
127
|
-
self.job_config = job_config
|
127
|
+
self.job_config = job_config or {}
|
128
128
|
|
129
129
|
# TODO: move these out of init to prevent repeated calls.
|
130
130
|
self._try_validate_cpus_mem()
|
@@ -386,8 +386,17 @@ class Resources:
|
|
386
386
|
return accel_str
|
387
387
|
|
388
388
|
def get_completions(self) -> Optional[int]:
|
389
|
-
|
390
|
-
|
389
|
+
value = self.job_config.get('completions')
|
390
|
+
return int(value) if value is not None else None
|
391
|
+
|
392
|
+
def get_max_restarts(self) -> Optional[int]:
|
393
|
+
value = self.job_config.get('max_restarts')
|
394
|
+
if value is not None:
|
395
|
+
value = int(value)
|
396
|
+
if value < 0:
|
397
|
+
with ux_utils.print_exception_no_traceback():
|
398
|
+
raise ValueError('max_restarts must be a non-negative integer')
|
399
|
+
return value
|
391
400
|
return None
|
392
401
|
|
393
402
|
def copy(self, **override) -> 'Resources':
|
konduktor/templates/pod.yaml.j2
CHANGED
@@ -183,8 +183,8 @@ kubernetes:
|
|
183
183
|
|
184
184
|
# Set root password if SSHKEY is provided
|
185
185
|
# Enable root login in SSH configuration
|
186
|
-
$(prefix_cmd) sed -i '/^#PermitRootLogin/c\PermitRootLogin
|
187
|
-
$(prefix_cmd) sed -i '/^PermitRootLogin/c\PermitRootLogin
|
186
|
+
$(prefix_cmd) sed -i '/^#PermitRootLogin/c\PermitRootLogin without-password' /etc/ssh/sshd_config
|
187
|
+
$(prefix_cmd) sed -i '/^PermitRootLogin/c\PermitRootLogin without-password' /etc/ssh/sshd_config
|
188
188
|
$(prefix_cmd) echo "Root login is enabled."
|
189
189
|
|
190
190
|
# Create the .ssh directory and authorized_keys file if they don't exist
|
konduktor/utils/accelerator_registry.py
CHANGED
@@ -1,13 +1,6 @@
|
|
1
1
|
"""Accelerator registry."""
|
2
2
|
|
3
|
-
_ACCELERATORS = [
|
4
|
-
'A100',
|
5
|
-
'A100-80GB',
|
6
|
-
'B200',
|
7
|
-
'H100',
|
8
|
-
'H200',
|
9
|
-
'L40S',
|
10
|
-
]
|
3
|
+
_ACCELERATORS = ['A100', 'A100-80GB', 'B200', 'H100', 'H200', 'L40S', 'T4']
|
11
4
|
|
12
5
|
|
13
6
|
def canonicalize_accelerator_name(accelerator: str) -> str:
|
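konduktor/utils/schemas.py
CHANGED
{konduktor_nightly-0.1.0.dev20250603105033.dist-info → konduktor_nightly-0.1.0.dev20250605105049.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@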
|
|
1
|
-
konduktor/__init__.py,sha256=
|
1
|
+
konduktor/__init__.py,sha256=6W1DQ2KPTCksSVesiBXKLOt4MwKcbZL8C1ot4Ov3dYg,1540
|
2
2
|
konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
3
|
konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
|
4
4
|
konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
|
@@ -7,9 +7,9 @@ konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4
|
|
7
7
|
konduktor/backends/__init__.py,sha256=1Q6sqqdeMYarpTX_U-QVywJYf7idiUTRsyP-E4BQSOw,129
|
8
8
|
konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
|
9
9
|
konduktor/backends/jobset.py,sha256=UdhwAuZODLMbLY51Y2zOBsh6wg4Pb84oHVvUKzx3Z2w,8434
|
10
|
-
konduktor/backends/jobset_utils.py,sha256=
|
10
|
+
konduktor/backends/jobset_utils.py,sha256=DSdWdCUPdTh9EzFG0z_GoHVeCW49kLwTujaKjC1ko3I,21430
|
11
11
|
konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
|
12
|
-
konduktor/cli.py,sha256=
|
12
|
+
konduktor/cli.py,sha256=4eYDqSvwEn38rDNk6fKiSk4BwdmSna2XonUrI9-o7w0,33903
|
13
13
|
konduktor/config.py,sha256=J50JxC6MsXMnlrJPXdDUMr38C89xvOO7mR8KJ6fyils,15520
|
14
14
|
konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
|
15
15
|
konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -68,14 +68,14 @@ konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4
|
|
68
68
|
konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
|
69
69
|
konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
|
70
70
|
konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
|
71
|
-
konduktor/resource.py,sha256=
|
71
|
+
konduktor/resource.py,sha256=Fg4kon7jQ9xDo9Iz8Q0J8doIRmTkSwIhYXLH6jbtRO8,19610
|
72
72
|
konduktor/task.py,sha256=ofwd8WIhfD6C3ThLcv6X3GUzQHyZ6ddjUagE-umF4K0,35207
|
73
|
-
konduktor/templates/jobset.yaml.j2,sha256=
|
74
|
-
konduktor/templates/pod.yaml.j2,sha256=
|
73
|
+
konduktor/templates/jobset.yaml.j2,sha256=rdURknodtgLp4zoA2PX86Nn4wPpi3tr5l4IG55aWBRg,1059
|
74
|
+
konduktor/templates/pod.yaml.j2,sha256=7KyUy4orakJ8nI7ee8AIGGY4HHfhs-6zwUlAzhekZHw,16112
|
75
75
|
konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
76
76
|
konduktor/usage/constants.py,sha256=gCL8afIHZhO0dcxbJGpESE9sCC1cBSbeRnQ8GwNOY4M,612
|
77
77
|
konduktor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
|
-
konduktor/utils/accelerator_registry.py,sha256=
|
78
|
+
konduktor/utils/accelerator_registry.py,sha256=LmhLPR-N9kxrk0UBYYcF1O6ADv9HHwsn_Pc3SOYFNzs,553
|
79
79
|
konduktor/utils/annotations.py,sha256=oy2-BLydkFt3KWkXDuaGY84d6b7iISuy4eAT9uXk0Fc,2225
|
80
80
|
konduktor/utils/base64_utils.py,sha256=mF-Tw98mFRG70YE4w6s9feuQSCYZHOb8YatBZwMugyI,3130
|
81
81
|
konduktor/utils/common_utils.py,sha256=4yG5Kjvu1hu6x2nKNaaCUKQNrheUaG61Qe913MFPry8,15060
|
@@ -87,12 +87,12 @@ konduktor/utils/kubernetes_utils.py,sha256=VG7qatUFyWHY-PCQ8fYWh2kn2TMwfg84cn-Vk
|
|
87
87
|
konduktor/utils/log_utils.py,sha256=oFCKkYKCS_e_GRw_-0F7WsiIZNqJL1RZ4cD5-zh59Q4,9765
|
88
88
|
konduktor/utils/loki_utils.py,sha256=h2ZvZQr1nE_wXXsKsGMjhG2s2MXknNd4icydTR_ruKU,3539
|
89
89
|
konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo,3583
|
90
|
-
konduktor/utils/schemas.py,sha256=
|
90
|
+
konduktor/utils/schemas.py,sha256=VGPERAso2G4sVAznsJ80qT2Q-I_EFxXw6Rfcw-vkYgQ,16535
|
91
91
|
konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
|
92
92
|
konduktor/utils/ux_utils.py,sha256=czCwiS1bDqgeKtzAJctczpLwFZzAse7WuozdvzEFYJ4,7437
|
93
93
|
konduktor/utils/validator.py,sha256=tgBghVyedyzGx84-U2Qfoh_cJBE3oUk9gclMW90ORks,691
|
94
|
-
konduktor_nightly-0.1.0.
|
95
|
-
konduktor_nightly-0.1.0.
|
96
|
-
konduktor_nightly-0.1.0.
|
97
|
-
konduktor_nightly-0.1.0.
|
98
|
-
konduktor_nightly-0.1.0.
|
94
|
+
konduktor_nightly-0.1.0.dev20250605105049.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
|
95
|
+
konduktor_nightly-0.1.0.dev20250605105049.dist-info/METADATA,sha256=uZGkLvGqATI96wS6l8jrBSQo7B9x2VPBwaZw-PhDWa0,4289
|
96
|
+
konduktor_nightly-0.1.0.dev20250605105049.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
97
|
+
konduktor_nightly-0.1.0.dev20250605105049.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
|
98
|
+
konduktor_nightly-0.1.0.dev20250605105049.dist-info/RECORD,,
|
File without changes
|
File without changes
|