konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl → 0.1.0.dev20251215105431__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- konduktor/__init__.py +2 -2
- konduktor/backends/pod_utils.py +14 -11
- konduktor/cli.py +148 -185
- {konduktor_nightly-0.1.0.dev20251128104812.dist-info → konduktor_nightly-0.1.0.dev20251215105431.dist-info}/METADATA +2 -1
- {konduktor_nightly-0.1.0.dev20251128104812.dist-info → konduktor_nightly-0.1.0.dev20251215105431.dist-info}/RECORD +8 -8
- {konduktor_nightly-0.1.0.dev20251128104812.dist-info → konduktor_nightly-0.1.0.dev20251215105431.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20251128104812.dist-info → konduktor_nightly-0.1.0.dev20251215105431.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20251128104812.dist-info → konduktor_nightly-0.1.0.dev20251215105431.dist-info}/entry_points.txt +0 -0
konduktor/__init__.py
CHANGED
|
@@ -11,7 +11,7 @@ from konduktor.task import Task
|
|
|
11
11
|
__all__ = ['launch', 'Resources', 'Task', 'Serving']
|
|
12
12
|
|
|
13
13
|
# Replaced with the current commit when building the wheels.
|
|
14
|
-
_KONDUKTOR_COMMIT_SHA = '
|
|
14
|
+
_KONDUKTOR_COMMIT_SHA = '421390595e3a1b9f263e790323deae61d94da231'
|
|
15
15
|
os.makedirs(os.path.expanduser('~/.konduktor'), exist_ok=True)
|
|
16
16
|
|
|
17
17
|
|
|
@@ -45,5 +45,5 @@ def _get_git_commit():
|
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
__commit__ = _get_git_commit()
|
|
48
|
-
__version__ = '1.0.0.dev0.1.0.dev20251128104812'
|
|
48
|
+
__version__ = '1.0.0.dev0.1.0.dev20251215105431'
|
|
49
49
|
__root_dir__ = os.path.dirname(os.path.abspath(__file__))
|
konduktor/backends/pod_utils.py
CHANGED
|
@@ -28,6 +28,8 @@ if typing.TYPE_CHECKING:
|
|
|
28
28
|
logger = logging.get_logger(__name__)
|
|
29
29
|
|
|
30
30
|
_RUN_DURATION_ANNOTATION_KEY = 'kueue.x-k8s.io/maxRunDurationSeconds'
|
|
31
|
+
# Use a large default (7 days) to mimic "infinite" runtime.
|
|
32
|
+
_DEFAULT_MAX_RUN_DURATION_SECONDS = 604800
|
|
31
33
|
|
|
32
34
|
|
|
33
35
|
def create_pod_spec(task: 'konduktor.Task') -> Dict[str, Any]:
|
|
@@ -471,20 +473,21 @@ def inject_jobset_metadata(jobset_spec: Dict[str, Any], task: 'konduktor.Task')
|
|
|
471
473
|
jobset_spec: The JobSet spec dictionary to modify
|
|
472
474
|
task: The task object containing resource information
|
|
473
475
|
"""
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
maxRunDurationSeconds =
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
476
|
+
assert task.resources is not None, 'Task resources are required'
|
|
477
|
+
labels = task.resources.labels or {}
|
|
478
|
+
|
|
479
|
+
# Add max run duration annotation, defaulting to a practically infinite value.
|
|
480
|
+
maxRunDurationSeconds = labels.get('maxRunDurationSeconds')
|
|
481
|
+
metadata = jobset_spec['jobset']['metadata']
|
|
482
|
+
metadata.setdefault('annotations', {})[_RUN_DURATION_ANNOTATION_KEY] = str(
|
|
483
|
+
maxRunDurationSeconds
|
|
484
|
+
if maxRunDurationSeconds is not None
|
|
485
|
+
else _DEFAULT_MAX_RUN_DURATION_SECONDS
|
|
483
486
|
)
|
|
484
487
|
|
|
485
488
|
# Inject resource labels into JobSet metadata.
|
|
486
|
-
if
|
|
487
|
-
jobset_spec['jobset']['metadata']['labels'].update(
|
|
489
|
+
if labels:
|
|
490
|
+
jobset_spec['jobset']['metadata']['labels'].update(labels)
|
|
488
491
|
|
|
489
492
|
|
|
490
493
|
def merge_pod_into_jobset_template(
|
konduktor/cli.py
CHANGED
|
@@ -274,22 +274,20 @@ _TASK_OPTIONS = [
|
|
|
274
274
|
'--env-file',
|
|
275
275
|
required=False,
|
|
276
276
|
type=dotenv.dotenv_values,
|
|
277
|
-
help=
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
277
|
+
help=(
|
|
278
|
+
'Path to a dotenv file with environment variables to set on the '
|
|
279
|
+
'remote node. If any values from ``--env-file`` conflict '
|
|
280
|
+
'with values set by ``--env``, the ``--env`` value will '
|
|
281
|
+
'be preferred.'
|
|
282
|
+
),
|
|
283
283
|
),
|
|
284
284
|
click.option(
|
|
285
285
|
'--env',
|
|
286
286
|
required=False,
|
|
287
287
|
type=_parse_env_var,
|
|
288
288
|
multiple=True,
|
|
289
|
-
help="""
|
|
290
|
-
Environment variable to set on the remote node.
|
|
291
|
-
It can be specified multiple times.
|
|
292
|
-
Examples:
|
|
289
|
+
help="""\\
|
|
290
|
+
Environment variable to set on the remote node. It can be specified multiple times:
|
|
293
291
|
|
|
294
292
|
\b
|
|
295
293
|
1. ``--env MY_ENV=1``: set ``$MY_ENV`` on the cluster to be 1.
|
|
@@ -299,7 +297,7 @@ _TASK_OPTIONS = [
|
|
|
299
297
|
is run.
|
|
300
298
|
|
|
301
299
|
3. ``--env MY_ENV3``: set ``$MY_ENV3`` on the cluster to be the
|
|
302
|
-
same value of ``$MY_ENV3`` in the local environment.""",
|
|
300
|
+
same value of ``$MY_ENV3`` in the local environment.""", # noqa: E501,
|
|
303
301
|
),
|
|
304
302
|
]
|
|
305
303
|
_TASK_OPTIONS_WITH_NAME = [
|
|
@@ -321,10 +319,10 @@ _EXTRA_RESOURCES_OPTIONS = [
|
|
|
321
319
|
type=str,
|
|
322
320
|
help=(
|
|
323
321
|
'Type and number of GPUs to use. Example values: '
|
|
324
|
-
'"V100:8", "V100" (short for a count of 1)'
|
|
322
|
+
'"V100:8", "V100" (short for a count of 1) '
|
|
325
323
|
'If a new cluster is being launched by this command, this is the '
|
|
326
|
-
'resources to provision. If an existing cluster is being reused, this'
|
|
327
|
-
"
|
|
324
|
+
'resources to provision. If an existing cluster is being reused, this '
|
|
325
|
+
"is seen as the task demand, which must fit the cluster's total "
|
|
328
326
|
'resources and is used for scheduling the task. '
|
|
329
327
|
'Overrides the "accelerators" '
|
|
330
328
|
'config in the YAML if both are supplied. '
|
|
@@ -661,19 +659,19 @@ def status(
|
|
|
661
659
|
|
|
662
660
|
\b
|
|
663
661
|
Examples:
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
662
|
+
konduktor status --limit 10
|
|
663
|
+
konduktor status --before "08/06/25 03:53PM"
|
|
664
|
+
konduktor status --all-users --limit 10 --after "08/06/25 03:53PM"
|
|
667
665
|
|
|
668
666
|
\b
|
|
669
667
|
Notes:
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
Example: "03:53:55PM" → "03:53PM"
|
|
676
|
-
|
|
668
|
+
• When using --before or --after timestamps, "08/06/25" is
|
|
669
|
+
equivalent to "08/06/25 00:00".
|
|
670
|
+
• "03:53PM" is equivalent to "03:53:00PM".
|
|
671
|
+
• Timestamps shown in "konduktor status" are truncated and are in
|
|
672
|
+
the local timezone.
|
|
673
|
+
Example: "03:53:55PM" → "03:53PM" would show up in --after "03:53PM"
|
|
674
|
+
but not in --before "03:53PM".
|
|
677
675
|
"""
|
|
678
676
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
679
677
|
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
@@ -691,8 +689,8 @@ def status(
|
|
|
691
689
|
is_flag=True,
|
|
692
690
|
default=False,
|
|
693
691
|
help=(
|
|
694
|
-
'If specified, do not show logs but exit with a status code
|
|
695
|
-
"job's status: 0 for succeeded, or 1 for all other statuses."
|
|
692
|
+
'[DEPRECATED] If specified, do not show logs but exit with a status code '
|
|
693
|
+
"for the job's status: 0 for succeeded, or 1 for all other statuses."
|
|
696
694
|
),
|
|
697
695
|
)
|
|
698
696
|
@click.option(
|
|
@@ -702,7 +700,7 @@ def status(
|
|
|
702
700
|
help=(
|
|
703
701
|
'Follow the logs of a job. '
|
|
704
702
|
'If --no-follow is specified, print the log so far and exit. '
|
|
705
|
-
'
|
|
703
|
+
'(default: --follow)'
|
|
706
704
|
),
|
|
707
705
|
)
|
|
708
706
|
@click.option(
|
|
@@ -732,7 +730,7 @@ def status(
|
|
|
732
730
|
default='1h',
|
|
733
731
|
help=(
|
|
734
732
|
'Choose how much time from now to look back in logs. '
|
|
735
|
-
'Examples: 30s, 5m, 2h, 1d. Default is 1h.'
|
|
733
|
+
'Examples: 30s, 5m, 2h, 1d. Default is 1h. '
|
|
736
734
|
'Note: currently only applies when streaming (default --follow). '
|
|
737
735
|
'With --no-follow, all available logs are returned.'
|
|
738
736
|
),
|
|
@@ -866,8 +864,10 @@ def launch(
|
|
|
866
864
|
):
|
|
867
865
|
"""Launch a task.
|
|
868
866
|
|
|
869
|
-
|
|
870
|
-
|
|
867
|
+
\b
|
|
868
|
+
Notes:
|
|
869
|
+
• If ENTRYPOINT points to a valid YAML file, it is read in as the task
|
|
870
|
+
specification. Otherwise, it is interpreted as a bash command.
|
|
871
871
|
"""
|
|
872
872
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
|
873
873
|
env = _merge_env_vars(env_file, env)
|
|
@@ -944,6 +944,7 @@ def _find_matching_jobs(
|
|
|
944
944
|
jobs_response: Dict[str, Any],
|
|
945
945
|
namespace: str,
|
|
946
946
|
all_users: Optional[bool],
|
|
947
|
+
all_flag: Optional[bool] = None,
|
|
947
948
|
):
|
|
948
949
|
"""
|
|
949
950
|
Find all jobs matching against the user specified pattern.
|
|
@@ -956,19 +957,21 @@ def _find_matching_jobs(
|
|
|
956
957
|
|
|
957
958
|
jobs_specs = [job for job in jobs_response['items']]
|
|
958
959
|
|
|
959
|
-
if
|
|
960
|
+
if all_flag:
|
|
960
961
|
assert jobs_specs is not None, f'No jobs found in namespace {namespace}'
|
|
961
962
|
assert len(jobs_specs) > 0, f'No jobs found in namespace {namespace}'
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
for job in jobs_specs
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
963
|
+
if all_users:
|
|
964
|
+
# --all with --all-users = all jobs of all users
|
|
965
|
+
jobs = [job['metadata']['name'] for job in jobs_specs]
|
|
966
|
+
else:
|
|
967
|
+
# --all without --all-users = all jobs of current user
|
|
968
|
+
jobs = [
|
|
969
|
+
job['metadata']['name']
|
|
970
|
+
for job in jobs_specs
|
|
971
|
+
if job['metadata']['labels'][backend_constants.USER_LABEL]
|
|
972
|
+
== common_utils.get_cleaned_username()
|
|
973
|
+
]
|
|
974
|
+
return jobs
|
|
972
975
|
elif jobs:
|
|
973
976
|
# Get all available jobs to match against patterns
|
|
974
977
|
if len(jobs_specs) == 0:
|
|
@@ -1070,36 +1073,33 @@ def down(
|
|
|
1070
1073
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
|
1071
1074
|
"""Tear down job(s).
|
|
1072
1075
|
|
|
1073
|
-
|
|
1074
|
-
JOB and ``--all`` are supplied, the latter takes precedence.
|
|
1075
|
-
|
|
1076
|
-
Tearing down a job will delete all associated containers (all billing
|
|
1077
|
-
stops), and any data on the containers disks will be lost. Accelerators
|
|
1078
|
-
(e.g., GPUs) that are part of the job will be deleted too.
|
|
1079
|
-
|
|
1080
|
-
Wildcard patterns are supported using * characters.
|
|
1081
|
-
Examples: "test-*" matches all jobs starting with "test-",
|
|
1082
|
-
"*-gpu" matches all jobs ending with "-gpu".
|
|
1083
|
-
|
|
1076
|
+
\b
|
|
1084
1077
|
Examples:
|
|
1078
|
+
# Tear down a specific job.
|
|
1079
|
+
konduktor down my_job
|
|
1080
|
+
\b
|
|
1081
|
+
# Tear down multiple jobs.
|
|
1082
|
+
konduktor down my_job1 my_job2
|
|
1083
|
+
\b
|
|
1084
|
+
# Tear down all jobs matching a pattern.
|
|
1085
|
+
konduktor down "my_job-*"
|
|
1086
|
+
\b
|
|
1087
|
+
# Tear down all of this users jobs.
|
|
1088
|
+
konduktor down -a
|
|
1089
|
+
konduktor down --all
|
|
1090
|
+
\b
|
|
1091
|
+
# Tear down all jobs across all users
|
|
1092
|
+
konduktor down --all --all-users
|
|
1085
1093
|
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
konduktor down "test-*"
|
|
1096
|
-
\b
|
|
1097
|
-
# Tear down all of this users jobs.
|
|
1098
|
-
konduktor down -a
|
|
1099
|
-
konduktor down --all
|
|
1100
|
-
|
|
1101
|
-
# Tear down all jobs across all users
|
|
1102
|
-
konduktor down --all --all-users
|
|
1094
|
+
\b
|
|
1095
|
+
Notes:
|
|
1096
|
+
• If both JOB and ``--all`` are supplied, the latter takes precedence.
|
|
1097
|
+
• Tearing down a job will delete all associated containers (all billing
|
|
1098
|
+
stops), and any data on the containers disks will be lost. Accelerators
|
|
1099
|
+
(e.g., GPUs) that are part of the job will be deleted too.
|
|
1100
|
+
• Wildcard patterns are supported using * characters.
|
|
1101
|
+
Ex: "test-*" matches all jobs starting with "test-",
|
|
1102
|
+
"*-gpu" matches all jobs ending with "-gpu".
|
|
1103
1103
|
|
|
1104
1104
|
"""
|
|
1105
1105
|
|
|
@@ -1107,7 +1107,7 @@ def down(
|
|
|
1107
1107
|
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
1108
1108
|
jobs_response = jobset_utils.list_jobset(namespace)
|
|
1109
1109
|
assert jobs_response
|
|
1110
|
-
filtered_jobs = _find_matching_jobs(jobs, jobs_response, namespace, all_users)
|
|
1110
|
+
filtered_jobs = _find_matching_jobs(jobs, jobs_response, namespace, all_users, all)
|
|
1111
1111
|
|
|
1112
1112
|
if not yes:
|
|
1113
1113
|
# Prompt if (1) --cluster is None, or (2) cluster doesn't exist, or (3)
|
|
@@ -1154,46 +1154,41 @@ def stop(
|
|
|
1154
1154
|
):
|
|
1155
1155
|
"""Suspend job(s) (manual/user-initiated).
|
|
1156
1156
|
|
|
1157
|
-
|
|
1158
|
-
JOB and ``--all`` are supplied, the latter takes precedence.
|
|
1159
|
-
|
|
1160
|
-
Suspending a job will pause execution and mark the job as SUSPENDED (by user).
|
|
1161
|
-
The job can be resumed later with `konduktor start`.
|
|
1162
|
-
|
|
1163
|
-
If a job is suspended by the system (e.g., due to queueing),
|
|
1164
|
-
it will show as SUSPENDED (by system).
|
|
1165
|
-
|
|
1166
|
-
Wildcard patterns are supported using * characters.
|
|
1167
|
-
Examples: "my_job-*" matches all jobs starting with "my_job-",
|
|
1168
|
-
"*-gpu" matches all jobs ending with "-gpu".
|
|
1169
|
-
|
|
1157
|
+
\b
|
|
1170
1158
|
Examples:
|
|
1159
|
+
# Suspend a specific job.
|
|
1160
|
+
konduktor stop my_job
|
|
1161
|
+
\b
|
|
1162
|
+
# Suspend multiple jobs.
|
|
1163
|
+
konduktor stop my_job1 my_job2
|
|
1164
|
+
\b
|
|
1165
|
+
# Suspend all jobs matching a pattern.
|
|
1166
|
+
konduktor stop "my_job-*"
|
|
1167
|
+
\b
|
|
1168
|
+
# Suspend all of this users jobs.
|
|
1169
|
+
konduktor stop -a
|
|
1170
|
+
konduktor stop --all
|
|
1171
|
+
\b
|
|
1172
|
+
# Suspend all jobs across all users
|
|
1173
|
+
konduktor stop --all --all-users
|
|
1171
1174
|
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
\b
|
|
1183
|
-
# Suspend all of this users jobs.
|
|
1184
|
-
konduktor stop -a
|
|
1185
|
-
konduktor stop --all
|
|
1186
|
-
|
|
1187
|
-
# Suspend all jobs across all users
|
|
1188
|
-
konduktor stop --all --all-users
|
|
1189
|
-
|
|
1175
|
+
\b
|
|
1176
|
+
Notes:
|
|
1177
|
+
• If both JOB and ``--all`` are supplied, the latter takes precedence.
|
|
1178
|
+
• Suspending a job will pause execution and mark the job as SUSPENDED (by user).
|
|
1179
|
+
The job can be resumed later with `konduktor start`.
|
|
1180
|
+
• If a job is suspended by the system (e.g., due to queueing), it
|
|
1181
|
+
will show as SUSPENDED (by system).
|
|
1182
|
+
• Wildcard patterns are supported using * characters.
|
|
1183
|
+
Ex: "test-*" matches all jobs starting with "test-",
|
|
1184
|
+
"*-gpu" matches all jobs ending with "-gpu".
|
|
1190
1185
|
"""
|
|
1191
1186
|
|
|
1192
1187
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
1193
1188
|
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
1194
1189
|
jobs_response = jobset_utils.list_jobset(namespace)
|
|
1195
1190
|
assert jobs_response
|
|
1196
|
-
filtered_jobs = _find_matching_jobs(jobs, jobs_response, namespace, all_users)
|
|
1191
|
+
filtered_jobs = _find_matching_jobs(jobs, jobs_response, namespace, all_users, all)
|
|
1197
1192
|
|
|
1198
1193
|
if not yes:
|
|
1199
1194
|
# Prompt for confirmation
|
|
@@ -1249,39 +1244,34 @@ def start(
|
|
|
1249
1244
|
):
|
|
1250
1245
|
"""Resume suspended job(s) (manual/user-initiated).
|
|
1251
1246
|
|
|
1252
|
-
|
|
1253
|
-
JOB and ``--all`` are supplied, the latter takes precedence.
|
|
1254
|
-
|
|
1255
|
-
Resuming a job will restart execution from where it was suspended.
|
|
1256
|
-
Only suspended jobs can be resumed.
|
|
1257
|
-
|
|
1258
|
-
This command works for both manually suspended jobs (SUSPENDED by user)
|
|
1259
|
-
and system-suspended jobs (SUSPENDED by system).
|
|
1260
|
-
|
|
1261
|
-
Wildcard patterns are supported using * characters.
|
|
1262
|
-
Examples: "my_job-*" matches all jobs starting with "my_job-",
|
|
1263
|
-
"*-gpu" matches all jobs ending with "-gpu".
|
|
1264
|
-
|
|
1247
|
+
\b
|
|
1265
1248
|
Examples:
|
|
1249
|
+
# Resume a specific job.
|
|
1250
|
+
konduktor start my_job
|
|
1251
|
+
\b
|
|
1252
|
+
# Resume multiple jobs.
|
|
1253
|
+
konduktor start my_job1 my_job2
|
|
1254
|
+
\b
|
|
1255
|
+
# Resume all jobs matching a pattern.
|
|
1256
|
+
konduktor start "my_job-*"
|
|
1257
|
+
\b
|
|
1258
|
+
# Resume all of this users suspended jobs.
|
|
1259
|
+
konduktor start -a
|
|
1260
|
+
konduktor start --all
|
|
1261
|
+
\b
|
|
1262
|
+
# Resume all suspended jobs across all users
|
|
1263
|
+
konduktor start --all --all-users
|
|
1266
1264
|
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
\b
|
|
1278
|
-
# Resume all of this users suspended jobs.
|
|
1279
|
-
konduktor start -a
|
|
1280
|
-
konduktor start --all
|
|
1281
|
-
|
|
1282
|
-
# Resume all suspended jobs across all users
|
|
1283
|
-
konduktor start --all --all-users
|
|
1284
|
-
|
|
1265
|
+
\b
|
|
1266
|
+
Notes:
|
|
1267
|
+
• If both JOB and ``--all`` are supplied, the latter takes precedence.
|
|
1268
|
+
• Resuming a job will restart execution from where it was suspended.
|
|
1269
|
+
Only suspended jobs can be resumed.
|
|
1270
|
+
• This command works for both manually suspended jobs (SUSPENDED by user)
|
|
1271
|
+
and system-suspended jobs (SUSPENDED by system).
|
|
1272
|
+
• Wildcard patterns are supported using * characters.
|
|
1273
|
+
Ex: "test-*" matches all jobs starting with "test-",
|
|
1274
|
+
"*-gpu" matches all jobs ending with "-gpu".
|
|
1285
1275
|
"""
|
|
1286
1276
|
|
|
1287
1277
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
@@ -1364,24 +1354,22 @@ def start(
|
|
|
1364
1354
|
nargs=-1,
|
|
1365
1355
|
)
|
|
1366
1356
|
def check(clouds: Tuple[str]):
|
|
1367
|
-
"""Check which clouds are available to use for storage
|
|
1368
|
-
|
|
1369
|
-
This checks storage credentials for a cloud supported by konduktor. If a
|
|
1370
|
-
cloud is detected to be inaccessible, the reason and correction steps will
|
|
1371
|
-
be shown.
|
|
1372
|
-
|
|
1373
|
-
If CLOUDS are specified, checks credentials for only those clouds.
|
|
1374
|
-
|
|
1375
|
-
The enabled clouds are cached and form the "search space" to be considered
|
|
1376
|
-
for each task.
|
|
1357
|
+
"""Check which clouds are available to use for storage with Konduktor
|
|
1377
1358
|
|
|
1359
|
+
\b
|
|
1378
1360
|
Examples:
|
|
1361
|
+
# Check only specific clouds - gs, s3.
|
|
1362
|
+
konduktor check gs
|
|
1363
|
+
konduktor check s3
|
|
1379
1364
|
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1365
|
+
\b
|
|
1366
|
+
Notes:
|
|
1367
|
+
• This checks storage credentials for a cloud supported by konduktor.
|
|
1368
|
+
If a cloud is detected to be inaccessible, the reason and correction
|
|
1369
|
+
steps will be shown.
|
|
1370
|
+
• If CLOUDS are specified, checks credentials for only those clouds.
|
|
1371
|
+
• The enabled clouds are cached and form the "search space" to
|
|
1372
|
+
be considered for each task.
|
|
1385
1373
|
"""
|
|
1386
1374
|
clouds_arg = clouds if len(clouds) > 0 else None
|
|
1387
1375
|
konduktor_check.check(clouds=clouds_arg)
|
|
@@ -1430,23 +1418,12 @@ def secret():
|
|
|
1430
1418
|
|
|
1431
1419
|
USAGE: konduktor secret COMMAND
|
|
1432
1420
|
|
|
1433
|
-
\b
|
|
1434
|
-
Use one of the following COMMANDS:
|
|
1435
|
-
create [FLAGS] [NAME]
|
|
1436
|
-
delete [NAME]
|
|
1437
|
-
list [FLAGS]
|
|
1438
|
-
|
|
1439
1421
|
\b
|
|
1440
1422
|
Examples:
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
\b
|
|
1447
|
-
For details on COMMAND ARGS:
|
|
1448
|
-
konduktor secret create -h
|
|
1449
|
-
konduktor secret list -h
|
|
1423
|
+
konduktor secret create --kind git-ssh --from-file ~/.ssh/id_rsa my-ssh-name
|
|
1424
|
+
konduktor secret create --kind env --inline FOO=bar my-env-name
|
|
1425
|
+
konduktor secret delete my-ssh-name
|
|
1426
|
+
konduktor secret list
|
|
1450
1427
|
"""
|
|
1451
1428
|
|
|
1452
1429
|
|
|
@@ -1637,8 +1614,7 @@ def delete(name):
|
|
|
1637
1614
|
help='Show all secrets, including those not owned by the current user.',
|
|
1638
1615
|
)
|
|
1639
1616
|
def list_secrets(all_users: bool):
|
|
1640
|
-
"""List secrets in the namespace.
|
|
1641
|
-
Defaults to only your secrets unless --all-users is set."""
|
|
1617
|
+
"""List secrets in the namespace."""
|
|
1642
1618
|
|
|
1643
1619
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
1644
1620
|
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
@@ -1683,23 +1659,11 @@ def serve():
|
|
|
1683
1659
|
|
|
1684
1660
|
USAGE: konduktor serve COMMAND
|
|
1685
1661
|
|
|
1686
|
-
\b
|
|
1687
|
-
Use one of the following COMMANDS:
|
|
1688
|
-
launch
|
|
1689
|
-
down
|
|
1690
|
-
status
|
|
1691
|
-
|
|
1692
1662
|
\b
|
|
1693
1663
|
Examples:
|
|
1694
1664
|
konduktor serve launch my-deployment
|
|
1695
1665
|
konduktor serve down my-deployment
|
|
1696
1666
|
konduktor serve status
|
|
1697
|
-
|
|
1698
|
-
\b
|
|
1699
|
-
For details on COMMAND ARGS:
|
|
1700
|
-
konduktor serve launch -h
|
|
1701
|
-
konduktor serve down -h
|
|
1702
|
-
konduktor serve status -h
|
|
1703
1667
|
"""
|
|
1704
1668
|
pass
|
|
1705
1669
|
|
|
@@ -1774,8 +1738,10 @@ def serve_launch(
|
|
|
1774
1738
|
):
|
|
1775
1739
|
"""Launch a deployment to serve.
|
|
1776
1740
|
|
|
1777
|
-
|
|
1778
|
-
|
|
1741
|
+
\b
|
|
1742
|
+
Notes:
|
|
1743
|
+
• If ENTRYPOINT points to a valid YAML file, it is read in as the task
|
|
1744
|
+
specification. Otherwise, it is interpreted as a bash command.
|
|
1779
1745
|
"""
|
|
1780
1746
|
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
|
1781
1747
|
env = _merge_env_vars(env_file, env)
|
|
@@ -1857,13 +1823,10 @@ def serve_down(
|
|
|
1857
1823
|
):
|
|
1858
1824
|
"""Tear down deployments (Deployment, Service, PodAutoscaler).
|
|
1859
1825
|
|
|
1860
|
-
Use --all or -a to tear down all deployments.
|
|
1861
|
-
|
|
1862
|
-
Examples:
|
|
1863
|
-
|
|
1864
1826
|
\b
|
|
1865
|
-
|
|
1866
|
-
|
|
1827
|
+
Examples:
|
|
1828
|
+
konduktor serve down my-deployment
|
|
1829
|
+
konduktor serve down -a
|
|
1867
1830
|
"""
|
|
1868
1831
|
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
1869
1832
|
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: konduktor-nightly
|
|
3
|
-
Version: 0.1.0.dev20251128104812
|
|
3
|
+
Version: 0.1.0.dev20251215105431
|
|
4
4
|
Summary: GPU Cluster Health Management
|
|
5
5
|
Author: Andrew Aikawa
|
|
6
6
|
Author-email: asai@berkeley.edu
|
|
@@ -29,6 +29,7 @@ Requires-Dist: prettytable (>=3.12.0,<4.0.0)
|
|
|
29
29
|
Requires-Dist: psutil (>=7.0.0,<8.0.0)
|
|
30
30
|
Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
|
|
31
31
|
Requires-Dist: rich (>=13.9.4,<14.0.0)
|
|
32
|
+
Requires-Dist: sniffio (>=1.3,<2.0)
|
|
32
33
|
Requires-Dist: websockets (>=15.0.1,<16.0.0)
|
|
33
34
|
Description-Content-Type: text/markdown
|
|
34
35
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
konduktor/__init__.py,sha256=
|
|
1
|
+
konduktor/__init__.py,sha256=A8k1HK8UyBfw1hk53UvZzpp6khOyWkEDT4EAypu6Osc,1574
|
|
2
2
|
konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
|
|
4
4
|
konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
|
|
@@ -11,9 +11,9 @@ konduktor/backends/deployment.py,sha256=d0a3F7dxDbnRKIt4ZO_kQ0_vet0pZvg4bWYzVZ8D
|
|
|
11
11
|
konduktor/backends/deployment_utils.py,sha256=9CmB9CYC_3wxIfIOmTSCN2hbURZ5MpEMTvPwYMUXBRM,49272
|
|
12
12
|
konduktor/backends/jobset.py,sha256=drt8Gc0iYQx18JWXBU6XfhUvC2xCKd8szSJ2JC4O20Q,8640
|
|
13
13
|
konduktor/backends/jobset_utils.py,sha256=g49NY8RFhL_NNd4c1adRLG_Bq3UTFtRURxcAzxnMEYw,26524
|
|
14
|
-
konduktor/backends/pod_utils.py,sha256=
|
|
14
|
+
konduktor/backends/pod_utils.py,sha256=WL6b9_yBqBHjX84hE57uMTYL-rbGo_Ugf2L7-_8NpDc,19422
|
|
15
15
|
konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
|
|
16
|
-
konduktor/cli.py,sha256=
|
|
16
|
+
konduktor/cli.py,sha256=ORnFQub6aSGeZytETn39Dafl1gjH-yihP2r5FnF3EeQ,59591
|
|
17
17
|
konduktor/config.py,sha256=9upqgCCYvcu6fKw7tovEYC1MWTkAAir0_WHPdayylbI,15536
|
|
18
18
|
konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
|
|
19
19
|
konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -100,8 +100,8 @@ konduktor/utils/schemas.py,sha256=cr39nEAgjluhXoUYnvIwCwLBH8rLds37MBsF1uQv1rw,19
|
|
|
100
100
|
konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
|
|
101
101
|
konduktor/utils/ux_utils.py,sha256=LSH4b5lckD157qDF4keThxtkGdxNrAfGKmH1ewhZkm4,8646
|
|
102
102
|
konduktor/utils/validator.py,sha256=UcLvZCk9Cpbbhw8r_ZJtTpMSTfY1NKqcyciKsPzRPZM,17222
|
|
103
|
-
konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
|
|
104
|
-
konduktor_nightly-0.1.0.
|
|
105
|
-
konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
106
|
-
konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
|
|
107
|
-
konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD,,
|
|
103
|
+
konduktor_nightly-0.1.0.dev20251215105431.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
|
|
104
|
+
konduktor_nightly-0.1.0.dev20251215105431.dist-info/METADATA,sha256=WWbQsIiHdKINH1YFtDbNxi8ZW3K_G9P7FyasPy3IWdM,4283
|
|
105
|
+
konduktor_nightly-0.1.0.dev20251215105431.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
106
|
+
konduktor_nightly-0.1.0.dev20251215105431.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
|
|
107
|
+
konduktor_nightly-0.1.0.dev20251215105431.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|