konduktor-nightly 0.1.0.dev20250805105421__py3-none-any.whl → 0.1.0.dev20250807105334__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of konduktor-nightly might be problematic. Click here for more details.
- konduktor/__init__.py +4 -7
- konduktor/backends/__init__.py +2 -4
- konduktor/backends/constants.py +12 -0
- konduktor/backends/deployment.py +179 -0
- konduktor/backends/deployment_utils.py +835 -0
- konduktor/backends/jobset.py +2 -2
- konduktor/backends/jobset_utils.py +16 -266
- konduktor/backends/pod_utils.py +392 -0
- konduktor/cli.py +343 -8
- konduktor/controller/launch.py +1 -1
- konduktor/execution.py +5 -2
- konduktor/kube_client.py +8 -0
- konduktor/resource.py +20 -0
- konduktor/serving.py +149 -0
- konduktor/task.py +61 -0
- konduktor/templates/deployment.yaml.j2 +142 -0
- konduktor/templates/pod.yaml.j2 +36 -0
- konduktor/utils/accelerator_registry.py +1 -1
- konduktor/utils/log_utils.py +1 -1
- konduktor/utils/schemas.py +42 -0
- konduktor/utils/validator.py +51 -16
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/RECORD +26 -21
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250805105421.dist-info → konduktor_nightly-0.1.0.dev20250807105334.dist-info}/entry_points.txt +0 -0
konduktor/cli.py
CHANGED
|
@@ -51,7 +51,7 @@ from rich.progress import track
|
|
|
51
51
|
import konduktor
|
|
52
52
|
from konduktor import check as konduktor_check
|
|
53
53
|
from konduktor import logging
|
|
54
|
-
from konduktor.backends import jobset_utils
|
|
54
|
+
from konduktor.backends import deployment_utils, jobset_utils
|
|
55
55
|
from konduktor.utils import (
|
|
56
56
|
common_utils,
|
|
57
57
|
kubernetes_utils,
|
|
@@ -107,6 +107,10 @@ def _make_task_with_overrides(
|
|
|
107
107
|
disk_size: Optional[int] = None,
|
|
108
108
|
env: Optional[List[Tuple[str, str]]] = None,
|
|
109
109
|
field_to_ignore: Optional[List[str]] = None,
|
|
110
|
+
min_replicas: Optional[int] = None,
|
|
111
|
+
max_replicas: Optional[int] = None,
|
|
112
|
+
ports: Optional[int] = None,
|
|
113
|
+
probe: Optional[str] = None,
|
|
110
114
|
) -> konduktor.Task:
|
|
111
115
|
"""Creates a task from an entrypoint with overrides.
|
|
112
116
|
|
|
@@ -135,6 +139,14 @@ def _make_task_with_overrides(
|
|
|
135
139
|
disk_size=disk_size,
|
|
136
140
|
)
|
|
137
141
|
|
|
142
|
+
serving_override_params = _parse_serving_override_params(
|
|
143
|
+
num_nodes=num_nodes,
|
|
144
|
+
min_replicas=min_replicas,
|
|
145
|
+
max_replicas=max_replicas,
|
|
146
|
+
ports=ports,
|
|
147
|
+
probe=probe,
|
|
148
|
+
)
|
|
149
|
+
|
|
138
150
|
if field_to_ignore is not None:
|
|
139
151
|
_pop_and_ignore_fields_in_override_params(override_params, field_to_ignore)
|
|
140
152
|
|
|
@@ -147,6 +159,8 @@ def _make_task_with_overrides(
|
|
|
147
159
|
task.workdir = workdir
|
|
148
160
|
|
|
149
161
|
task.set_resources_override(override_params)
|
|
162
|
+
if task.serving:
|
|
163
|
+
task.set_serving_override(serving_override_params)
|
|
150
164
|
|
|
151
165
|
if max_restarts is not None:
|
|
152
166
|
assert task.resources is not None
|
|
@@ -299,6 +313,49 @@ _EXTRA_RESOURCES_OPTIONS = [
|
|
|
299
313
|
),
|
|
300
314
|
),
|
|
301
315
|
]
|
|
316
|
+
_EXTRA_SERVING_OPTIONS = [
|
|
317
|
+
click.option(
|
|
318
|
+
'--min-replicas',
|
|
319
|
+
required=False,
|
|
320
|
+
type=int,
|
|
321
|
+
help=(
|
|
322
|
+
'Minimum number of replicas to run for the service. '
|
|
323
|
+
'Overrides the "min_replicas" field in the YAML if both '
|
|
324
|
+
'are supplied.'
|
|
325
|
+
),
|
|
326
|
+
),
|
|
327
|
+
click.option(
|
|
328
|
+
'--max-replicas',
|
|
329
|
+
required=False,
|
|
330
|
+
type=int,
|
|
331
|
+
help=(
|
|
332
|
+
'Maximum number of replicas to allow for the service. '
|
|
333
|
+
'Overrides the "max_replicas" field in the YAML if both '
|
|
334
|
+
'are supplied.'
|
|
335
|
+
),
|
|
336
|
+
),
|
|
337
|
+
click.option(
|
|
338
|
+
'--ports',
|
|
339
|
+
required=False,
|
|
340
|
+
type=int,
|
|
341
|
+
help=(
|
|
342
|
+
'The container port on which your service will listen for HTTP '
|
|
343
|
+
'traffic. Overrides the "ports" field in the YAML if both '
|
|
344
|
+
'are supplied.'
|
|
345
|
+
),
|
|
346
|
+
),
|
|
347
|
+
click.option(
|
|
348
|
+
'--probe',
|
|
349
|
+
required=False,
|
|
350
|
+
type=str,
|
|
351
|
+
help=(
|
|
352
|
+
'The HTTP path to use for health checks (liveness, readiness, and '
|
|
353
|
+
'startup probes). Overrides the "probe" field in the YAML '
|
|
354
|
+
'if both are supplied. The service should respond with HTTP 200 on '
|
|
355
|
+
'this path when healthy.'
|
|
356
|
+
),
|
|
357
|
+
),
|
|
358
|
+
]
|
|
302
359
|
|
|
303
360
|
|
|
304
361
|
def _get_click_major_version():
|
|
@@ -354,12 +411,36 @@ def _parse_override_params(
|
|
|
354
411
|
return override_params
|
|
355
412
|
|
|
356
413
|
|
|
414
|
+
def _parse_serving_override_params(
|
|
415
|
+
num_nodes: Optional[int] = None,
|
|
416
|
+
min_replicas: Optional[int] = None,
|
|
417
|
+
max_replicas: Optional[int] = None,
|
|
418
|
+
ports: Optional[int] = None,
|
|
419
|
+
probe: Optional[str] = None,
|
|
420
|
+
) -> Dict[str, Any]:
|
|
421
|
+
"""Parses the relevant serving override parameters into a dictionary."""
|
|
422
|
+
override_params: Dict[str, Any] = {}
|
|
423
|
+
if num_nodes is not None:
|
|
424
|
+
override_params['num_nodes'] = num_nodes
|
|
425
|
+
if min_replicas is not None:
|
|
426
|
+
override_params['min_replicas'] = min_replicas
|
|
427
|
+
if max_replicas is not None:
|
|
428
|
+
override_params['max_replicas'] = max_replicas
|
|
429
|
+
if ports is not None:
|
|
430
|
+
override_params['ports'] = ports
|
|
431
|
+
if probe is not None:
|
|
432
|
+
override_params['probe'] = probe
|
|
433
|
+
|
|
434
|
+
return override_params
|
|
435
|
+
|
|
436
|
+
|
|
357
437
|
def _launch_with_confirm(
|
|
358
438
|
task: konduktor.Task,
|
|
359
439
|
*,
|
|
360
440
|
dryrun: bool,
|
|
361
441
|
detach_run: bool,
|
|
362
442
|
no_confirm: bool,
|
|
443
|
+
serving: bool,
|
|
363
444
|
):
|
|
364
445
|
"""Launch a cluster with a Task."""
|
|
365
446
|
|
|
@@ -367,17 +448,27 @@ def _launch_with_confirm(
|
|
|
367
448
|
if not no_confirm:
|
|
368
449
|
# Prompt if (1) --cluster is None, or (2) cluster doesn't exist, or (3)
|
|
369
450
|
# it exists but is STOPPED.
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
451
|
+
if serving:
|
|
452
|
+
prompt = (
|
|
453
|
+
f'Launching a new deployment {colorama.Style.BRIGHT}'
|
|
454
|
+
f'{colorama.Fore.GREEN}{task.name}{colorama.Style.RESET_ALL}. '
|
|
455
|
+
'Proceed?'
|
|
456
|
+
)
|
|
457
|
+
else:
|
|
458
|
+
prompt = (
|
|
459
|
+
f'Launching a new job {colorama.Style.BRIGHT}'
|
|
460
|
+
f'{colorama.Fore.GREEN}{task.name}{colorama.Style.RESET_ALL}. '
|
|
461
|
+
'Proceed?'
|
|
462
|
+
)
|
|
375
463
|
if prompt is not None:
|
|
376
464
|
confirm_shown = True
|
|
377
465
|
click.confirm(prompt, default=True, abort=True, show_default=True)
|
|
378
466
|
|
|
379
467
|
if not confirm_shown:
|
|
380
|
-
|
|
468
|
+
if serving:
|
|
469
|
+
click.secho(f'Creating deployment {task.name}...', fg='yellow')
|
|
470
|
+
else:
|
|
471
|
+
click.secho(f'Running task {task.name}...', fg='yellow')
|
|
381
472
|
return konduktor.launch(
|
|
382
473
|
task,
|
|
383
474
|
dryrun=dryrun,
|
|
@@ -675,6 +766,11 @@ def launch(
|
|
|
675
766
|
image_id=image_id,
|
|
676
767
|
env=env,
|
|
677
768
|
disk_size=disk_size,
|
|
769
|
+
# serving stuff
|
|
770
|
+
min_replicas=None,
|
|
771
|
+
max_replicas=None,
|
|
772
|
+
ports=None,
|
|
773
|
+
probe=None,
|
|
678
774
|
)
|
|
679
775
|
|
|
680
776
|
click.secho(
|
|
@@ -693,11 +789,18 @@ def launch(
|
|
|
693
789
|
)
|
|
694
790
|
print(table)
|
|
695
791
|
|
|
792
|
+
if task.serving:
|
|
793
|
+
raise click.UsageError(
|
|
794
|
+
'Serving information detected. Use '
|
|
795
|
+
'`konduktor serve launch` instead for serving.'
|
|
796
|
+
)
|
|
797
|
+
|
|
696
798
|
job_name = _launch_with_confirm(
|
|
697
799
|
task,
|
|
698
800
|
dryrun=dryrun,
|
|
699
801
|
detach_run=detach_run,
|
|
700
802
|
no_confirm=yes,
|
|
803
|
+
serving=bool(task.serving),
|
|
701
804
|
)
|
|
702
805
|
click.secho(
|
|
703
806
|
ux_utils.command_hint_messages(ux_utils.CommandHintType.JOB, job_name),
|
|
@@ -800,7 +903,7 @@ def down(
|
|
|
800
903
|
# Use fnmatch for both wildcard and exact pattern matching
|
|
801
904
|
pattern_matches = fnmatch.filter(all_job_names, job_pattern)
|
|
802
905
|
if not pattern_matches:
|
|
803
|
-
click.
|
|
906
|
+
click.secho(
|
|
804
907
|
f'Warning: No jobs found matching pattern "{job_pattern}"',
|
|
805
908
|
fg='yellow',
|
|
806
909
|
err=True,
|
|
@@ -1147,6 +1250,238 @@ def list_secrets(all_users: bool):
|
|
|
1147
1250
|
click.echo(f'{basename:30} kind={kind:10}')
|
|
1148
1251
|
|
|
1149
1252
|
|
|
1253
|
+
@cli.group(cls=_NaturalOrderGroup)
|
|
1254
|
+
def serve():
|
|
1255
|
+
"""Manage LLM serving with Konduktor.
|
|
1256
|
+
|
|
1257
|
+
USAGE: konduktor serve COMMAND
|
|
1258
|
+
|
|
1259
|
+
\b
|
|
1260
|
+
Use one of the following COMMANDS:
|
|
1261
|
+
launch
|
|
1262
|
+
down
|
|
1263
|
+
status
|
|
1264
|
+
|
|
1265
|
+
\b
|
|
1266
|
+
Examples:
|
|
1267
|
+
konduktor serve launch my-deployment
|
|
1268
|
+
konduktor serve down my-deployment
|
|
1269
|
+
konduktor serve status
|
|
1270
|
+
|
|
1271
|
+
\b
|
|
1272
|
+
For details on COMMAND ARGS:
|
|
1273
|
+
konduktor serve launch -h
|
|
1274
|
+
konduktor serve down -h
|
|
1275
|
+
konduktor serve status -h
|
|
1276
|
+
"""
|
|
1277
|
+
pass
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
@serve.command(name='launch')
|
|
1281
|
+
@click.argument(
|
|
1282
|
+
'entrypoint',
|
|
1283
|
+
required=False,
|
|
1284
|
+
type=str,
|
|
1285
|
+
nargs=-1,
|
|
1286
|
+
)
|
|
1287
|
+
@click.option(
|
|
1288
|
+
'--dryrun',
|
|
1289
|
+
default=False,
|
|
1290
|
+
is_flag=True,
|
|
1291
|
+
help='If True, do not actually run the job.',
|
|
1292
|
+
)
|
|
1293
|
+
@click.option(
|
|
1294
|
+
'--detach-run',
|
|
1295
|
+
'-d',
|
|
1296
|
+
default=False,
|
|
1297
|
+
is_flag=True,
|
|
1298
|
+
help=(
|
|
1299
|
+
'If True, as soon as a job is submitted, return from this call '
|
|
1300
|
+
'and do not stream execution logs.'
|
|
1301
|
+
),
|
|
1302
|
+
)
|
|
1303
|
+
@_add_click_options(
|
|
1304
|
+
_TASK_OPTIONS_WITH_NAME + _EXTRA_RESOURCES_OPTIONS + _EXTRA_SERVING_OPTIONS
|
|
1305
|
+
)
|
|
1306
|
+
@click.option(
|
|
1307
|
+
'--yes',
|
|
1308
|
+
'-y',
|
|
1309
|
+
is_flag=True,
|
|
1310
|
+
default=False,
|
|
1311
|
+
required=False,
|
|
1312
|
+
# Disabling quote check here, as there seems to be a bug in pylint,
|
|
1313
|
+
# which incorrectly recognizes the help string as a docstring.
|
|
1314
|
+
# pylint: disable=bad-docstring-quotes
|
|
1315
|
+
help='Skip confirmation prompt.',
|
|
1316
|
+
)
|
|
1317
|
+
def serve_launch(
|
|
1318
|
+
entrypoint: Tuple[str, ...],
|
|
1319
|
+
dryrun: bool,
|
|
1320
|
+
detach_run: bool,
|
|
1321
|
+
name: Optional[str],
|
|
1322
|
+
workdir: Optional[str],
|
|
1323
|
+
cloud: Optional[str],
|
|
1324
|
+
gpus: Optional[str],
|
|
1325
|
+
cpus: Optional[str],
|
|
1326
|
+
memory: Optional[str],
|
|
1327
|
+
num_nodes: Optional[int],
|
|
1328
|
+
max_restarts: Optional[int],
|
|
1329
|
+
image_id: Optional[str],
|
|
1330
|
+
env_file: Optional[Dict[str, str]],
|
|
1331
|
+
env: List[Tuple[str, str]],
|
|
1332
|
+
disk_size: Optional[int],
|
|
1333
|
+
min_replicas: Optional[int],
|
|
1334
|
+
max_replicas: Optional[int],
|
|
1335
|
+
ports: Optional[int],
|
|
1336
|
+
probe: Optional[str],
|
|
1337
|
+
yes: bool,
|
|
1338
|
+
):
|
|
1339
|
+
"""Launch a deployment to serve.
|
|
1340
|
+
|
|
1341
|
+
If ENTRYPOINT points to a valid YAML file, it is read in as the task
|
|
1342
|
+
specification. Otherwise, it is interpreted as a bash command.
|
|
1343
|
+
"""
|
|
1344
|
+
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
|
|
1345
|
+
env = _merge_env_vars(env_file, env)
|
|
1346
|
+
|
|
1347
|
+
task = _make_task_with_overrides(
|
|
1348
|
+
entrypoint=entrypoint,
|
|
1349
|
+
name=name,
|
|
1350
|
+
workdir=workdir,
|
|
1351
|
+
cloud=cloud,
|
|
1352
|
+
gpus=gpus,
|
|
1353
|
+
cpus=cpus,
|
|
1354
|
+
memory=memory,
|
|
1355
|
+
num_nodes=num_nodes,
|
|
1356
|
+
max_restarts=max_restarts,
|
|
1357
|
+
image_id=image_id,
|
|
1358
|
+
env=env,
|
|
1359
|
+
disk_size=disk_size,
|
|
1360
|
+
min_replicas=min_replicas,
|
|
1361
|
+
max_replicas=max_replicas,
|
|
1362
|
+
ports=ports,
|
|
1363
|
+
probe=probe,
|
|
1364
|
+
)
|
|
1365
|
+
|
|
1366
|
+
click.secho(
|
|
1367
|
+
f'Considered resources ({task.num_nodes} nodes):', fg='green', bold=True
|
|
1368
|
+
)
|
|
1369
|
+
table_kwargs = {
|
|
1370
|
+
'hrules': prettytable.FRAME,
|
|
1371
|
+
'vrules': prettytable.NONE,
|
|
1372
|
+
'border': True,
|
|
1373
|
+
}
|
|
1374
|
+
headers = ['CPUs', 'Mem (GB)', 'GPUs']
|
|
1375
|
+
table = log_utils.create_table(headers, **table_kwargs)
|
|
1376
|
+
assert task.resources is not None
|
|
1377
|
+
table.add_row(
|
|
1378
|
+
[task.resources.cpus, task.resources.memory, task.resources.accelerators]
|
|
1379
|
+
)
|
|
1380
|
+
print(table)
|
|
1381
|
+
|
|
1382
|
+
if not task.serving:
|
|
1383
|
+
raise click.UsageError(
|
|
1384
|
+
'No serving information detected. '
|
|
1385
|
+
'Use `konduktor launch` instead for workloads.'
|
|
1386
|
+
)
|
|
1387
|
+
|
|
1388
|
+
job_name = _launch_with_confirm(
|
|
1389
|
+
task,
|
|
1390
|
+
dryrun=dryrun,
|
|
1391
|
+
detach_run=detach_run,
|
|
1392
|
+
no_confirm=yes,
|
|
1393
|
+
serving=bool(task.serving),
|
|
1394
|
+
)
|
|
1395
|
+
|
|
1396
|
+
click.secho(f'Deployment Name: {job_name}', fg='green', bold=True)
|
|
1397
|
+
|
|
1398
|
+
|
|
1399
|
+
@serve.command(name='down')
|
|
1400
|
+
@click.argument('names', nargs=-1, required=False)
|
|
1401
|
+
@click.option(
|
|
1402
|
+
'--all', '-a', default=False, is_flag=True, help='Tear down all deployments.'
|
|
1403
|
+
)
|
|
1404
|
+
@click.option(
|
|
1405
|
+
'--yes',
|
|
1406
|
+
'-y',
|
|
1407
|
+
is_flag=True,
|
|
1408
|
+
default=False,
|
|
1409
|
+
required=False,
|
|
1410
|
+
help='Skip confirmation prompt.',
|
|
1411
|
+
)
|
|
1412
|
+
def serve_down(
|
|
1413
|
+
names: List[str],
|
|
1414
|
+
all: bool,
|
|
1415
|
+
yes: bool,
|
|
1416
|
+
):
|
|
1417
|
+
"""Tear down deployments (Deployment, Service, PodAutoscaler).
|
|
1418
|
+
|
|
1419
|
+
Use --all or -a to tear down all deployments.
|
|
1420
|
+
|
|
1421
|
+
Examples:
|
|
1422
|
+
|
|
1423
|
+
\b
|
|
1424
|
+
konduktor serve down my-deployment
|
|
1425
|
+
konduktor serve down -a
|
|
1426
|
+
"""
|
|
1427
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
1428
|
+
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
1429
|
+
|
|
1430
|
+
all_models = deployment_utils.list_models(namespace)
|
|
1431
|
+
|
|
1432
|
+
if all:
|
|
1433
|
+
names = all_models
|
|
1434
|
+
if not names:
|
|
1435
|
+
logger.warning(
|
|
1436
|
+
f'No deployments found in namespace '
|
|
1437
|
+
f'{namespace}, but continuing teardown.'
|
|
1438
|
+
)
|
|
1439
|
+
elif names:
|
|
1440
|
+
matched = []
|
|
1441
|
+
for pattern in names:
|
|
1442
|
+
matched.extend(fnmatch.filter(all_models, pattern))
|
|
1443
|
+
names = sorted(set(matched))
|
|
1444
|
+
if not names:
|
|
1445
|
+
raise click.ClickException(
|
|
1446
|
+
f'No matching deployments found. Check with: '
|
|
1447
|
+
f'{colorama.Style.BRIGHT}konduktor serve '
|
|
1448
|
+
f'status{colorama.Style.RESET_ALL}'
|
|
1449
|
+
)
|
|
1450
|
+
else:
|
|
1451
|
+
raise click.ClickException(
|
|
1452
|
+
'No deployments specified. Use --all to tear down all deployments '
|
|
1453
|
+
'or pass names/patterns.'
|
|
1454
|
+
)
|
|
1455
|
+
|
|
1456
|
+
if not yes:
|
|
1457
|
+
prompt = (
|
|
1458
|
+
f'Tearing down deployment(s) '
|
|
1459
|
+
f'{colorama.Style.BRIGHT}{colorama.Fore.GREEN}{names}'
|
|
1460
|
+
f'{colorama.Style.RESET_ALL}. '
|
|
1461
|
+
f'Proceed?'
|
|
1462
|
+
)
|
|
1463
|
+
click.confirm(prompt, default=True, abort=True, show_default=True)
|
|
1464
|
+
|
|
1465
|
+
for name in track(names, description='Tearing down deployment(s)...'):
|
|
1466
|
+
deployment_utils.delete_serving_specs(name, namespace)
|
|
1467
|
+
|
|
1468
|
+
|
|
1469
|
+
@serve.command(name='status')
|
|
1470
|
+
@click.option(
|
|
1471
|
+
'--all-users',
|
|
1472
|
+
'-u',
|
|
1473
|
+
default=False,
|
|
1474
|
+
is_flag=True,
|
|
1475
|
+
required=False,
|
|
1476
|
+
help='Show all deployments, including those not owned by the ' 'current user.',
|
|
1477
|
+
)
|
|
1478
|
+
def serve_status(all_users: bool):
|
|
1479
|
+
"""Show status of deployments launched via `konduktor serve launch`."""
|
|
1480
|
+
context = kubernetes_utils.get_current_kube_config_context_name()
|
|
1481
|
+
namespace = kubernetes_utils.get_kube_config_context_namespace(context)
|
|
1482
|
+
deployment_utils.show_status_table(namespace, all_users=all_users)
|
|
1483
|
+
|
|
1484
|
+
|
|
1150
1485
|
def main():
|
|
1151
1486
|
return cli()
|
|
1152
1487
|
|
konduktor/controller/launch.py
CHANGED
konduktor/execution.py
CHANGED
|
@@ -12,7 +12,7 @@ if typing.TYPE_CHECKING:
|
|
|
12
12
|
|
|
13
13
|
from konduktor import config, constants
|
|
14
14
|
from konduktor import logging as konduktor_logging
|
|
15
|
-
from konduktor.backends import JobsetBackend
|
|
15
|
+
from konduktor.backends import DeploymentBackend, JobsetBackend
|
|
16
16
|
from konduktor.data import data_utils
|
|
17
17
|
from konduktor.data import registry as storage_registry
|
|
18
18
|
from konduktor.data import storage as storage_lib
|
|
@@ -42,7 +42,10 @@ def _execute(
|
|
|
42
42
|
the cluster.
|
|
43
43
|
"""
|
|
44
44
|
# (asaiacai): in the future we may support more backends but not likely
|
|
45
|
-
|
|
45
|
+
if task.serving:
|
|
46
|
+
backend = DeploymentBackend() # type: ignore
|
|
47
|
+
else:
|
|
48
|
+
backend = JobsetBackend() # type: ignore
|
|
46
49
|
# template the commands for syncing the contents within the shell command
|
|
47
50
|
# initialization of the pod
|
|
48
51
|
job_name = backend.execute(task, detach_run, dryrun=dryrun)
|
konduktor/kube_client.py
CHANGED
|
@@ -152,6 +152,14 @@ def crd_client(context: Optional[str] = None):
|
|
|
152
152
|
return kubernetes.client.CustomObjectsApi()
|
|
153
153
|
|
|
154
154
|
|
|
155
|
+
@_api_logging_decorator('urllib3', logging.ERROR)
|
|
156
|
+
@annotations.lru_cache(scope='request')
|
|
157
|
+
def autoscaling_api(context: Optional[str] = None):
|
|
158
|
+
"""Return the Kubernetes AutoscalingV2Api client."""
|
|
159
|
+
_load_config(context)
|
|
160
|
+
return kubernetes.client.AutoscalingV2Api()
|
|
161
|
+
|
|
162
|
+
|
|
155
163
|
def api_exception():
|
|
156
164
|
return kubernetes.client.rest.ApiException
|
|
157
165
|
|
konduktor/resource.py
CHANGED
|
@@ -399,6 +399,26 @@ class Resources:
|
|
|
399
399
|
return value
|
|
400
400
|
return None
|
|
401
401
|
|
|
402
|
+
def get_accelerator_type(self) -> Optional[str]:
|
|
403
|
+
"""Returns the first accelerator type from the accelerators dict.
|
|
404
|
+
|
|
405
|
+
Returns:
|
|
406
|
+
The accelerator type (e.g., 'V100', 'A100') or None if no accelerators
|
|
407
|
+
"""
|
|
408
|
+
if self.accelerators is None or not self.accelerators:
|
|
409
|
+
return None
|
|
410
|
+
return next(iter(self.accelerators.keys())) # type: ignore
|
|
411
|
+
|
|
412
|
+
def get_accelerator_count(self) -> Optional[int]:
|
|
413
|
+
"""Returns the count of the first accelerator type from the accelerators dict.
|
|
414
|
+
|
|
415
|
+
Returns:
|
|
416
|
+
The accelerator count (e.g., 1, 2) or None if no accelerators
|
|
417
|
+
"""
|
|
418
|
+
if self.accelerators is None or not self.accelerators:
|
|
419
|
+
return None
|
|
420
|
+
return next(iter(self.accelerators.values())) # type: ignore
|
|
421
|
+
|
|
402
422
|
def copy(self, **override) -> 'Resources':
|
|
403
423
|
"""Returns a copy of the given Resources."""
|
|
404
424
|
resources = Resources(
|
konduktor/serving.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
# Proprietary Changes made for Trainy under the Trainy Software License
|
|
2
|
+
# Original source: skypilot: https://github.com/skypilot-org/skypilot
|
|
3
|
+
# which is Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
+
# See the License for the specific language governing permissions and
|
|
11
|
+
# limitations under the License.
|
|
12
|
+
|
|
13
|
+
"""Serving: configuration for long-running serving deployments."""
|
|
14
|
+
|
|
15
|
+
from typing import Any, Dict, Optional, Union
|
|
16
|
+
|
|
17
|
+
from konduktor import logging
|
|
18
|
+
from konduktor.utils import common_utils, schemas, ux_utils
|
|
19
|
+
|
|
20
|
+
logger = logging.get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class Serving:
|
|
24
|
+
"""Serving: configuration for deployments.
|
|
25
|
+
|
|
26
|
+
Immutable once created. Use `copy()` to create a modified copy.
|
|
27
|
+
|
|
28
|
+
Used:
|
|
29
|
+
* to represent serving config in tasks
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
_VERSION = 1
|
|
33
|
+
|
|
34
|
+
def __init__(
|
|
35
|
+
self,
|
|
36
|
+
min_replicas: Optional[int] = None,
|
|
37
|
+
max_replicas: Optional[int] = None,
|
|
38
|
+
ports: Optional[int] = 8000,
|
|
39
|
+
probe: Optional[str] = '/health',
|
|
40
|
+
):
|
|
41
|
+
self._version = self._VERSION
|
|
42
|
+
|
|
43
|
+
if min_replicas is None and max_replicas is None:
|
|
44
|
+
with ux_utils.print_exception_no_traceback():
|
|
45
|
+
raise ValueError(
|
|
46
|
+
'At least one of min_replicas or ' 'max_replicas must be specified.'
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
if min_replicas is None:
|
|
50
|
+
min_replicas = max_replicas
|
|
51
|
+
if max_replicas is None:
|
|
52
|
+
max_replicas = min_replicas
|
|
53
|
+
|
|
54
|
+
if min_replicas is not None and min_replicas <= 0:
|
|
55
|
+
with ux_utils.print_exception_no_traceback():
|
|
56
|
+
raise ValueError('min_replicas must be >= 1')
|
|
57
|
+
|
|
58
|
+
if (
|
|
59
|
+
max_replicas is not None
|
|
60
|
+
and min_replicas is not None
|
|
61
|
+
and max_replicas < min_replicas
|
|
62
|
+
):
|
|
63
|
+
with ux_utils.print_exception_no_traceback():
|
|
64
|
+
raise ValueError(
|
|
65
|
+
f'max_replicas ({max_replicas}) must '
|
|
66
|
+
f'be >= min_replicas ({min_replicas})'
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
self._min_replicas = min_replicas
|
|
70
|
+
self._max_replicas = max_replicas
|
|
71
|
+
self._ports = ports
|
|
72
|
+
self._probe = probe
|
|
73
|
+
|
|
74
|
+
@property
|
|
75
|
+
def min_replicas(self) -> int:
|
|
76
|
+
assert self._min_replicas is not None
|
|
77
|
+
return self._min_replicas
|
|
78
|
+
|
|
79
|
+
@property
|
|
80
|
+
def max_replicas(self) -> int:
|
|
81
|
+
assert self._max_replicas is not None
|
|
82
|
+
return self._max_replicas
|
|
83
|
+
|
|
84
|
+
@property
|
|
85
|
+
def ports(self) -> int:
|
|
86
|
+
assert self._ports is not None
|
|
87
|
+
return self._ports
|
|
88
|
+
|
|
89
|
+
@property
|
|
90
|
+
def probe(self) -> Optional[str]:
|
|
91
|
+
return self._probe
|
|
92
|
+
|
|
93
|
+
def get(self, key: str, default=None):
|
|
94
|
+
return {
|
|
95
|
+
'min_replicas': self._min_replicas,
|
|
96
|
+
'max_replicas': self._max_replicas,
|
|
97
|
+
'ports': self._ports,
|
|
98
|
+
'probe': self._probe,
|
|
99
|
+
}.get(key, default)
|
|
100
|
+
|
|
101
|
+
def copy(self, **override) -> 'Serving':
|
|
102
|
+
"""Returns a copy of this Serving with fields overridden."""
|
|
103
|
+
return Serving(
|
|
104
|
+
min_replicas=override.pop('min_replicas', self._min_replicas),
|
|
105
|
+
max_replicas=override.pop('max_replicas', self._max_replicas),
|
|
106
|
+
ports=override.pop('ports', self._ports),
|
|
107
|
+
probe=override.pop('probe', self._probe),
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
@classmethod
|
|
111
|
+
def from_yaml_config(
|
|
112
|
+
cls, config: Optional[Dict[str, Any]], task_run: Optional[str] = None
|
|
113
|
+
) -> Optional['Serving']:
|
|
114
|
+
if config is None:
|
|
115
|
+
return None
|
|
116
|
+
common_utils.validate_schema(
|
|
117
|
+
config,
|
|
118
|
+
schemas.get_serving_schema(),
|
|
119
|
+
'Invalid serving config YAML: ',
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
if 'min_replicas' not in config and 'max_replicas' not in config:
|
|
123
|
+
raise ValueError(
|
|
124
|
+
'At least one of min_replicas or '
|
|
125
|
+
'max_replicas must be specified in serving'
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
# Determine default probe based on deployment type
|
|
129
|
+
default_probe = None # No probing by default for general deployments
|
|
130
|
+
if task_run and 'vllm.entrypoints.openai.api_server' in task_run:
|
|
131
|
+
default_probe = '/health' # Aibrix deployments get /health by default
|
|
132
|
+
|
|
133
|
+
return cls(
|
|
134
|
+
min_replicas=config.get('min_replicas', None),
|
|
135
|
+
max_replicas=config.get('max_replicas', None),
|
|
136
|
+
ports=config.get('ports', 8000),
|
|
137
|
+
probe=config.get('probe', default_probe),
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
def to_yaml_config(self) -> Dict[str, Union[int, str]]:
|
|
141
|
+
config: Dict[str, Union[int, str]] = {
|
|
142
|
+
'min_replicas': self._min_replicas or 1,
|
|
143
|
+
'max_replicas': self._max_replicas or 1,
|
|
144
|
+
'ports': self._ports or 8000,
|
|
145
|
+
}
|
|
146
|
+
# Only include probe if it's not None
|
|
147
|
+
if self._probe is not None:
|
|
148
|
+
config['probe'] = self._probe
|
|
149
|
+
return config
|