konduktor-nightly 0.1.0.dev20250805105421__py3-none-any.whl → 0.1.0.dev20250807105334__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

konduktor/cli.py CHANGED
@@ -51,7 +51,7 @@ from rich.progress import track
51
51
  import konduktor
52
52
  from konduktor import check as konduktor_check
53
53
  from konduktor import logging
54
- from konduktor.backends import jobset_utils
54
+ from konduktor.backends import deployment_utils, jobset_utils
55
55
  from konduktor.utils import (
56
56
  common_utils,
57
57
  kubernetes_utils,
@@ -107,6 +107,10 @@ def _make_task_with_overrides(
107
107
  disk_size: Optional[int] = None,
108
108
  env: Optional[List[Tuple[str, str]]] = None,
109
109
  field_to_ignore: Optional[List[str]] = None,
110
+ min_replicas: Optional[int] = None,
111
+ max_replicas: Optional[int] = None,
112
+ ports: Optional[int] = None,
113
+ probe: Optional[str] = None,
110
114
  ) -> konduktor.Task:
111
115
  """Creates a task from an entrypoint with overrides.
112
116
 
@@ -135,6 +139,14 @@ def _make_task_with_overrides(
135
139
  disk_size=disk_size,
136
140
  )
137
141
 
142
+ serving_override_params = _parse_serving_override_params(
143
+ num_nodes=num_nodes,
144
+ min_replicas=min_replicas,
145
+ max_replicas=max_replicas,
146
+ ports=ports,
147
+ probe=probe,
148
+ )
149
+
138
150
  if field_to_ignore is not None:
139
151
  _pop_and_ignore_fields_in_override_params(override_params, field_to_ignore)
140
152
 
@@ -147,6 +159,8 @@ def _make_task_with_overrides(
147
159
  task.workdir = workdir
148
160
 
149
161
  task.set_resources_override(override_params)
162
+ if task.serving:
163
+ task.set_serving_override(serving_override_params)
150
164
 
151
165
  if max_restarts is not None:
152
166
  assert task.resources is not None
@@ -299,6 +313,49 @@ _EXTRA_RESOURCES_OPTIONS = [
299
313
  ),
300
314
  ),
301
315
  ]
316
+ _EXTRA_SERVING_OPTIONS = [
317
+ click.option(
318
+ '--min-replicas',
319
+ required=False,
320
+ type=int,
321
+ help=(
322
+ 'Minimum number of replicas to run for the service. '
323
+ 'Overrides the "min_replicas" field in the YAML if both '
324
+ 'are supplied.'
325
+ ),
326
+ ),
327
+ click.option(
328
+ '--max-replicas',
329
+ required=False,
330
+ type=int,
331
+ help=(
332
+ 'Maximum number of replicas to allow for the service. '
333
+ 'Overrides the "max_replicas" field in the YAML if both '
334
+ 'are supplied.'
335
+ ),
336
+ ),
337
+ click.option(
338
+ '--ports',
339
+ required=False,
340
+ type=int,
341
+ help=(
342
+ 'The container port on which your service will listen for HTTP '
343
+ 'traffic. Overrides the "ports" field in the YAML if both '
344
+ 'are supplied.'
345
+ ),
346
+ ),
347
+ click.option(
348
+ '--probe',
349
+ required=False,
350
+ type=str,
351
+ help=(
352
+ 'The HTTP path to use for health checks (liveness, readiness, and '
353
+ 'startup probes). Overrides the "probe" field in the YAML '
354
+ 'if both are supplied. The service should respond with HTTP 200 on '
355
+ 'this path when healthy.'
356
+ ),
357
+ ),
358
+ ]
302
359
 
303
360
 
304
361
  def _get_click_major_version():
@@ -354,12 +411,36 @@ def _parse_override_params(
354
411
  return override_params
355
412
 
356
413
 
414
+ def _parse_serving_override_params(
415
+ num_nodes: Optional[int] = None,
416
+ min_replicas: Optional[int] = None,
417
+ max_replicas: Optional[int] = None,
418
+ ports: Optional[int] = None,
419
+ probe: Optional[str] = None,
420
+ ) -> Dict[str, Any]:
421
+ """Parses the relevant serving override parameters into a dictionary."""
422
+ override_params: Dict[str, Any] = {}
423
+ if num_nodes is not None:
424
+ override_params['num_nodes'] = num_nodes
425
+ if min_replicas is not None:
426
+ override_params['min_replicas'] = min_replicas
427
+ if max_replicas is not None:
428
+ override_params['max_replicas'] = max_replicas
429
+ if ports is not None:
430
+ override_params['ports'] = ports
431
+ if probe is not None:
432
+ override_params['probe'] = probe
433
+
434
+ return override_params
435
+
436
+
357
437
  def _launch_with_confirm(
358
438
  task: konduktor.Task,
359
439
  *,
360
440
  dryrun: bool,
361
441
  detach_run: bool,
362
442
  no_confirm: bool,
443
+ serving: bool,
363
444
  ):
364
445
  """Launch a cluster with a Task."""
365
446
 
@@ -367,17 +448,27 @@ def _launch_with_confirm(
367
448
  if not no_confirm:
368
449
  # Prompt if (1) --cluster is None, or (2) cluster doesn't exist, or (3)
369
450
  # it exists but is STOPPED.
370
- prompt = (
371
- f'Launching a new job {colorama.Style.BRIGHT}'
372
- f'{colorama.Fore.GREEN}{task.name}{colorama.Style.RESET_ALL}. '
373
- 'Proceed?'
374
- )
451
+ if serving:
452
+ prompt = (
453
+ f'Launching a new deployment {colorama.Style.BRIGHT}'
454
+ f'{colorama.Fore.GREEN}{task.name}{colorama.Style.RESET_ALL}. '
455
+ 'Proceed?'
456
+ )
457
+ else:
458
+ prompt = (
459
+ f'Launching a new job {colorama.Style.BRIGHT}'
460
+ f'{colorama.Fore.GREEN}{task.name}{colorama.Style.RESET_ALL}. '
461
+ 'Proceed?'
462
+ )
375
463
  if prompt is not None:
376
464
  confirm_shown = True
377
465
  click.confirm(prompt, default=True, abort=True, show_default=True)
378
466
 
379
467
  if not confirm_shown:
380
- click.secho(f'Running task {task.name}...', fg='yellow')
468
+ if serving:
469
+ click.secho(f'Creating deployment {task.name}...', fg='yellow')
470
+ else:
471
+ click.secho(f'Running task {task.name}...', fg='yellow')
381
472
  return konduktor.launch(
382
473
  task,
383
474
  dryrun=dryrun,
@@ -675,6 +766,11 @@ def launch(
675
766
  image_id=image_id,
676
767
  env=env,
677
768
  disk_size=disk_size,
769
+ # serving stuff
770
+ min_replicas=None,
771
+ max_replicas=None,
772
+ ports=None,
773
+ probe=None,
678
774
  )
679
775
 
680
776
  click.secho(
@@ -693,11 +789,18 @@ def launch(
693
789
  )
694
790
  print(table)
695
791
 
792
+ if task.serving:
793
+ raise click.UsageError(
794
+ 'Serving information detected. Use '
795
+ '`konduktor serve launch` instead for serving.'
796
+ )
797
+
696
798
  job_name = _launch_with_confirm(
697
799
  task,
698
800
  dryrun=dryrun,
699
801
  detach_run=detach_run,
700
802
  no_confirm=yes,
803
+ serving=bool(task.serving),
701
804
  )
702
805
  click.secho(
703
806
  ux_utils.command_hint_messages(ux_utils.CommandHintType.JOB, job_name),
@@ -800,7 +903,7 @@ def down(
800
903
  # Use fnmatch for both wildcard and exact pattern matching
801
904
  pattern_matches = fnmatch.filter(all_job_names, job_pattern)
802
905
  if not pattern_matches:
803
- click.echo(
906
+ click.secho(
804
907
  f'Warning: No jobs found matching pattern "{job_pattern}"',
805
908
  fg='yellow',
806
909
  err=True,
@@ -1147,6 +1250,238 @@ def list_secrets(all_users: bool):
1147
1250
  click.echo(f'{basename:30} kind={kind:10}')
1148
1251
 
1149
1252
 
1253
+ @cli.group(cls=_NaturalOrderGroup)
1254
+ def serve():
1255
+ """Manage LLM serving with Konduktor.
1256
+
1257
+ USAGE: konduktor serve COMMAND
1258
+
1259
+ \b
1260
+ Use one of the following COMMANDS:
1261
+ launch
1262
+ down
1263
+ status
1264
+
1265
+ \b
1266
+ Examples:
1267
+ konduktor serve launch my-deployment
1268
+ konduktor serve down my-deployment
1269
+ konduktor serve status
1270
+
1271
+ \b
1272
+ For details on COMMAND ARGS:
1273
+ konduktor serve launch -h
1274
+ konduktor serve down -h
1275
+ konduktor serve status -h
1276
+ """
1277
+ pass
1278
+
1279
+
1280
+ @serve.command(name='launch')
1281
+ @click.argument(
1282
+ 'entrypoint',
1283
+ required=False,
1284
+ type=str,
1285
+ nargs=-1,
1286
+ )
1287
+ @click.option(
1288
+ '--dryrun',
1289
+ default=False,
1290
+ is_flag=True,
1291
+ help='If True, do not actually create the deployment.',
1292
+ )
1293
+ @click.option(
1294
+ '--detach-run',
1295
+ '-d',
1296
+ default=False,
1297
+ is_flag=True,
1298
+ help=(
1299
+ 'If True, as soon as a job is submitted, return from this call '
1300
+ 'and do not stream execution logs.'
1301
+ ),
1302
+ )
1303
+ @_add_click_options(
1304
+ _TASK_OPTIONS_WITH_NAME + _EXTRA_RESOURCES_OPTIONS + _EXTRA_SERVING_OPTIONS
1305
+ )
1306
+ @click.option(
1307
+ '--yes',
1308
+ '-y',
1309
+ is_flag=True,
1310
+ default=False,
1311
+ required=False,
1312
+ # Disabling quote check here, as there seems to be a bug in pylint,
1313
+ # which incorrectly recognizes the help string as a docstring.
1314
+ # pylint: disable=bad-docstring-quotes
1315
+ help='Skip confirmation prompt.',
1316
+ )
1317
+ def serve_launch(
1318
+ entrypoint: Tuple[str, ...],
1319
+ dryrun: bool,
1320
+ detach_run: bool,
1321
+ name: Optional[str],
1322
+ workdir: Optional[str],
1323
+ cloud: Optional[str],
1324
+ gpus: Optional[str],
1325
+ cpus: Optional[str],
1326
+ memory: Optional[str],
1327
+ num_nodes: Optional[int],
1328
+ max_restarts: Optional[int],
1329
+ image_id: Optional[str],
1330
+ env_file: Optional[Dict[str, str]],
1331
+ env: List[Tuple[str, str]],
1332
+ disk_size: Optional[int],
1333
+ min_replicas: Optional[int],
1334
+ max_replicas: Optional[int],
1335
+ ports: Optional[int],
1336
+ probe: Optional[str],
1337
+ yes: bool,
1338
+ ):
1339
+ """Launch a deployment to serve.
1340
+
1341
+ If ENTRYPOINT points to a valid YAML file, it is read in as the task
1342
+ specification. Otherwise, it is interpreted as a bash command.
1343
+ """
1344
+ # NOTE(dev): Keep the docstring consistent between the Python API and CLI.
1345
+ env = _merge_env_vars(env_file, env)
1346
+
1347
+ task = _make_task_with_overrides(
1348
+ entrypoint=entrypoint,
1349
+ name=name,
1350
+ workdir=workdir,
1351
+ cloud=cloud,
1352
+ gpus=gpus,
1353
+ cpus=cpus,
1354
+ memory=memory,
1355
+ num_nodes=num_nodes,
1356
+ max_restarts=max_restarts,
1357
+ image_id=image_id,
1358
+ env=env,
1359
+ disk_size=disk_size,
1360
+ min_replicas=min_replicas,
1361
+ max_replicas=max_replicas,
1362
+ ports=ports,
1363
+ probe=probe,
1364
+ )
1365
+
1366
+ click.secho(
1367
+ f'Considered resources ({task.num_nodes} nodes):', fg='green', bold=True
1368
+ )
1369
+ table_kwargs = {
1370
+ 'hrules': prettytable.FRAME,
1371
+ 'vrules': prettytable.NONE,
1372
+ 'border': True,
1373
+ }
1374
+ headers = ['CPUs', 'Mem (GB)', 'GPUs']
1375
+ table = log_utils.create_table(headers, **table_kwargs)
1376
+ assert task.resources is not None
1377
+ table.add_row(
1378
+ [task.resources.cpus, task.resources.memory, task.resources.accelerators]
1379
+ )
1380
+ print(table)
1381
+
1382
+ if not task.serving:
1383
+ raise click.UsageError(
1384
+ 'No serving information detected. '
1385
+ 'Use `konduktor launch` instead for workloads.'
1386
+ )
1387
+
1388
+ job_name = _launch_with_confirm(
1389
+ task,
1390
+ dryrun=dryrun,
1391
+ detach_run=detach_run,
1392
+ no_confirm=yes,
1393
+ serving=bool(task.serving),
1394
+ )
1395
+
1396
+ click.secho(f'Deployment Name: {job_name}', fg='green', bold=True)
1397
+
1398
+
1399
+ @serve.command(name='down')
1400
+ @click.argument('names', nargs=-1, required=False)
1401
+ @click.option(
1402
+ '--all', '-a', default=False, is_flag=True, help='Tear down all deployments.'
1403
+ )
1404
+ @click.option(
1405
+ '--yes',
1406
+ '-y',
1407
+ is_flag=True,
1408
+ default=False,
1409
+ required=False,
1410
+ help='Skip confirmation prompt.',
1411
+ )
1412
+ def serve_down(
1413
+ names: List[str],
1414
+ all: bool,
1415
+ yes: bool,
1416
+ ):
1417
+ """Tear down deployments (Deployment, Service, PodAutoscaler).
1418
+
1419
+ Use --all or -a to tear down all deployments.
1420
+
1421
+ Examples:
1422
+
1423
+ \b
1424
+ konduktor serve down my-deployment
1425
+ konduktor serve down -a
1426
+ """
1427
+ context = kubernetes_utils.get_current_kube_config_context_name()
1428
+ namespace = kubernetes_utils.get_kube_config_context_namespace(context)
1429
+
1430
+ all_models = deployment_utils.list_models(namespace)
1431
+
1432
+ if all:
1433
+ names = all_models
1434
+ if not names:
1435
+ logger.warning(
1436
+ f'No deployments found in namespace '
1437
+ f'{namespace}, but continuing teardown.'
1438
+ )
1439
+ elif names:
1440
+ matched = []
1441
+ for pattern in names:
1442
+ matched.extend(fnmatch.filter(all_models, pattern))
1443
+ names = sorted(set(matched))
1444
+ if not names:
1445
+ raise click.ClickException(
1446
+ f'No matching deployments found. Check with: '
1447
+ f'{colorama.Style.BRIGHT}konduktor serve '
1448
+ f'status{colorama.Style.RESET_ALL}'
1449
+ )
1450
+ else:
1451
+ raise click.ClickException(
1452
+ 'No deployments specified. Use --all to tear down all deployments '
1453
+ 'or pass names/patterns.'
1454
+ )
1455
+
1456
+ if not yes:
1457
+ prompt = (
1458
+ f'Tearing down deployment(s) '
1459
+ f'{colorama.Style.BRIGHT}{colorama.Fore.GREEN}{names}'
1460
+ f'{colorama.Style.RESET_ALL}. '
1461
+ f'Proceed?'
1462
+ )
1463
+ click.confirm(prompt, default=True, abort=True, show_default=True)
1464
+
1465
+ for name in track(names, description='Tearing down deployment(s)...'):
1466
+ deployment_utils.delete_serving_specs(name, namespace)
1467
+
1468
+
1469
+ @serve.command(name='status')
1470
+ @click.option(
1471
+ '--all-users',
1472
+ '-u',
1473
+ default=False,
1474
+ is_flag=True,
1475
+ required=False,
1476
+ help='Show all deployments, including those not owned by the ' 'current user.',
1477
+ )
1478
+ def serve_status(all_users: bool):
1479
+ """Show status of deployments launched via `konduktor serve launch`."""
1480
+ context = kubernetes_utils.get_current_kube_config_context_name()
1481
+ namespace = kubernetes_utils.get_kube_config_context_namespace(context)
1482
+ deployment_utils.show_status_table(namespace, all_users=all_users)
1483
+
1484
+
1150
1485
  def main():
1151
1486
  return cli()
1152
1487
 
@@ -25,7 +25,7 @@ KONDUKTOR_CONTROLLER_HEALTH_CHECK_FREQ = 5
25
25
  logger = logging.get_logger('konduktor.controller')
26
26
 
27
27
 
28
- def main():
28
+ def main() -> None:
29
29
  logger.info(
30
30
  f'starting konduktor.controller ver. {constants.KONDUKTOR_CONTROLLER_VERSION}'
31
31
  )
konduktor/execution.py CHANGED
@@ -12,7 +12,7 @@ if typing.TYPE_CHECKING:
12
12
 
13
13
  from konduktor import config, constants
14
14
  from konduktor import logging as konduktor_logging
15
- from konduktor.backends import JobsetBackend
15
+ from konduktor.backends import DeploymentBackend, JobsetBackend
16
16
  from konduktor.data import data_utils
17
17
  from konduktor.data import registry as storage_registry
18
18
  from konduktor.data import storage as storage_lib
@@ -42,7 +42,10 @@ def _execute(
42
42
  the cluster.
43
43
  """
44
44
  # (asaiacai): in the future we may support more backends but not likely
45
- backend = JobsetBackend()
45
+ if task.serving:
46
+ backend = DeploymentBackend() # type: ignore
47
+ else:
48
+ backend = JobsetBackend() # type: ignore
46
49
  # template the commands for syncing the contents within the shell command
47
50
  # initialization of the pod
48
51
  job_name = backend.execute(task, detach_run, dryrun=dryrun)
konduktor/kube_client.py CHANGED
@@ -152,6 +152,14 @@ def crd_client(context: Optional[str] = None):
152
152
  return kubernetes.client.CustomObjectsApi()
153
153
 
154
154
 
155
+ @_api_logging_decorator('urllib3', logging.ERROR)
156
+ @annotations.lru_cache(scope='request')
157
+ def autoscaling_api(context: Optional[str] = None):
158
+ """Return the Kubernetes AutoscalingV2Api client."""
159
+ _load_config(context)
160
+ return kubernetes.client.AutoscalingV2Api()
161
+
162
+
155
163
  def api_exception():
156
164
  return kubernetes.client.rest.ApiException
157
165
 
konduktor/resource.py CHANGED
@@ -399,6 +399,26 @@ class Resources:
399
399
  return value
400
400
  return None
401
401
 
402
+ def get_accelerator_type(self) -> Optional[str]:
403
+ """Returns the first accelerator type from the accelerators dict.
404
+
405
+ Returns:
406
+ The accelerator type (e.g., 'V100', 'A100') or None if no accelerators
407
+ """
408
+ if self.accelerators is None or not self.accelerators:
409
+ return None
410
+ return next(iter(self.accelerators.keys())) # type: ignore
411
+
412
+ def get_accelerator_count(self) -> Optional[int]:
413
+ """Returns the count of the first accelerator type from the accelerators dict.
414
+
415
+ Returns:
416
+ The accelerator count (e.g., 1, 2) or None if no accelerators
417
+ """
418
+ if self.accelerators is None or not self.accelerators:
419
+ return None
420
+ return next(iter(self.accelerators.values())) # type: ignore
421
+
402
422
  def copy(self, **override) -> 'Resources':
403
423
  """Returns a copy of the given Resources."""
404
424
  resources = Resources(
konduktor/serving.py ADDED
@@ -0,0 +1,149 @@
1
+ # Proprietary Changes made for Trainy under the Trainy Software License
2
+ # Original source: skypilot: https://github.com/skypilot-org/skypilot
3
+ # which is Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ # Unless required by applicable law or agreed to in writing, software
8
+ # distributed under the License is distributed on an "AS IS" BASIS,
9
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10
+ # See the License for the specific language governing permissions and
11
+ # limitations under the License.
12
+
13
+ """Serving: configuration for long-running serving deployments."""
14
+
15
+ from typing import Any, Dict, Optional, Union
16
+
17
+ from konduktor import logging
18
+ from konduktor.utils import common_utils, schemas, ux_utils
19
+
20
+ logger = logging.get_logger(__name__)
21
+
22
+
23
+ class Serving:
24
+ """Serving: configuration for deployments.
25
+
26
+ Immutable once created. Use `copy()` to create a modified copy.
27
+
28
+ Used:
29
+ * to represent serving config in tasks
30
+ """
31
+
32
+ _VERSION = 1
33
+
34
+ def __init__(
35
+ self,
36
+ min_replicas: Optional[int] = None,
37
+ max_replicas: Optional[int] = None,
38
+ ports: Optional[int] = 8000,
39
+ probe: Optional[str] = '/health',
40
+ ):
41
+ self._version = self._VERSION
42
+
43
+ if min_replicas is None and max_replicas is None:
44
+ with ux_utils.print_exception_no_traceback():
45
+ raise ValueError(
46
+ 'At least one of min_replicas or ' 'max_replicas must be specified.'
47
+ )
48
+
49
+ if min_replicas is None:
50
+ min_replicas = max_replicas
51
+ if max_replicas is None:
52
+ max_replicas = min_replicas
53
+
54
+ if min_replicas is not None and min_replicas <= 0:
55
+ with ux_utils.print_exception_no_traceback():
56
+ raise ValueError('min_replicas must be >= 1')
57
+
58
+ if (
59
+ max_replicas is not None
60
+ and min_replicas is not None
61
+ and max_replicas < min_replicas
62
+ ):
63
+ with ux_utils.print_exception_no_traceback():
64
+ raise ValueError(
65
+ f'max_replicas ({max_replicas}) must '
66
+ f'be >= min_replicas ({min_replicas})'
67
+ )
68
+
69
+ self._min_replicas = min_replicas
70
+ self._max_replicas = max_replicas
71
+ self._ports = ports
72
+ self._probe = probe
73
+
74
+ @property
75
+ def min_replicas(self) -> int:
76
+ assert self._min_replicas is not None
77
+ return self._min_replicas
78
+
79
+ @property
80
+ def max_replicas(self) -> int:
81
+ assert self._max_replicas is not None
82
+ return self._max_replicas
83
+
84
+ @property
85
+ def ports(self) -> int:
86
+ assert self._ports is not None
87
+ return self._ports
88
+
89
+ @property
90
+ def probe(self) -> Optional[str]:
91
+ return self._probe
92
+
93
+ def get(self, key: str, default=None):
94
+ return {
95
+ 'min_replicas': self._min_replicas,
96
+ 'max_replicas': self._max_replicas,
97
+ 'ports': self._ports,
98
+ 'probe': self._probe,
99
+ }.get(key, default)
100
+
101
+ def copy(self, **override) -> 'Serving':
102
+ """Returns a copy of this Serving with fields overridden."""
103
+ return Serving(
104
+ min_replicas=override.pop('min_replicas', self._min_replicas),
105
+ max_replicas=override.pop('max_replicas', self._max_replicas),
106
+ ports=override.pop('ports', self._ports),
107
+ probe=override.pop('probe', self._probe),
108
+ )
109
+
110
+ @classmethod
111
+ def from_yaml_config(
112
+ cls, config: Optional[Dict[str, Any]], task_run: Optional[str] = None
113
+ ) -> Optional['Serving']:
114
+ if config is None:
115
+ return None
116
+ common_utils.validate_schema(
117
+ config,
118
+ schemas.get_serving_schema(),
119
+ 'Invalid serving config YAML: ',
120
+ )
121
+
122
+ if 'min_replicas' not in config and 'max_replicas' not in config:
123
+ raise ValueError(
124
+ 'At least one of min_replicas or '
125
+ 'max_replicas must be specified in serving'
126
+ )
127
+
128
+ # Determine default probe based on deployment type
129
+ default_probe = None # No probing by default for general deployments
130
+ if task_run and 'vllm.entrypoints.openai.api_server' in task_run:
131
+ default_probe = '/health' # vLLM OpenAI API server deployments get /health by default
132
+
133
+ return cls(
134
+ min_replicas=config.get('min_replicas', None),
135
+ max_replicas=config.get('max_replicas', None),
136
+ ports=config.get('ports', 8000),
137
+ probe=config.get('probe', default_probe),
138
+ )
139
+
140
+ def to_yaml_config(self) -> Dict[str, Union[int, str]]:
141
+ config: Dict[str, Union[int, str]] = {
142
+ 'min_replicas': self._min_replicas or 1,
143
+ 'max_replicas': self._max_replicas or 1,
144
+ 'ports': self._ports or 8000,
145
+ }
146
+ # Only include probe if it's not None
147
+ if self._probe is not None:
148
+ config['probe'] = self._probe
149
+ return config