konduktor-nightly 0.1.0.dev20250915104603__py3-none-any.whl → 0.1.0.dev20251107104752__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
- konduktor/__init__.py +2 -2
- konduktor/backends/constants.py +1 -0
- konduktor/backends/deployment.py +27 -10
- konduktor/backends/deployment_utils.py +594 -358
- konduktor/backends/jobset_utils.py +6 -6
- konduktor/backends/pod_utils.py +133 -18
- konduktor/cli.py +61 -29
- konduktor/manifests/aibrix-setup.yaml +430 -0
- konduktor/manifests/apoxy-setup.yaml +42 -9
- konduktor/manifests/apoxy-setup2.yaml +69 -5
- konduktor/resource.py +9 -2
- konduktor/serving.py +10 -6
- konduktor/task.py +8 -5
- konduktor/templates/deployment.yaml.j2 +96 -47
- konduktor/templates/pod.yaml.j2 +123 -9
- konduktor/utils/base64_utils.py +2 -0
- konduktor/utils/schemas.py +1 -1
- konduktor/utils/validator.py +12 -0
- {konduktor_nightly-0.1.0.dev20250915104603.dist-info → konduktor_nightly-0.1.0.dev20251107104752.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250915104603.dist-info → konduktor_nightly-0.1.0.dev20251107104752.dist-info}/RECORD +23 -23
- konduktor/templates/apoxy-deployment.yaml.j2 +0 -33
- {konduktor_nightly-0.1.0.dev20250915104603.dist-info → konduktor_nightly-0.1.0.dev20251107104752.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250915104603.dist-info → konduktor_nightly-0.1.0.dev20251107104752.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250915104603.dist-info → konduktor_nightly-0.1.0.dev20251107104752.dist-info}/entry_points.txt +0 -0
konduktor/backends/deployment_utils.py

@@ -2,7 +2,6 @@
 
 import json
 import os
-import random
 import tempfile
 import typing
 from typing import Any, Dict, List, Optional, Tuple
@@ -50,35 +49,35 @@ _DEPLOYMENT_METADATA_LABELS = {
 }
 
 
-
-
-
-
-
-
-
-    # Count existing backends
-    backends = custom_api.list_cluster_custom_object(
-        group='core.apoxy.dev', version='v1alpha', plural='backends'
-    )
-
-    # Find the highest number
-    max_number = 0
-    for backend in backends.get('items', []):
-        name = backend['metadata']['name']
-        if name.startswith(f'{cluster_name}-backend-'):
-            number = int(name.split('-')[-1])
-            max_number = max(max_number, number)
+def render_specs(
+    task: 'konduktor.Task',
+) -> Tuple[
+    Dict[str, Any], Dict[str, Any], List[Dict[str, Any]], Optional[Dict[str, Any]]
+]:
+    """Renders Kubernetes resource specifications from a Konduktor task.
 
-
-
-
-
+    Takes a Konduktor task and generates the necessary Kubernetes resource
+    specifications for deployment by filling the deployment.yaml.j2 template.
+    Automatically detects deployment type (vLLM/Aibrix vs General) based on
+    the task's run command.
 
+    Args:
+        task: A Konduktor Task object containing deployment configuration
+            including resources, serving settings, and run commands.
 
-
-
-
+    Returns:
+        A tuple containing:
+        - deployment_spec (Dict[str, Any]): Kubernetes Deployment specification
+        - service_spec (Dict[str, Any]): Kubernetes Service specification
+        - http_addon_resources (List[Dict[str, Any]]): List of HTTP add-on resources
+          (HTTPScaledObject and Ingress) for general deployments; empty for vLLM
+        - pa_resource (Optional[Dict[str, Any]]): PodAutoscaler specification for
+          vLLM deployments with autoscaling enabled, None otherwise; empty for general
+
+    Raises:
+        ValueError: If required specs are missing after template rendering or
+            if spec validation fails.
+    """
     general = True
     if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
         general = False
@@ -102,6 +101,9 @@ def render_specs(
             'min_replicas': task.serving.min_replicas if task.serving else 1,
             'max_replicas': task.serving.max_replicas if task.serving else 1,
             'ports': task.serving.ports if task.serving else 8000,
+            'probe_path': (
+                task.serving.get('probe', None) if task.serving else None
+            ),
             'autoscaler': (
                 'true'
                 if (
@@ -111,6 +113,15 @@
                 else 'false'
             ),
             'general': general,
+            # Strip last 3 chars: backend Apoxy setup uses unique
+            # suffixes (3 random numbers)to avoid Apoxy bugs when
+            # deleting/creating TunnelNode resources with same names too
+            # quickly, but we hide this complexity from user-facing endpoints
+            'general_base_host': (
+                f'{get_unique_cluster_name_from_tunnel()[:-3]}2.trainy.us'
+            )
+            if general
+            else None,
             **_DEPLOYMENT_METADATA_LABELS,
         },
         temp.name,
@@ -119,7 +130,8 @@
 
     deployment_spec = None
     service_spec = None
-
+    http_addon_resources = []  # For general deployments
+    pa_resource = None  # For aibrix deployments w autoscaling
 
     for doc in docs:
        kind = doc.get('kind')
@@ -127,153 +139,104 @@
             deployment_spec = doc
         elif kind == 'Service':
             service_spec = doc
-
-
-
-
-
-
+        # HTTPScaledObject resource for general deployments w autoscaling only
+        elif kind == 'HTTPScaledObject':
+            http_addon_resources.append(doc)
+        # Ingress resource for all general deployments
+        elif kind == 'Ingress':
+            http_addon_resources.append(doc)
+        # PodAutoscaler resource for aibrix deployments w autoscaling only
+        elif kind == 'PodAutoscaler':
+            pa_resource = doc
 
     if deployment_spec is None:
         raise ValueError('Deployment manifest not found.')
     if service_spec is None:
         raise ValueError('Service manifest not found.')
+    if general and not http_addon_resources:
+        raise ValueError('General deployment manifests not found.')
+    if (
+        not general
+        and task.serving
+        and task.serving.min_replicas != task.serving.max_replicas
+        and pa_resource is None
+    ):
+        raise ValueError('Aibrix deployment PodAutoscaler manifest not found.')
 
     # Validate specs before returning
     try:
         validator.validate_deployment_spec(deployment_spec)
         validator.validate_service_spec(service_spec)
-        # Only validate HPA if it exists (APA doesn't have official schema)
-        if autoscaler_spec and autoscaler_spec.get('kind') == 'HorizontalPodAutoscaler':
-            validator.validate_horizontalpodautoscaler_spec(autoscaler_spec)
     except ValueError as e:
         raise ValueError(f'Spec validation failed: {e}')
 
-    return deployment_spec, service_spec,
+    return deployment_spec, service_spec, http_addon_resources, pa_resource
 
 
-
-
-
+def create_pod_autoscaler(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> None:
+    """Creates Aibrix PodAutoscaler for non-general deployments."""
+
+    # Check if this is a non-general deployment
     general = True
     if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
         general = False
 
-
-
-
-    if task.run:
-        task.run = task.run.replace('__KONDUKTOR_TASK_NAME__', task.name)
-
-    unique_cluster_name = get_unique_cluster_name_from_tunnel()
-    cluster_name = unique_cluster_name[:-3]
-    deployment_number = get_next_deployment_number(unique_cluster_name)
+    # Only create PA for aibrix deployments w autoscaling
+    if general:
+        return
 
-
-
-
-
-
-
-            'ports': task.serving.ports if task.serving else 8000,
-            'general': general,
-            'cluster_name': cluster_name,
-            'unique_cluster_name': unique_cluster_name,
-            'deployment_number': deployment_number,
-            **_DEPLOYMENT_METADATA_LABELS,
-        },
-        temp.name,
+    # Check if autoscaling is needed
+    if not task.serving or task.serving.min_replicas == task.serving.max_replicas:
+        logger.debug(
+            f'[DEBUG] No autoscaling needed: '
+            f'min={task.serving.min_replicas if task.serving else "None"}, '
+            f'max={task.serving.max_replicas if task.serving else "None"}'
        )
-
-    return docs
+        return  # No autoscaling needed
 
+    logger.debug(
+        f'[DEBUG] PA autoscaling enabled: '
+        f'min={task.serving.min_replicas}, max={task.serving.max_replicas}'
+    )
 
-
-
-    task: 'konduktor.Task',
-    dryrun: bool = False,
-) -> None:
-    """Creates Apoxy resources for a general deployment."""
-
-    apoxy_specs = render_apoxy_spec(task)
+    # Get the PA spec from the rendered template
+    _, _, _, pa_spec = render_specs(task)
 
-    if not
+    if not pa_spec:
+        logger.warning('[DEBUG] No PodAutoscaler found in rendered template')
        return
 
    if dryrun:
-        logger.debug(
+        logger.debug(
+            f'[DRYRUN] Would create PA autoscaler: '
+            f'{pa_spec["metadata"].get("name", "<no-name>")}'
+        )
        return
 
-
-
-    custom_api = kube_client.crd_api(context=context)
-
-    for spec in apoxy_specs:
-        kind = spec.get('kind')
-        name = spec['metadata']['name']
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    custom_api = kube_client.crd_api(context=context)
 
-
-
-
-
-
-
-
-
-
-
-
-                    group='gateway.apoxy.dev',
-                    version='v1',
-                    plural='httproutes',
-                    body=spec,
-                )
-                logger.info(f'Apoxy HTTPRoute {name} created')
-        except Exception as e:
-            if '409' in str(e) or 'AlreadyExists' in str(e):
-                try:
-                    # Delete first, then create
-                    if kind == 'Backend':
-                        custom_api.delete_cluster_custom_object(
-                            group='core.apoxy.dev',
-                            version='v1alpha',
-                            plural='backends',
-                            name=name,
-                        )
-                        custom_api.create_cluster_custom_object(
-                            group='core.apoxy.dev',
-                            version='v1alpha',
-                            plural='backends',
-                            body=spec,
-                        )
-                    elif kind == 'HTTPRoute':
-                        custom_api.delete_cluster_custom_object(
-                            group='gateway.apoxy.dev',
-                            version='v1',
-                            plural='httproutes',
-                            name=name,
-                        )
-                        custom_api.create_cluster_custom_object(
-                            group='gateway.apoxy.dev',
-                            version='v1',
-                            plural='httproutes',
-                            body=spec,
-                        )
-                    logger.info(f'Apoxy {kind} {name} deleted and recreated')
-                except Exception as delete_create_error:
-                    logger.error(
-                        f'Failed to delete and recreate {kind} {name}: '
-                        f'{delete_create_error}'
-                    )
-                    raise
-            elif '404' in str(e) or 'NotFound' in str(e):
-                logger.warning(f'Apoxy CRD for {kind} not found. Skipping {name}.')
-                logger.info('Make sure Apoxy is deployed and CRDs are ready.')
-                continue
-            else:
-                raise
+    # Create KPA for aibrix deployments w autoscaling
+    name = pa_spec.get('metadata', {}).get('name', '<no-name>')
+    try:
+        custom_api.create_namespaced_custom_object(
+            group='autoscaling.aibrix.ai',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='podautoscalers',
+            body=pa_spec,
+        )
+        logger.info(f'Pod autoscaler {name} created')
     except Exception as e:
-
+        if '409' in str(e) or 'AlreadyExists' in str(e):
+            logger.warning(f'Pod autoscaler {name} already exists, skipping')
+        else:
+            logger.error(f'Error creating pod autoscaler {name}: {e}')
+            raise
 
 
 def create_deployment(
@@ -286,7 +249,7 @@ def create_deployment(
 
     assert task.resources is not None, 'Task resources are undefined'
 
-    deployment_spec, _, _ = render_specs(task)
+    deployment_spec, _, _, _ = render_specs(task)
 
     # Inject deployment-specific pod metadata
     pod_utils.inject_deployment_pod_metadata(pod_spec, task)
@@ -330,7 +293,7 @@ def create_service(
 
     assert task.resources is not None, 'Task resources are undefined'
 
-    _, service_spec, _ = render_specs(task)
+    _, service_spec, _, _ = render_specs(task)
 
     if dryrun:
         logger.debug(f'[DRYRUN] Would create service:\n{service_spec}')
@@ -354,39 +317,77 @@ def create_service(
             error_message = error_body.get('message', '')
             logger.error(f'Error creating service: {error_message}')
         except json.JSONDecodeError:
-            logger.error(f'Error creating service: {
+            logger.error(f'Error creating service: {error_message}')
         raise err
 
 
-def
-
+def create_http_addon_resources(
+    namespace: str,
+    task: 'konduktor.Task',
+    dryrun: bool = False,
+) -> None:
+    """Creates HTTP Add-on resources for general deployments."""
+
+    # Check if this is a non-general deployment
+    general = True
+    if task.run and 'vllm.entrypoints.openai.api_server' in task.run:
+        general = False
 
-
+    # Only create PA for aibrix deployments w autoscaling
+    if not general:
        return
 
-
-    kind = autoscaler_spec.get('kind')
-    context = kubernetes_utils.get_current_kube_config_context_name()
+    _, _, http_addon_resources, _ = render_specs(task)
 
-    if
-        logger.debug(
-        return
+    if not http_addon_resources:
+        logger.debug('[DEBUG] No HTTP Add-on resources to create')
+        return
 
-    if
-
-
-            group='autoscaling.aibrix.ai',
-            version='v1alpha1',
-            namespace=namespace,
-            plural='podautoscalers',
-            body=autoscaler_spec,
-        )
-    elif kind == 'HorizontalPodAutoscaler':
-        autoscaling_api = kube_client.autoscaling_api(context=context)
-        return autoscaling_api.create_namespaced_horizontal_pod_autoscaler(
-            namespace=namespace,
-            body=autoscaler_spec,
+    if dryrun:
+        logger.debug(
+            f'[DRYRUN] Would create HTTP Add-on resources:\n' f'{http_addon_resources}'
        )
+        return
+
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    logger.debug(f'[DEBUG] Using Kubernetes context: {context}')
+
+    for resource in http_addon_resources:
+        kind = resource.get('kind')
+        name = resource['metadata']['name']
+
+        logger.debug(f'[DEBUG] Creating {kind}: {name}')
+
+        try:
+            if kind == 'HTTPScaledObject':
+                # Create HTTPScaledObject (only for autoscaling)
+                custom_api = kube_client.crd_api(context=context)
+                custom_api.create_namespaced_custom_object(
+                    group='http.keda.sh',
+                    version='v1alpha1',
+                    namespace=namespace,
+                    plural='httpscaledobjects',
+                    body=resource,
+                )
+                logger.info(f'HTTPScaledObject {name} created')
+
+            elif kind == 'Ingress':
+                # Create Ingress (always needed for external access)
+                networking_api = kube_client.networking_api(context=context)
+                networking_api.create_namespaced_ingress(
+                    namespace=namespace,
+                    body=resource,
+                )
+                logger.info(f'Ingress {name} created')
+
+        except Exception as e:
+            if '409' in str(e) or 'AlreadyExists' in str(e):
+                logger.warning(
+                    f'HTTP Add-on resource {kind} {name} already exists, skipping'
+                )
+            else:
+                logger.error(f'Error creating HTTP Add-on resource {kind} {name}: {e}')
+                raise
 
 
 def list_models(namespace: str) -> List[str]:
@@ -402,7 +403,7 @@ def list_models(namespace: str) -> List[str]:
     label_selector = DEPLOYMENT_NAME_LABEL
     model_names: set[str] = set()
 
-    #
+    # Deployments
     for deploy in apps.list_namespaced_deployment(
         namespace, label_selector=label_selector
     ).items:
@@ -411,7 +412,7 @@ def list_models(namespace: str) -> List[str]:
         if name:
             model_names.add(name)
 
-    #
+    # Services
     for svc in core.list_namespaced_service(
         namespace, label_selector=label_selector
     ).items:
@@ -420,17 +421,16 @@ def list_models(namespace: str) -> List[str]:
         if name:
             model_names.add(name)
 
-    #
-    # APA
+    # Podautoscalers (KPA only)
     try:
-
+        pa_list = crds.list_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
        )
-        for
-            labels =
+        for pa in pa_list.get('items', []):
+            labels = pa.get('metadata', {}).get('labels', {})
            name = labels.get(DEPLOYMENT_NAME_LABEL)
            if name:
                model_names.add(name)
@@ -439,7 +439,7 @@ def list_models(namespace: str) -> List[str]:
            # re-raise if it's not just missing CRD
            raise
        # otherwise ignore, cluster just doesn't have Aibrix CRDs
-        logger.warning('Skipping
+        logger.warning('Skipping PA lookup. Aibrix CRDs not found in cluster')
 
    # HPA
    autoscaling_api = kube_client.autoscaling_api(context=context)
@@ -455,56 +455,134 @@ def list_models(namespace: str) -> List[str]:
     return sorted(model_names)
 
 
-def
+def get_autoscaler_status_for_deployment(
+    name: str, autoscalers_map: dict, is_general: bool
+) -> bool:
+    """Return autoscaler readiness by deployment type.
+
+    - General: returns hpa_ready
+    - vLLM/Aibrix: returns kpa_ready
     """
-
-
-
+
+    def _is_ready(obj: dict) -> bool:
+        try:
+            conditions = obj.get('status', {}).get('conditions') or []
+            kind = obj.get('kind') or ''
+
+            for cond in conditions:
+                if cond.get('type') == 'AbleToScale' and cond.get('status') == 'True':
+                    return True
+
+            if kind == 'HorizontalPodAutoscaler':
+                # Check for ScalingActive condition
+                for cond in conditions:
+                    if cond.get('type') == 'ScalingActive':
+                        # ScalingActive: True means actively scaling
+                        if cond.get('status') == 'True':
+                            return True
+                        # ScalingActive: False with ScalingDisabled reason
+                        # is normal for scale-to-zero
+                        if (
+                            cond.get('status') == 'False'
+                            and cond.get('reason') == 'ScalingDisabled'
+                        ):
+                            return True
+
+                # Treat existing HPA with no conditions as ready
+                return not conditions or any(
+                    c.get('type') == 'AbleToScale' and c.get('status') == 'True'
+                    for c in conditions
+                )
+        except Exception as e:
+            logger.warning(f'Error checking autoscaler readiness: {e}')
+            return False
+
+    kpa_ready = False
+    hpa_ready = False
+
+    dep_autos = autoscalers_map.get(name, {})
+
+    if is_general:
+        if 'hpa' in dep_autos:
+            hpa_ready = _is_ready(dep_autos['hpa'])
+            return hpa_ready
+        return False
+
+    if 'kpa' in dep_autos:
+        kpa_ready = _is_ready(dep_autos['kpa'])
+        return kpa_ready
+    return False
+
+
+def _extract_min_max_from_autoscaler(autoscaler: dict) -> tuple[str, str]:
+    """Extract min/max replicas across PA/HPA/KEDA.
+
+    Returns (min_str, max_str). Unknowns as '?'.
     """
     try:
-        if
-
-
-
-
-        for
-
-
-        if
-            return
-
-        #
-
-
-
+        if not autoscaler:
+            return '?', '?'
+
+        spec = autoscaler.get('spec', {})
+
+        # Check for HTTPScaledObject format (replicas.min/max)
+        if 'replicas' in spec:
+            replicas = spec.get('replicas', {})
+            if 'min' in replicas or 'max' in replicas:
+                return (str(replicas.get('min', '?')), str(replicas.get('max', '?')))
+
+        # Check for KEDA ScaledObject format (minReplicaCount/maxReplicaCount)
+        if 'minReplicaCount' in spec or 'maxReplicaCount' in spec:
+            return (
+                str(spec.get('minReplicaCount', '?')),
+                str(spec.get('maxReplicaCount', '?')),
+            )
 
-
-
-
+        # Check for PA/HPA format (minReplicas/maxReplicas)
+        if 'minReplicas' in spec or 'maxReplicas' in spec:
+            return str(spec.get('minReplicas', '?')), str(spec.get('maxReplicas', '?'))
+    except Exception:
+        pass
+    return '?', '?'
 
 
 def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
-    """Fetch
-
+    """Fetch autoscalers and return a simple map keyed by deployment name.
+
+    Simplified model:
+    - Aibrix deployments: 1 PodAutoscaler (KPA) if autoscaling enabled
+    - General deployments: 1 HPA (created by KEDA) if autoscaling enabled
+    - No autoscaling: No autoscaler
 
-
+    Returns: {deployment_name: {'kpa': pa_obj} or {'hpa': hpa_obj}}
+    """
+    autoscalers: Dict[str, Dict[str, Any]] = {}
+
+    # --- Aibrix deployment KPA ---
     try:
         crd_api = kube_client.crd_api(context=context)
-
+        pa_list = crd_api.list_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
        )
-        for
-            labels =
+        for pa in pa_list.get('items', []):
+            labels = pa.get('metadata', {}).get('labels', {})
            dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
+            if not dep_name:
+                # Fallback to scaleTargetRef.name
+                spec = pa.get('spec', {})
+                scale_ref = spec.get('scaleTargetRef', {})
+                dep_name = scale_ref.get('name')
            if dep_name:
-                autoscalers[dep_name] =
+                autoscalers[dep_name] = {'kpa': pa}
+        if pa_list.get('items'):
+            logger.debug(f"Found {len(pa_list.get('items', []))} PodAutoscalers")
     except Exception as e:
-        logger.warning(f'Error fetching
+        logger.warning(f'Error fetching PodAutoscalers: {e}')
 
-    # ---
+    # --- General deployment HPA ---
     try:
         autoscaling_api = kube_client.autoscaling_api(context=context)
         hpa_list = autoscaling_api.list_namespaced_horizontal_pod_autoscaler(
@@ -513,8 +591,18 @@ def build_autoscaler_map(namespace: str, context: str) -> dict[str, dict]:
         for hpa in hpa_list.items:
             labels = getattr(hpa.metadata, 'labels', {}) or {}
             dep_name = labels.get(DEPLOYMENT_NAME_LABEL)
-            if
-
+            if not dep_name:
+                # Fallback to scaleTargetRef.name
+                spec = hpa.spec.to_dict() if hpa.spec else {}
+                scale_ref = spec.get('scale_target_ref', {})
+                dep_name = scale_ref.get('name')
+            if dep_name:
+                hpa_dict = hpa.to_dict()
+                hpa_dict['kind'] = 'HorizontalPodAutoscaler'
+                hpa_dict['apiVersion'] = 'autoscaling/v2'
+                autoscalers[dep_name] = {'hpa': hpa_dict}
+        if hpa_list.items:
+            logger.debug(f'Found {len(hpa_list.items)} HPAs')
     except Exception as e:
         logger.warning(f'Error fetching HPAs: {e}')
 
@@ -539,28 +627,55 @@ def get_model_status(
         d = deployments[name]
         ready = (d.status.ready_replicas or 0) if d.status else 0
         desired = (d.spec.replicas or 0) if d.spec else 0
-
+
+        labels = d.metadata.labels or {}
+        is_aibrix = AIBRIX_NAME_LABEL in labels
+
+        if is_aibrix and name in autoscalers:
+            # For Aibrix deployments, get the original min replicas from
+            # deployment labels
+            original_min_replicas = 0
+            original_min_str = labels.get('trainy.ai/original-min-replicas')
+            if original_min_str:
+                try:
+                    original_min_replicas = int(original_min_str)
+                except (ValueError, TypeError):
+                    pass
+
+            # For Aibrix deployments, consider ready if:
+            # 1. Ready replicas >= original minimum replicas, OR
+            # 2. If original_min_replicas is 0 (scale-to-zero allowed),
+            #    then ready == desired
+            if original_min_replicas == 0:
+                status['deployment'] = 'ready' if ready == desired else 'pending'
+            else:
+                status['deployment'] = (
+                    'ready' if ready >= original_min_replicas else 'pending'
+                )
+        else:
+            # General deployments or no autoscaler: use simple ready == desired check
+            status['deployment'] = 'ready' if ready == desired else 'pending'
 
     # --- Service ---
     if name in services:
-
-
-
-
-        if is_vllm:
-            status['service'] = 'ready'
-        else:
-            lb_ready = False
-            if s.status and s.status.load_balancer and s.status.load_balancer.ingress:
-                ingress = s.status.load_balancer.ingress
-                if ingress and (ingress[0].ip or ingress[0].hostname):
-                    lb_ready = True
-            status['service'] = 'ready' if lb_ready else 'pending'
+        status['service'] = 'ready'
+    else:
+        status['service'] = 'missing'
 
     # --- Autoscaler ---
     if name in autoscalers:
-        a
-
+        # Check if this is a general deployment (not vLLM/Aibrix)
+        is_general = True
+        if deployments.get(name) and hasattr(deployments[name].metadata, 'labels'):
+            labels = deployments[name].metadata.labels or {}
+            if AIBRIX_NAME_LABEL in labels:
+                is_general = False
+
+        # Check actual autoscaler readiness
+        autoscaler_ready = get_autoscaler_status_for_deployment(
+            name, autoscalers, is_general
+        )
+        status['autoscaler'] = 'ready' if autoscaler_ready else 'pending'
     else:
         status['autoscaler'] = None
 
@@ -591,7 +706,7 @@ def get_service(namespace: str, job_name: str) -> Optional[Any]:
 
 def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
     context = kubernetes_utils.get_current_kube_config_context_name()
-    # --- Try Aibrix
+    # --- Try Aibrix PA first ---
     crd_api = kube_client.crd_api(context=context)
     try:
         return crd_api.get_namespaced_custom_object(
@@ -599,7 +714,7 @@ def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
-            name=f'{job_name}-
+            name=f'{job_name}-pa',
        )
     except ApiException as e:
         if e.status != 404:
@@ -612,6 +727,19 @@ def get_autoscaler(namespace: str, job_name: str) -> Optional[Any]:
         return autoscaling_api.read_namespaced_horizontal_pod_autoscaler(
             name=f'{job_name}-hpa', namespace=namespace
         ).to_dict()
+    except ApiException as e:
+        if e.status != 404:
+            raise
+
+    # --- Try KEDA ScaledObject ---
+    try:
+        return crd_api.get_namespaced_custom_object(
+            group='keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='scaledobjects',
+            name=f'{job_name}-keda',
+        )
     except ApiException as e:
         if e.status == 404:
             return None
@@ -677,52 +805,133 @@ def delete_service(namespace: str, name: str) -> Optional[Dict[str, Any]]:
 
 
 def delete_autoscaler(namespace: str, name: str) -> Optional[Dict[str, Any]]:
-    """
+    """Delete all autoscalers associated with a deployment name.
+
+    This includes:
+    - All Aibrix PodAutoscalers (e.g., "-pa", "-apa") targeting the deployment
+    - Any HorizontalPodAutoscaler named "<name>-hpa"
+    - Any KEDA ScaledObject named "<name>-keda"
+    """
     context = kubernetes_utils.get_current_kube_config_context_name()
 
-    # ---
+    # --- Delete ALL PodAutoscalers that target this deployment ---
     try:
         custom_api = kube_client.crd_api(context=context)
-
+        pa_list = custom_api.list_namespaced_custom_object(
            group='autoscaling.aibrix.ai',
            version='v1alpha1',
            namespace=namespace,
            plural='podautoscalers',
-            name=f'{name}-apa',
        )
-
+        for pa in pa_list.get('items', []):
+            meta = pa.get('metadata', {})
+            spec = pa.get('spec', {})
+            pa_name = meta.get('name', '')
+            labels = meta.get('labels', {})
+            scale_ref = spec.get('scaleTargetRef', {}).get('name')
+            targets_deployment = (
+                labels.get(DEPLOYMENT_NAME_LABEL) == name
+                or scale_ref == name
+                or pa_name.startswith(f'{name}-')
+            )
+            if targets_deployment:
+                try:
+                    custom_api.delete_namespaced_custom_object(
+                        group='autoscaling.aibrix.ai',
+                        version='v1alpha1',
+                        namespace=namespace,
+                        plural='podautoscalers',
+                        name=pa_name,
+                    )
+                    logger.info(f'Deleted PodAutoscaler: {pa_name}')
+                except kube_client.api_exception() as err:
+                    if getattr(err, 'status', None) != 404:
+                        raise
     except kube_client.api_exception() as err:
-        # If
-
-
-        if err.status != 404:
-            raise
-    except Exception:
-        if getattr(err, 'status', None) != 404:
-            raise
+        # If PA CRD is missing, skip; otherwise bubble up
+        if getattr(err, 'status', None) not in (404, None):
+            raise
 
-    # ---
+    # --- Delete HPA ---
     try:
         autoscaling_api = kube_client.autoscaling_api(context=context)
-
+        autoscaling_api.delete_namespaced_horizontal_pod_autoscaler(
            name=f'{name}-hpa',
            namespace=namespace,
        )
+        logger.info(f'Deleted HPA: {name}-hpa')
     except kube_client.api_exception() as err:
-
-
-
-
-
-
-
+        if getattr(err, 'status', None) not in (404, None):
+            try:
+                error_body = json.loads(err.body)
+                error_message = error_body.get('message', '')
+                logger.error(f'Error deleting HPA: {error_message}')
+            except json.JSONDecodeError:
+                logger.error(f'Error deleting HPA: {err.body}')
+            raise err
+
+    # --- Delete KEDA ScaledObject ---
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        custom_api.delete_namespaced_custom_object(
+            group='keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='scaledobjects',
+            name=f'{name}-keda',
+        )
+        logger.info(f'Deleted ScaledObject: {name}-keda')
+    except kube_client.api_exception() as err:
+        if getattr(err, 'status', None) not in (404, None):
+            try:
+                error_body = json.loads(err.body)
+                error_message = error_body.get('message', '')
+                logger.error(f'Error deleting KEDA ScaledObject: {error_message}')
+            except json.JSONDecodeError:
+                logger.error(f'Error deleting KEDA ScaledObject: {err.body}')
+            raise err
+
+    return None
+
+
+def delete_http_addon_resources(name: str, namespace: str) -> None:
+    """Deletes HTTP Add-on resources for general deployments."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+
+    # Delete HTTPScaledObject
+    try:
+        custom_api = kube_client.crd_api(context=context)
+        custom_api.delete_namespaced_custom_object(
+            group='http.keda.sh',
+            version='v1alpha1',
+            namespace=namespace,
+            plural='httpscaledobjects',
+            name=f'{name}-httpscaledobject',
+        )
+        logger.info(f'Deleted HTTPScaledObject: {name}-httpscaledobject')
+    except kube_client.api_exception() as err:
+        if err.status != 404:
+            logger.debug(
+                f'Failed to delete HTTPScaledObject {name}-httpscaledobject: {err}'
+            )
+
+    # Delete Ingress
+    try:
+        networking_api = kube_client.networking_api(context=context)
+        networking_api.delete_namespaced_ingress(
+            name=f'{name}-ingress',
+            namespace=namespace,
+        )
+        logger.info(f'Deleted Ingress: {name}-ingress')
+    except kube_client.api_exception() as err:
+        if err.status != 404:
+            logger.debug(f'Failed to delete Ingress {name}-ingress: {err}')
 
 
 def delete_serving_specs(name: str, namespace: str) -> None:
     for kind, delete_fn in [
         ('deployment', delete_deployment),
         ('service', delete_service),
-        ('podautoscaler', delete_autoscaler),
     ]:
         try:
             delete_fn(namespace, name)
@@ -730,6 +939,15 @@ def delete_serving_specs(name: str, namespace: str) -> None:
         except Exception as e:
             logger.debug(f'Failed to delete {kind} {name}: {e}')
 
+    # Delete autoscaler resources (Aibrix PA, HPA, or KEDA ScaledObject)
+    try:
+        delete_autoscaler(namespace=namespace, name=name)
+    except Exception as e:
+        logger.debug(f'Failed to delete autoscaler for {name}: {e}')
+
+    # Delete HTTP Add-on resources for general deployments
+    delete_http_addon_resources(name, namespace)
+
 
 def _get_resource_summary(deployment) -> str:
     """Extract and format pod resource information from a deployment.
@@ -779,6 +997,24 @@ def get_envoy_external_ip() -> Optional[str]:
     return None
 
 
+def get_ingress_nginx_external_ip() -> Optional[str]:
+    """Get the external IP of the keda-ingress-nginx-controller LoadBalancer."""
+    context = kubernetes_utils.get_current_kube_config_context_name()
+    core_api = kube_client.core_api(context=context)
+    try:
+        # Look for keda-ingress-nginx-controller service in keda namespace
+        service = core_api.read_namespaced_service(
+            name='keda-ingress-nginx-controller', namespace='keda'
+        )
+        if service.spec.type == 'LoadBalancer':
+            ingress = service.status.load_balancer.ingress
+            if ingress:
+                return ingress[0].ip or ingress[0].hostname
+    except Exception:
+        pass
+    return None
+
+
 def get_unique_cluster_name_from_tunnel() -> str:
     """Get cluster name from the apoxy deployment command."""
     try:
@@ -787,7 +1023,7 @@ def get_unique_cluster_name_from_tunnel() -> str:
 
         # Get the apoxy deployment
         deployment = apps_api.read_namespaced_deployment(
-            name='apoxy', namespace='
+            name='apoxy', namespace='apoxy-system'
        )
 
        # Extract cluster name from the command
@@ -820,112 +1056,59 @@ def get_endpoint_type_from_config() -> str:
     try:
         # Use the proper config system that handles KONDUKTOR_CONFIG env var
         endpoint_type = konduktor_config.get_nested(('serving', 'endpoint'), 'trainy')
+        logger.debug(f'[DEBUG] Config endpoint_type: {endpoint_type}')
        return endpoint_type.lower()
     except Exception as e:
         logger.warning(f'Error reading endpoint config: {e}')
 
     # Default to trainy if config not found or error
+    logger.debug('[DEBUG] Falling back to default endpoint type: trainy')
     return 'trainy'
 
 
-def
-
-
-
-        core_api = kube_client.core_api(context=context)
-
-        # Get the service
-        service = core_api.read_namespaced_service(
-            name=service_name, namespace='default'
-        )
-
-        # Check if it's LoadBalancer type
-        if service.spec.type == 'LoadBalancer':
-            ingress = service.status.load_balancer.ingress
-            if ingress and len(ingress) > 0:
-                ip = ingress[0].ip
-                if ip:
-                    return f'{ip}:{service.spec.ports[0].port}'
-
-        # If not LoadBalancer or no IP, return pending
-        return '<pending>'
-
-    except Exception:
-        return '<pending>'
-
-
-def get_vllm_deployment_endpoint(force_direct: bool = False) -> str:
-    """Get the endpoint for vLLM/Aibrix deployments based on config."""
+def get_deployment_endpoint(
+    force_direct: bool = False, deployment_type: str = 'AIBRIX'
+) -> str:
+    """Get the endpoint for both vLLM/Aibrix and general deployments."""
     if force_direct:
-        # Force direct endpoint display regardless of config
         endpoint_type = 'direct'
     else:
         endpoint_type = get_endpoint_type_from_config()
 
     if endpoint_type == 'direct':
-
-
-
-
-
-
-
-
-
-
-        # Fallback to direct endpoint if trainy.us not available
+        # Check if this is a general deployment
+        if deployment_type == 'GENERAL':
+            # General deployments: ingress IP + Host header
+            ingress_ip = get_ingress_nginx_external_ip()
+            if ingress_ip:
+                return f'{ingress_ip}'
+            else:
+                return '<pending>'
+        else:
+            # vLLM/Aibrix deployments: envoy IP
            try:
                aibrix_endpoint = get_envoy_external_ip()
-
-                # Aibrix deployments route through Envoy Gateway on port 80
-                return f'{aibrix_endpoint}'
+                return aibrix_endpoint or '<pending>'
            except Exception:
-
-                return '<pending>'
-
-
-def get_general_deployment_endpoint(
-    service_name: str, force_direct: bool = False
-) -> str:
-    """Get the endpoint for a general deployment based on config."""
-    if force_direct:
-        # Force direct endpoint display regardless of config
-        endpoint_type = 'direct'
-    else:
-        endpoint_type = get_endpoint_type_from_config()
-
-    if endpoint_type == 'direct':
-        # Use LoadBalancer IP with port
-        return _get_loadbalancer_endpoint_with_port(service_name)
+                return '<pending>'
     else:
-        # Use Apoxy (trainy.us)
+        # Use Apoxy (trainy.us)
        try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            endpoint_name = labels.get('endpoint_name')
-            if endpoint_name:
-                return endpoint_name
-
-            # Fallback if no route found - try direct LoadBalancer endpoint
-            return _get_loadbalancer_endpoint_with_port(service_name)
-
-        except Exception as e:
-            logger.warning(f'Endpoint error for general deployment {service_name}: {e}')
-            # Fallback to direct LoadBalancer endpoint on error
-            return _get_loadbalancer_endpoint_with_port(service_name)
+            cluster_name = get_unique_cluster_name_from_tunnel()
+            if deployment_type == 'GENERAL':
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers)to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                return f'{cluster_name[:-3]}2.trainy.us'  # General deployments
+            else:
+                # Strip last 3 chars: backend Apoxy setup uses unique
+                # suffixes (3 random numbers)to avoid Apoxy bugs when
+                # deleting/creating TunnelNode resources with same names too
+                # quickly, but we hide this complexity from user-facing endpoints
+                return f'{cluster_name[:-3]}.trainy.us'  # vLLM deployments
+        except Exception:
+            return '<pending>'
 
 
 def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
@@ -962,7 +1145,9 @@ def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
     is_ci = os.environ.get('CI') or os.environ.get('BUILDKITE')
 
     # Get Aibrix endpoint once for all Aibrix deployments
-    aibrix_endpoint =
+    aibrix_endpoint = get_deployment_endpoint(force_direct, 'AIBRIX')
+    # Get General endpoint once for all General deployments
+    general_endpoint = get_deployment_endpoint(force_direct, 'GENERAL')
 
     table = Table(title=title, box=box.ASCII if is_ci else box.ROUNDED)
     if all_users:
@@ -1017,14 +1202,33 @@ def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
            }
            return f"{label}: {emoji_map.get(state, '❓')}"
 
+        # Check if this is a general deployment (not vLLM/Aibrix)
+        is_general = True
+        if deployment and hasattr(deployment.metadata, 'labels'):
+            labels = deployment.metadata.labels or {}
+            if AIBRIX_NAME_LABEL in labels:
+                is_general = False
+
        summary_lines = [
            emoji_line('Deploym', status['deployment'] or 'missing'),
            emoji_line('Service', status['service'] or 'missing'),
        ]
-
-
-
+
+        if is_general:
+            # Autoscaler for General: HPA only
+            hpa_ready = get_autoscaler_status_for_deployment(
+                name, autoscalers_map, is_general=True
            )
+            if name in autoscalers_map:
+                summary_lines.append(f"AScaler: {'✅' if hpa_ready else '❓'}")
+        else:
+            # Autoscaler for vLLM: only KPA (APA no longer used)
+            if name in autoscalers_map:
+                kpa_ready = get_autoscaler_status_for_deployment(
+                    name, autoscalers_map, is_general=False
+                )
+                if 'kpa' in autoscalers_map.get(name, {}):
+                    summary_lines.append(f"AScaler: {'✅' if kpa_ready else '❓'}")
        summary = '\n'.join(summary_lines)
 
        # Overall status
@@ -1057,29 +1261,61 @@ def show_status_table(namespace: str, all_users: bool, force_direct: bool = False):
 
        endpoint_str = '<pending>'
        if AIBRIX_NAME_LABEL in labels:
-            # Aibrix deployment
-
+            # Aibrix deployment
+            endpoint_type = get_endpoint_type_from_config()
+            if force_direct or endpoint_type == 'direct':
+                # Direct access: use http for IP endpoints
+                endpoint_str = (
+                    f'http://{aibrix_endpoint}'
+                    if aibrix_endpoint != '<pending>'
+                    else aibrix_endpoint
+                )
+            else:
+                # Apoxy access: use https for trainy.us endpoints
+                endpoint_str = (
+                    f'https://{aibrix_endpoint}'
+                    if aibrix_endpoint != '<pending>'
+                    else aibrix_endpoint
+                )
        else:
            # General deployment
-
+            endpoint_type = get_endpoint_type_from_config()
+            if force_direct or endpoint_type == 'direct':
+                # Direct access: IP + Host header
+                endpoint_str = f'http://{general_endpoint}\nHost: {name}'
+            else:
+                # Apoxy access: single host + path
+                endpoint_str = f'https://{general_endpoint}/{name}'
 
        # Replicas
-
-            str(deployment.status.ready_replicas or 0)
-
-
+        if deployment:
+            ready_replicas = str(deployment.status.ready_replicas or 0)
+            desired_replicas = str(deployment.spec.replicas or 0)
+        else:
+            ready_replicas = '?'
+            desired_replicas = '?'
+
        replicas_text = Text()
        replicas_text.append(
            f'Ready: {ready_replicas}/{desired_replicas}\n', style='bold white'
        )
+
        if status['autoscaler']:
-
-
-
-
-
-
-
+            # Get min/max from deployment labels
+            min_r, max_r = '?', '?'
+
+            if deployment and hasattr(deployment.metadata, 'labels'):
+                labels = deployment.metadata.labels or {}
+                # All deployments with autoscaling get these labels from the template
+                original_min_str = labels.get('trainy.ai/original-min-replicas')
+                original_max_str = labels.get('trainy.ai/original-max-replicas')
+                if original_min_str and original_max_str:
+                    min_r, max_r = original_min_str, original_max_str
+                    logger.debug(
+                        f'[DEBUG] Got replicas from deployment labels: '
+                        f'min={min_r}, max={max_r}'
+                    )
+
        replicas_text.append(f'Min : {min_r}\n', style='bold white')
        replicas_text.append(f'Max : {max_r}', style='bold white')
 
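
For readers tracing the new control flow, here is a minimal sketch of how the four-tuple returned by the reworked `render_specs` is unpacked; it mirrors the call sites in the diff above (`create_deployment`, `create_service`, `create_http_addon_resources`, `create_pod_autoscaler`). The `describe_specs` wrapper, its prints, and the bare `task` argument are hypothetical and not part of konduktor's public API.

```python
# Hypothetical helper: only the render_specs() unpacking pattern and the
# meaning of each tuple slot are taken from the diff above.
from konduktor.backends import deployment_utils


def describe_specs(task) -> None:
    # New return shape: (deployment_spec, service_spec,
    #                    http_addon_resources, pa_resource)
    deployment, service, http_addons, pa = deployment_utils.render_specs(task)

    print(f"Deployment: {deployment['metadata']['name']}")
    print(f"Service:    {service['metadata']['name']}")
    if http_addons:
        # General deployments carry HTTPScaledObject and/or Ingress docs;
        # pa is None for them.
        print(f'General deployment with {len(http_addons)} HTTP add-on resources')
    if pa is not None:
        # vLLM/Aibrix deployments with min_replicas != max_replicas carry
        # a PodAutoscaler doc; http_addons is empty for them.
        print(f"Aibrix PodAutoscaler: {pa['metadata']['name']}")
```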