konduktor-nightly 0.1.0.dev20250919104536__py3-none-any.whl → 0.1.0.dev20250921104307__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of konduktor-nightly might be problematic. Click here for more details.
- konduktor/__init__.py +2 -2
- konduktor/backends/deployment.py +14 -8
- konduktor/backends/deployment_utils.py +594 -358
- konduktor/cli.py +4 -2
- konduktor/manifests/aibrix-setup.yaml +274 -0
- konduktor/manifests/apoxy-setup.yaml +42 -9
- konduktor/manifests/apoxy-setup2.yaml +69 -5
- konduktor/serving.py +10 -6
- konduktor/task.py +7 -0
- konduktor/templates/deployment.yaml.j2 +91 -44
- konduktor/utils/schemas.py +1 -1
- {konduktor_nightly-0.1.0.dev20250919104536.dist-info → konduktor_nightly-0.1.0.dev20250921104307.dist-info}/METADATA +1 -1
- {konduktor_nightly-0.1.0.dev20250919104536.dist-info → konduktor_nightly-0.1.0.dev20250921104307.dist-info}/RECORD +16 -16
- konduktor/templates/apoxy-deployment.yaml.j2 +0 -33
- {konduktor_nightly-0.1.0.dev20250919104536.dist-info → konduktor_nightly-0.1.0.dev20250921104307.dist-info}/LICENSE +0 -0
- {konduktor_nightly-0.1.0.dev20250919104536.dist-info → konduktor_nightly-0.1.0.dev20250921104307.dist-info}/WHEEL +0 -0
- {konduktor_nightly-0.1.0.dev20250919104536.dist-info → konduktor_nightly-0.1.0.dev20250921104307.dist-info}/entry_points.txt +0 -0
konduktor/cli.py
CHANGED
|
@@ -973,7 +973,9 @@ def down(
|
|
|
973
973
|
|
|
974
974
|
if all:
|
|
975
975
|
assert jobs_specs is not None, f'No jobs found in namespace {namespace}'
|
|
976
|
-
|
|
976
|
+
if len(jobs_specs) == 0:
|
|
977
|
+
click.secho(f'No jobs found in namespace {namespace}', fg='yellow')
|
|
978
|
+
return
|
|
977
979
|
jobs = [job['metadata']['name'] for job in jobs_specs]
|
|
978
980
|
elif jobs:
|
|
979
981
|
# Get all available jobs to match against patterns
|
|
@@ -1630,7 +1632,7 @@ def list_secrets(all_users: bool):
|
|
|
1630
1632
|
|
|
1631
1633
|
@cli.group(cls=_NaturalOrderGroup)
|
|
1632
1634
|
def serve():
|
|
1633
|
-
"""Manage
|
|
1635
|
+
"""Manage deployment serving with Konduktor.
|
|
1634
1636
|
|
|
1635
1637
|
USAGE: konduktor serve COMMAND
|
|
1636
1638
|
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
# Aibrix Setup - vLLM Deployment Infrastructure
|
|
2
|
+
#
|
|
3
|
+
# This file sets up the infrastructure needed for vLLM (Aibrix) deployments:
|
|
4
|
+
# 1. Envoy Gateway configuration for HTTP routing
|
|
5
|
+
# 2. Aibrix Activator service for request-based autoscaling (KPA)
|
|
6
|
+
# 3. HTTP route mirroring for prewarming vLLM models
|
|
7
|
+
# 4. Lua script for extracting model names from OpenAI-compatible requests
|
|
8
|
+
#
|
|
9
|
+
# The activator tracks incoming requests and provides metrics to scale
|
|
10
|
+
# vLLM deployments based on demand (requests per second).
|
|
11
|
+
|
|
12
|
+
# This file is kept separate from apoxy setup files because it is
|
|
13
|
+
# only used in actual clusters, not in the test kind clusters.
|
|
14
|
+
|
|
15
|
+
apiVersion: v1
|
|
16
|
+
kind: ConfigMap
|
|
17
|
+
metadata:
|
|
18
|
+
name: envoy-gateway-config
|
|
19
|
+
namespace: envoy-gateway-system
|
|
20
|
+
data:
|
|
21
|
+
envoy-gateway.yaml: |
|
|
22
|
+
apiVersion: gateway.envoyproxy.io/v1alpha1
|
|
23
|
+
kind: EnvoyGateway
|
|
24
|
+
provider:
|
|
25
|
+
type: Kubernetes
|
|
26
|
+
gateway:
|
|
27
|
+
controllerName: gateway.envoyproxy.io/gatewayclass-controller
|
|
28
|
+
extensionApis:
|
|
29
|
+
enableEnvoyPatchPolicy: true
|
|
30
|
+
---
|
|
31
|
+
apiVersion: v1
|
|
32
|
+
kind: Namespace
|
|
33
|
+
metadata:
|
|
34
|
+
name: aibrix-activator
|
|
35
|
+
---
|
|
36
|
+
apiVersion: v1
|
|
37
|
+
kind: ConfigMap
|
|
38
|
+
metadata:
|
|
39
|
+
name: activator-code
|
|
40
|
+
namespace: aibrix-activator
|
|
41
|
+
data:
|
|
42
|
+
activator.py: |
|
|
43
|
+
import os, time, json
|
|
44
|
+
from collections import defaultdict, deque
|
|
45
|
+
from fastapi import FastAPI, Request
|
|
46
|
+
from fastapi.responses import PlainTextResponse, JSONResponse
|
|
47
|
+
|
|
48
|
+
NAMESPACE = os.getenv("NAMESPACE", "default")
|
|
49
|
+
WINDOW_SEC = int(os.getenv("WINDOW_SEC", "30")) # demand lookback
|
|
50
|
+
CAPACITY_RPS = float(os.getenv("CAPACITY_RPS", "1.0")) # per-replica capacity
|
|
51
|
+
MIN_WAKE = int(os.getenv("MIN_REPLICA_ON_WAKE", "1"))
|
|
52
|
+
MAX_REPLICAS = int(os.getenv("MAX_REPLICAS", "8"))
|
|
53
|
+
|
|
54
|
+
app = FastAPI()
|
|
55
|
+
events = defaultdict(deque) # key=(ns,model) -> deque[timestamps]
|
|
56
|
+
|
|
57
|
+
def _prune(q, now):
|
|
58
|
+
while q and now - q[0] > WINDOW_SEC: q.popleft()
|
|
59
|
+
|
|
60
|
+
def _bump(ns, model):
|
|
61
|
+
now = time.time()
|
|
62
|
+
q = events[(ns, model)]
|
|
63
|
+
q.append(now)
|
|
64
|
+
_prune(q, now)
|
|
65
|
+
|
|
66
|
+
def _desired(ns, model):
|
|
67
|
+
now = time.time()
|
|
68
|
+
q = events[(ns, model)]
|
|
69
|
+
_prune(q, now)
|
|
70
|
+
rps = len(q) / max(WINDOW_SEC, 1)
|
|
71
|
+
if len(q) == 0: return 0
|
|
72
|
+
# Convert demand to desired replicas
|
|
73
|
+
import math
|
|
74
|
+
d = max(MIN_WAKE, math.ceil(rps / max(CAPACITY_RPS, 1e-6)))
|
|
75
|
+
return max(0, min(d, MAX_REPLICAS))
|
|
76
|
+
|
|
77
|
+
def _extract_model(headers, body_bytes):
|
|
78
|
+
# Prefer header (OpenAI-compatible)
|
|
79
|
+
m = headers.get("model") or headers.get("x-model")
|
|
80
|
+
if m: return m
|
|
81
|
+
# Try JSON body
|
|
82
|
+
try:
|
|
83
|
+
j = json.loads(body_bytes or b"{}")
|
|
84
|
+
if isinstance(j, dict):
|
|
85
|
+
# OpenAI schema: {"model": "...", ...}
|
|
86
|
+
if "model" in j and isinstance(j["model"], str):
|
|
87
|
+
return j["model"]
|
|
88
|
+
except Exception:
|
|
89
|
+
pass
|
|
90
|
+
return None
|
|
91
|
+
|
|
92
|
+
# Mirror endpoints (same as your API paths); quick 204 response
|
|
93
|
+
@app.post("/v1/completions")
|
|
94
|
+
@app.post("/v1/chat/completions")
|
|
95
|
+
async def mirrored(request: Request):
|
|
96
|
+
body = await request.body()
|
|
97
|
+
model = _extract_model(request.headers, body)
|
|
98
|
+
if model:
|
|
99
|
+
_bump(NAMESPACE, model)
|
|
100
|
+
return JSONResponse({"ok": True}, status_code=204)
|
|
101
|
+
|
|
102
|
+
# Catch-all POST (safety net if your gateway uses different paths)
|
|
103
|
+
@app.post("/{full_path:path}")
|
|
104
|
+
async def mirrored_generic(request: Request, full_path: str):
|
|
105
|
+
body = await request.body()
|
|
106
|
+
model = _extract_model(request.headers, body)
|
|
107
|
+
if model:
|
|
108
|
+
_bump(NAMESPACE, model)
|
|
109
|
+
return JSONResponse({"ok": True}, status_code=204)
|
|
110
|
+
|
|
111
|
+
# Metrics for KPA and Debugging
|
|
112
|
+
@app.get("/metrics/{ns}/{model}", response_class=PlainTextResponse)
|
|
113
|
+
async def metrics(ns: str, model: str):
|
|
114
|
+
d = _desired(ns, model)
|
|
115
|
+
now = time.time()
|
|
116
|
+
q = events[(ns, model)]
|
|
117
|
+
_prune(q, now)
|
|
118
|
+
rps = len(q) / max(WINDOW_SEC, 1)
|
|
119
|
+
return (
|
|
120
|
+
"# HELP vllm:deployment_replicas Number of suggested replicas.\n"
|
|
121
|
+
"# TYPE vllm:deployment_replicas gauge\n"
|
|
122
|
+
f'vllm:deployment_replicas{{namespace="{ns}",model_name="{model}"}} {d}\n'
|
|
123
|
+
"# HELP vllm:observed_rps Incoming requests per second.\n"
|
|
124
|
+
"# TYPE vllm:observed_rps gauge\n"
|
|
125
|
+
f'vllm:observed_rps{{namespace="{ns}",model_name="{model}"}} {rps:.2f}\n'
|
|
126
|
+
)
|
|
127
|
+
---
|
|
128
|
+
apiVersion: apps/v1
|
|
129
|
+
kind: Deployment
|
|
130
|
+
metadata:
|
|
131
|
+
name: aibrix-activator
|
|
132
|
+
namespace: aibrix-activator
|
|
133
|
+
spec:
|
|
134
|
+
replicas: 1
|
|
135
|
+
selector: { matchLabels: { app: aibrix-activator } }
|
|
136
|
+
template:
|
|
137
|
+
metadata: { labels: { app: aibrix-activator } }
|
|
138
|
+
spec:
|
|
139
|
+
containers:
|
|
140
|
+
- name: activator
|
|
141
|
+
image: python:3.11-slim
|
|
142
|
+
command: ["bash","-lc"]
|
|
143
|
+
args:
|
|
144
|
+
- |
|
|
145
|
+
pip install fastapi uvicorn >/dev/null && \
|
|
146
|
+
uvicorn activator:app --host 0.0.0.0 --port 8080
|
|
147
|
+
env:
|
|
148
|
+
- { name: NAMESPACE, value: "default" }
|
|
149
|
+
- { name: WINDOW_SEC, value: "30" }
|
|
150
|
+
- { name: CAPACITY_RPS, value: "1.0" }
|
|
151
|
+
- { name: MIN_REPLICA_ON_WAKE, value: "1" }
|
|
152
|
+
- { name: MAX_REPLICAS, value: "8" }
|
|
153
|
+
ports: [{containerPort: 8080}]
|
|
154
|
+
volumeMounts:
|
|
155
|
+
- { name: code, mountPath: /app/activator.py, subPath: activator.py }
|
|
156
|
+
workingDir: /app
|
|
157
|
+
volumes:
|
|
158
|
+
- name: code
|
|
159
|
+
configMap: { name: activator-code }
|
|
160
|
+
---
|
|
161
|
+
apiVersion: v1
|
|
162
|
+
kind: Service
|
|
163
|
+
metadata:
|
|
164
|
+
name: aibrix-activator
|
|
165
|
+
namespace: aibrix-activator
|
|
166
|
+
spec:
|
|
167
|
+
selector: { app: aibrix-activator }
|
|
168
|
+
ports:
|
|
169
|
+
- name: http
|
|
170
|
+
port: 8080
|
|
171
|
+
targetPort: 8080
|
|
172
|
+
protocol: TCP
|
|
173
|
+
type: ClusterIP
|
|
174
|
+
---
|
|
175
|
+
apiVersion: gateway.networking.k8s.io/v1beta1
|
|
176
|
+
kind: ReferenceGrant
|
|
177
|
+
metadata:
|
|
178
|
+
name: allow-httproute-to-activator
|
|
179
|
+
namespace: aibrix-activator
|
|
180
|
+
spec:
|
|
181
|
+
from:
|
|
182
|
+
- group: gateway.networking.k8s.io
|
|
183
|
+
kind: HTTPRoute
|
|
184
|
+
namespace: aibrix-system
|
|
185
|
+
to:
|
|
186
|
+
- group: ""
|
|
187
|
+
kind: Service
|
|
188
|
+
name: aibrix-activator
|
|
189
|
+
---
|
|
190
|
+
apiVersion: gateway.networking.k8s.io/v1
|
|
191
|
+
kind: HTTPRoute
|
|
192
|
+
metadata:
|
|
193
|
+
name: activator-mirror-sink
|
|
194
|
+
namespace: aibrix-system
|
|
195
|
+
spec:
|
|
196
|
+
parentRefs:
|
|
197
|
+
- group: gateway.networking.k8s.io
|
|
198
|
+
kind: Gateway
|
|
199
|
+
name: aibrix-eg
|
|
200
|
+
namespace: aibrix-system
|
|
201
|
+
rules:
|
|
202
|
+
- matches:
|
|
203
|
+
- path:
|
|
204
|
+
type: PathPrefix
|
|
205
|
+
value: /__activator_sink__
|
|
206
|
+
backendRefs:
|
|
207
|
+
- name: aibrix-activator
|
|
208
|
+
namespace: aibrix-activator
|
|
209
|
+
port: 8080
|
|
210
|
+
---
|
|
211
|
+
apiVersion: gateway.envoyproxy.io/v1alpha1
|
|
212
|
+
kind: EnvoyPatchPolicy
|
|
213
|
+
metadata:
|
|
214
|
+
name: prewarm-completions-lua
|
|
215
|
+
namespace: aibrix-system
|
|
216
|
+
spec:
|
|
217
|
+
targetRef:
|
|
218
|
+
group: gateway.networking.k8s.io
|
|
219
|
+
kind: Gateway
|
|
220
|
+
name: aibrix-eg
|
|
221
|
+
type: JSONPatch
|
|
222
|
+
jsonPatches:
|
|
223
|
+
- type: "type.googleapis.com/envoy.config.listener.v3.Listener"
|
|
224
|
+
name: "aibrix-system/aibrix-eg/http"
|
|
225
|
+
operation:
|
|
226
|
+
op: add
|
|
227
|
+
path: "/default_filter_chain/filters/0/typed_config/http_filters/0"
|
|
228
|
+
value:
|
|
229
|
+
name: envoy.filters.http.lua
|
|
230
|
+
typed_config:
|
|
231
|
+
"@type": type.googleapis.com/envoy.extensions.filters.http.lua.v3.Lua
|
|
232
|
+
inlineCode: |
|
|
233
|
+
function envoy_on_request(handle)
|
|
234
|
+
local path = handle:headers():get(":path") or ""
|
|
235
|
+
if string.find(path, "^/v1/completions") or string.find(path, "^/v1/chat/completions") then
|
|
236
|
+
-- Try to get model from header first
|
|
237
|
+
local model = handle:headers():get("model") or ""
|
|
238
|
+
|
|
239
|
+
-- If no model in header, try to extract from JSON body
|
|
240
|
+
if model == "" then
|
|
241
|
+
local ct = handle:headers():get("content-type") or ""
|
|
242
|
+
if string.find(ct:lower(), "application/json") then
|
|
243
|
+
local body = handle:body()
|
|
244
|
+
if body and body:length() > 0 then
|
|
245
|
+
local raw = body:getBytes(0, math.min(body:length(), 1024))
|
|
246
|
+
-- Simple Lua pattern (not a full regex or JSON parse) to extract model from JSON: "model":"value"
|
|
247
|
+
local model_match = raw:match('"model"%s*:%s*"([^"]+)"')
|
|
248
|
+
if model_match then
|
|
249
|
+
model = model_match
|
|
250
|
+
end
|
|
251
|
+
end
|
|
252
|
+
end
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
-- Only proceed if we have a model
|
|
256
|
+
if model ~= "" then
|
|
257
|
+
-- fire-and-forget wake signal; very short timeout
|
|
258
|
+
pcall(function()
|
|
259
|
+
handle:httpCall(
|
|
260
|
+
"httproute/aibrix-system/activator-mirror-sink/rule/0",
|
|
261
|
+
{
|
|
262
|
+
[":method"] = "POST",
|
|
263
|
+
[":path"] = "/v1/completions",
|
|
264
|
+
[":authority"] = "aibrix-activator.aibrix-activator.svc.cluster.local",
|
|
265
|
+
["content-type"] = "application/json",
|
|
266
|
+
["model"] = model
|
|
267
|
+
},
|
|
268
|
+
"{}",
|
|
269
|
+
5 -- ms
|
|
270
|
+
)
|
|
271
|
+
end)
|
|
272
|
+
end
|
|
273
|
+
end
|
|
274
|
+
end
|
|
@@ -1,8 +1,25 @@
|
|
|
1
|
+
# Apoxy Setup (Part 1/2) - Core Infrastructure
|
|
2
|
+
#
|
|
3
|
+
# This file sets up the core Apoxy infrastructure for external access to deployments:
|
|
4
|
+
# 1. Apoxy system namespace and RBAC
|
|
5
|
+
# 2. Kubeconfig secret for cluster access (populated by CI)
|
|
6
|
+
# 3. Apoxy tunnel controller and proxy services
|
|
7
|
+
# 4. Network policies for cross-namespace access
|
|
8
|
+
#
|
|
9
|
+
# Split into 2 files because:
|
|
10
|
+
# - apoxy-setup.yaml: Core infrastructure (1 per cluster) (needs to be applied first)
|
|
11
|
+
# - apoxy-setup2.yaml: All routing rules for both deployment types
|
|
12
|
+
|
|
13
|
+
apiVersion: v1
|
|
14
|
+
kind: Namespace
|
|
15
|
+
metadata:
|
|
16
|
+
name: apoxy-system
|
|
17
|
+
---
|
|
1
18
|
apiVersion: v1
|
|
2
19
|
kind: Secret
|
|
3
20
|
metadata:
|
|
4
21
|
name: trainy-kubeconfig
|
|
5
|
-
namespace:
|
|
22
|
+
namespace: apoxy-system
|
|
6
23
|
type: Opaque
|
|
7
24
|
data:
|
|
8
25
|
# this gets replaced by buildkite CI secret APOXY_AUTH
|
|
@@ -13,7 +30,7 @@ apiVersion: v1
|
|
|
13
30
|
kind: ServiceAccount
|
|
14
31
|
metadata:
|
|
15
32
|
name: kube-controller
|
|
16
|
-
namespace:
|
|
33
|
+
namespace: apoxy-system
|
|
17
34
|
---
|
|
18
35
|
apiVersion: rbac.authorization.k8s.io/v1
|
|
19
36
|
kind: ClusterRole
|
|
@@ -35,13 +52,13 @@ roleRef:
|
|
|
35
52
|
subjects:
|
|
36
53
|
- kind: ServiceAccount
|
|
37
54
|
name: kube-controller
|
|
38
|
-
namespace:
|
|
55
|
+
namespace: apoxy-system
|
|
39
56
|
---
|
|
40
57
|
apiVersion: apps/v1
|
|
41
58
|
kind: Deployment
|
|
42
59
|
metadata:
|
|
43
60
|
name: kube-controller
|
|
44
|
-
namespace:
|
|
61
|
+
namespace: apoxy-system
|
|
45
62
|
labels:
|
|
46
63
|
app: kube-controller
|
|
47
64
|
spec:
|
|
@@ -85,7 +102,7 @@ apiVersion: v1
|
|
|
85
102
|
kind: Service
|
|
86
103
|
metadata:
|
|
87
104
|
name: kube-controller
|
|
88
|
-
namespace:
|
|
105
|
+
namespace: apoxy-system
|
|
89
106
|
labels:
|
|
90
107
|
app: kube-controller
|
|
91
108
|
spec:
|
|
@@ -101,7 +118,7 @@ apiVersion: v1
|
|
|
101
118
|
kind: ConfigMap
|
|
102
119
|
metadata:
|
|
103
120
|
name: apoxy-config
|
|
104
|
-
namespace:
|
|
121
|
+
namespace: apoxy-system
|
|
105
122
|
data:
|
|
106
123
|
config.yaml: |
|
|
107
124
|
apiVersion: config.apoxy.dev/v1alpha1
|
|
@@ -118,7 +135,7 @@ apiVersion: apps/v1
|
|
|
118
135
|
kind: Deployment
|
|
119
136
|
metadata:
|
|
120
137
|
name: apoxy
|
|
121
|
-
namespace:
|
|
138
|
+
namespace: apoxy-system
|
|
122
139
|
labels:
|
|
123
140
|
app: apoxy
|
|
124
141
|
spec:
|
|
@@ -133,7 +150,7 @@ spec:
|
|
|
133
150
|
spec:
|
|
134
151
|
containers:
|
|
135
152
|
- name: apoxy
|
|
136
|
-
image: apoxy/apoxy:v0.11.
|
|
153
|
+
image: apoxy/apoxy:v0.11.18
|
|
137
154
|
command: ["apoxy", "tunnel", "run", "UNIQUE-TEMPNAME", "--insecure-skip-verify"]
|
|
138
155
|
volumeMounts:
|
|
139
156
|
- name: kubeconfig-volume
|
|
@@ -148,4 +165,20 @@ spec:
|
|
|
148
165
|
secretName: trainy-kubeconfig
|
|
149
166
|
- name: apoxy-config-volume
|
|
150
167
|
configMap:
|
|
151
|
-
name: apoxy-config
|
|
168
|
+
name: apoxy-config
|
|
169
|
+
---
|
|
170
|
+
# NetworkPolicy to allow Apoxy to reach services in other namespaces
|
|
171
|
+
apiVersion: networking.k8s.io/v1
|
|
172
|
+
kind: NetworkPolicy
|
|
173
|
+
metadata:
|
|
174
|
+
name: apoxy-cross-namespace-access
|
|
175
|
+
namespace: apoxy-system
|
|
176
|
+
spec:
|
|
177
|
+
podSelector:
|
|
178
|
+
matchLabels:
|
|
179
|
+
app: apoxy
|
|
180
|
+
policyTypes:
|
|
181
|
+
- Egress
|
|
182
|
+
egress:
|
|
183
|
+
# Allow all egress traffic
|
|
184
|
+
- {}
|
|
@@ -1,3 +1,20 @@
|
|
|
1
|
+
# Apoxy Setup (Part 2/2) - Deployment Routing
|
|
2
|
+
#
|
|
3
|
+
# This file sets up Apoxy routing for both vLLM and general deployments:
|
|
4
|
+
# 1. TunnelNode for secure tunnel connection
|
|
5
|
+
# 2. Backend for vLLM pointing to Envoy Gateway
|
|
6
|
+
# 3. HTTPRoute for company.trainy.us -> vLLM deployments
|
|
7
|
+
# 4. Backend for general deployments pointing to nginx ingress
|
|
8
|
+
# 5. HTTPRoute for company2.trainy.us -> general deployments
|
|
9
|
+
# 6. KEDA proxy service for HTTP autoscaling
|
|
10
|
+
# 7. 60s timeout for all requests
|
|
11
|
+
#
|
|
12
|
+
# Split into 2 files because:
|
|
13
|
+
# - apoxy-setup.yaml: Core infrastructure (1 per cluster) (needs to be applied first)
|
|
14
|
+
# - apoxy-setup2.yaml: All routing rules for both deployment types
|
|
15
|
+
|
|
16
|
+
# NOTE: TunnelNode should technically be in the first apoxy-setup.yaml but it
|
|
17
|
+
# needs to be created after the core infrastructure is created, so we put it here.
|
|
1
18
|
apiVersion: core.apoxy.dev/v1alpha
|
|
2
19
|
kind: TunnelNode
|
|
3
20
|
metadata:
|
|
@@ -6,7 +23,7 @@ spec:
|
|
|
6
23
|
egressGateway:
|
|
7
24
|
enabled: true
|
|
8
25
|
---
|
|
9
|
-
#
|
|
26
|
+
# Backend for vLLM deployments
|
|
10
27
|
apiVersion: core.apoxy.dev/v1alpha
|
|
11
28
|
kind: Backend
|
|
12
29
|
metadata:
|
|
@@ -15,7 +32,7 @@ spec:
|
|
|
15
32
|
endpoints:
|
|
16
33
|
- fqdn: envoy-aibrix-system-aibrix-eg-903790dc.envoy-gateway-system.UNIQUE-TEMPNAME.tun.apoxy.net
|
|
17
34
|
---
|
|
18
|
-
#
|
|
35
|
+
# HTTPRoute for vLLM deployments
|
|
19
36
|
apiVersion: gateway.apoxy.dev/v1
|
|
20
37
|
kind: HTTPRoute
|
|
21
38
|
metadata:
|
|
@@ -29,6 +46,53 @@ spec:
|
|
|
29
46
|
- 'TEMPNAME.trainy.us'
|
|
30
47
|
rules:
|
|
31
48
|
- backendRefs:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
49
|
+
- kind: Backend
|
|
50
|
+
name: UNIQUE-TEMPNAME-backend
|
|
51
|
+
port: 80
|
|
52
|
+
timeouts:
|
|
53
|
+
request: "60s"
|
|
54
|
+
---
|
|
55
|
+
# Backend for general deployments
|
|
56
|
+
apiVersion: core.apoxy.dev/v1alpha
|
|
57
|
+
kind: Backend
|
|
58
|
+
metadata:
|
|
59
|
+
name: UNIQUE-TEMPNAME-backend2
|
|
60
|
+
spec:
|
|
61
|
+
endpoints:
|
|
62
|
+
- fqdn: ingress-nginx-controller.keda.UNIQUE-TEMPNAME.tun.apoxy.net
|
|
63
|
+
---
|
|
64
|
+
# HTTPRoute for general deployments
|
|
65
|
+
apiVersion: gateway.apoxy.dev/v1
|
|
66
|
+
kind: HTTPRoute
|
|
67
|
+
metadata:
|
|
68
|
+
name: UNIQUE-TEMPNAME-route2
|
|
69
|
+
spec:
|
|
70
|
+
parentRefs:
|
|
71
|
+
- name: default
|
|
72
|
+
kind: Gateway
|
|
73
|
+
port: 443
|
|
74
|
+
hostnames:
|
|
75
|
+
- 'TEMPNAME2.trainy.us'
|
|
76
|
+
rules:
|
|
77
|
+
- backendRefs:
|
|
78
|
+
- kind: Backend
|
|
79
|
+
name: UNIQUE-TEMPNAME-backend2
|
|
80
|
+
port: 80
|
|
81
|
+
timeouts:
|
|
82
|
+
request: "60s"
|
|
83
|
+
|
|
84
|
+
# KEDA proxy service (1 per cluster) (For general deployments)
|
|
85
|
+
---
|
|
86
|
+
apiVersion: v1
|
|
87
|
+
kind: Service
|
|
88
|
+
metadata:
|
|
89
|
+
name: keda-proxy
|
|
90
|
+
namespace: default
|
|
91
|
+
spec:
|
|
92
|
+
type: ExternalName
|
|
93
|
+
externalName: keda-add-ons-http-interceptor-proxy.keda
|
|
94
|
+
ports:
|
|
95
|
+
- name: http
|
|
96
|
+
port: 8080
|
|
97
|
+
protocol: TCP
|
|
98
|
+
targetPort: 8080
|
konduktor/serving.py
CHANGED
|
@@ -49,11 +49,15 @@ class Serving:
|
|
|
49
49
|
if min_replicas is None:
|
|
50
50
|
min_replicas = max_replicas
|
|
51
51
|
if max_replicas is None:
|
|
52
|
-
max_replicas
|
|
52
|
+
# Edge case: if min_replicas is 0, set max_replicas to 1
|
|
53
|
+
if min_replicas == 0:
|
|
54
|
+
max_replicas = 1
|
|
55
|
+
else:
|
|
56
|
+
max_replicas = min_replicas
|
|
53
57
|
|
|
54
|
-
if min_replicas is not None and min_replicas
|
|
58
|
+
if min_replicas is not None and min_replicas < 0:
|
|
55
59
|
with ux_utils.print_exception_no_traceback():
|
|
56
|
-
raise ValueError('min_replicas must be >=
|
|
60
|
+
raise ValueError('min_replicas must be >= 0')
|
|
57
61
|
|
|
58
62
|
if (
|
|
59
63
|
max_replicas is not None
|
|
@@ -139,9 +143,9 @@ class Serving:
|
|
|
139
143
|
|
|
140
144
|
def to_yaml_config(self) -> Dict[str, Union[int, str]]:
|
|
141
145
|
config: Dict[str, Union[int, str]] = {
|
|
142
|
-
'min_replicas': self._min_replicas
|
|
143
|
-
'max_replicas': self._max_replicas
|
|
144
|
-
'ports': self._ports
|
|
146
|
+
'min_replicas': self._min_replicas if self._min_replicas is not None else 1,
|
|
147
|
+
'max_replicas': self._max_replicas if self._max_replicas is not None else 1,
|
|
148
|
+
'ports': self._ports if self._ports is not None else 8000,
|
|
145
149
|
}
|
|
146
150
|
# Only include probe if it's not None
|
|
147
151
|
if self._probe is not None:
|
konduktor/task.py
CHANGED
|
@@ -567,6 +567,13 @@ class Task:
|
|
|
567
567
|
f'less than min_replicas ({serving.min_replicas})'
|
|
568
568
|
)
|
|
569
569
|
|
|
570
|
+
if serving.max_replicas == 0 and serving.min_replicas == 0:
|
|
571
|
+
with ux_utils.print_exception_no_traceback():
|
|
572
|
+
raise ValueError(
|
|
573
|
+
f'max_replicas ({serving.max_replicas}) and '
|
|
574
|
+
f'min_replicas ({serving.min_replicas}) cannot both be 0'
|
|
575
|
+
)
|
|
576
|
+
|
|
570
577
|
if isinstance(serving, konduktor.Serving):
|
|
571
578
|
serving = serving
|
|
572
579
|
self.serving = serving
|