konduktor-nightly 0.1.0.dev20250919104536__py3-none-any.whl → 0.1.0.dev20250921104307__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

konduktor/cli.py CHANGED
@@ -973,7 +973,9 @@ def down(
973
973
 
974
974
  if all:
975
975
  assert jobs_specs is not None, f'No jobs found in namespace {namespace}'
976
- assert len(jobs_specs) > 0, f'No jobs found in namespace {namespace}'
976
+ if len(jobs_specs) == 0:
977
+ click.secho(f'No jobs found in namespace {namespace}', fg='yellow')
978
+ return
977
979
  jobs = [job['metadata']['name'] for job in jobs_specs]
978
980
  elif jobs:
979
981
  # Get all available jobs to match against patterns
@@ -1630,7 +1632,7 @@ def list_secrets(all_users: bool):
1630
1632
 
1631
1633
  @cli.group(cls=_NaturalOrderGroup)
1632
1634
  def serve():
1633
- """Manage LLM serving with Konduktor.
1635
+ """Manage deployment serving with Konduktor.
1634
1636
 
1635
1637
  USAGE: konduktor serve COMMAND
1636
1638
 
@@ -0,0 +1,274 @@
1
+ # Aibrix Setup - vLLM Deployment Infrastructure
2
+ #
3
+ # This file sets up the infrastructure needed for vLLM (Aibrix) deployments:
4
+ # 1. Envoy Gateway configuration for HTTP routing
5
+ # 2. Aibrix Activator service for request-based autoscaling (KPA)
6
+ # 3. HTTP route mirroring for prewarming vLLM models
7
+ # 4. Lua script for extracting model names from OpenAI-compatible requests
8
+ #
9
+ # The activator tracks incoming requests and provides metrics to scale
10
+ # vLLM deployments based on demand (requests per second).
11
+
12
+ # This file is kept separate from apoxy setup files because it is
13
+ # only used in actual clusters, not in the test kind clusters.
14
+
15
+ apiVersion: v1
16
+ kind: ConfigMap
17
+ metadata:
18
+ name: envoy-gateway-config
19
+ namespace: envoy-gateway-system
20
+ data:
21
+ envoy-gateway.yaml: |
22
+ apiVersion: gateway.envoyproxy.io/v1alpha1
23
+ kind: EnvoyGateway
24
+ provider:
25
+ type: Kubernetes
26
+ gateway:
27
+ controllerName: gateway.envoyproxy.io/gatewayclass-controller
28
+ extensionApis:
29
+ enableEnvoyPatchPolicy: true
30
+ ---
31
+ apiVersion: v1
32
+ kind: Namespace
33
+ metadata:
34
+ name: aibrix-activator
35
+ ---
36
+ apiVersion: v1
37
+ kind: ConfigMap
38
+ metadata:
39
+ name: activator-code
40
+ namespace: aibrix-activator
41
+ data:
42
+ activator.py: |
43
+ import os, time, json
44
+ from collections import defaultdict, deque
45
+ from fastapi import FastAPI, Request
46
+ from fastapi.responses import PlainTextResponse, JSONResponse
47
+
48
+ NAMESPACE = os.getenv("NAMESPACE", "default")
49
+ WINDOW_SEC = int(os.getenv("WINDOW_SEC", "30")) # demand lookback
50
+ CAPACITY_RPS = float(os.getenv("CAPACITY_RPS", "1.0")) # per-replica capacity
51
+ MIN_WAKE = int(os.getenv("MIN_REPLICA_ON_WAKE", "1"))
52
+ MAX_REPLICAS = int(os.getenv("MAX_REPLICAS", "8"))
53
+
54
+ app = FastAPI()
55
+ events = defaultdict(deque) # key=(ns,model) -> deque[timestamps]
56
+
57
+ def _prune(q, now):
58
+ while q and now - q[0] > WINDOW_SEC: q.popleft()
59
+
60
+ def _bump(ns, model):
61
+ now = time.time()
62
+ q = events[(ns, model)]
63
+ q.append(now)
64
+ _prune(q, now)
65
+
66
+ def _desired(ns, model):
67
+ now = time.time()
68
+ q = events[(ns, model)]
69
+ _prune(q, now)
70
+ rps = len(q) / max(WINDOW_SEC, 1)
71
+ if len(q) == 0: return 0
72
+ # Convert demand to desired replicas
73
+ import math
74
+ d = max(MIN_WAKE, math.ceil(rps / max(CAPACITY_RPS, 1e-6)))
75
+ return max(0, min(d, MAX_REPLICAS))
76
+
77
+ def _extract_model(headers, body_bytes):
78
+ # Prefer header (OpenAI-compatible)
79
+ m = headers.get("model") or headers.get("x-model")
80
+ if m: return m
81
+ # Try JSON body
82
+ try:
83
+ j = json.loads(body_bytes or b"{}")
84
+ if isinstance(j, dict):
85
+ # OpenAI schema: {"model": "...", ...}
86
+ if "model" in j and isinstance(j["model"], str):
87
+ return j["model"]
88
+ except Exception:
89
+ pass
90
+ return None
91
+
92
+ # Mirror endpoints (same as your API paths); quick 204 response
93
+ @app.post("/v1/completions")
94
+ @app.post("/v1/chat/completions")
95
+ async def mirrored(request: Request):
96
+ body = await request.body()
97
+ model = _extract_model(request.headers, body)
98
+ if model:
99
+ _bump(NAMESPACE, model)
100
+ return JSONResponse({"ok": True}, status_code=204)
101
+
102
+ # Catch-all POST (safety net if your gateway uses different paths)
103
+ @app.post("/{full_path:path}")
104
+ async def mirrored_generic(request: Request, full_path: str):
105
+ body = await request.body()
106
+ model = _extract_model(request.headers, body)
107
+ if model:
108
+ _bump(NAMESPACE, model)
109
+ return JSONResponse({"ok": True}, status_code=204)
110
+
111
+ # Metrics for KPA and Debugging
112
+ @app.get("/metrics/{ns}/{model}", response_class=PlainTextResponse)
113
+ async def metrics(ns: str, model: str):
114
+ d = _desired(ns, model)
115
+ now = time.time()
116
+ q = events[(ns, model)]
117
+ _prune(q, now)
118
+ rps = len(q) / max(WINDOW_SEC, 1)
119
+ return (
120
+ "# HELP vllm:deployment_replicas Number of suggested replicas.\n"
121
+ "# TYPE vllm:deployment_replicas gauge\n"
122
+ f'vllm:deployment_replicas{{namespace="{ns}",model_name="{model}"}} {d}\n'
123
+ "# HELP vllm:observed_rps Incoming requests per second.\n"
124
+ "# TYPE vllm:observed_rps gauge\n"
125
+ f'vllm:observed_rps{{namespace="{ns}",model_name="{model}"}} {rps:.2f}\n'
126
+ )
127
+ ---
128
+ apiVersion: apps/v1
129
+ kind: Deployment
130
+ metadata:
131
+ name: aibrix-activator
132
+ namespace: aibrix-activator
133
+ spec:
134
+ replicas: 1
135
+ selector: { matchLabels: { app: aibrix-activator } }
136
+ template:
137
+ metadata: { labels: { app: aibrix-activator } }
138
+ spec:
139
+ containers:
140
+ - name: activator
141
+ image: python:3.11-slim
142
+ command: ["bash","-lc"]
143
+ args:
144
+ - |
145
+ pip install fastapi uvicorn >/dev/null && \
146
+ uvicorn activator:app --host 0.0.0.0 --port 8080
147
+ env:
148
+ - { name: NAMESPACE, value: "default" }
149
+ - { name: WINDOW_SEC, value: "30" }
150
+ - { name: CAPACITY_RPS, value: "1.0" }
151
+ - { name: MIN_REPLICA_ON_WAKE, value: "1" }
152
+ - { name: MAX_REPLICAS, value: "8" }
153
+ ports: [{containerPort: 8080}]
154
+ volumeMounts:
155
+ - { name: code, mountPath: /app/activator.py, subPath: activator.py }
156
+ workingDir: /app
157
+ volumes:
158
+ - name: code
159
+ configMap: { name: activator-code }
160
+ ---
161
+ apiVersion: v1
162
+ kind: Service
163
+ metadata:
164
+ name: aibrix-activator
165
+ namespace: aibrix-activator
166
+ spec:
167
+ selector: { app: aibrix-activator }
168
+ ports:
169
+ - name: http
170
+ port: 8080
171
+ targetPort: 8080
172
+ protocol: TCP
173
+ type: ClusterIP
174
+ ---
175
+ apiVersion: gateway.networking.k8s.io/v1beta1
176
+ kind: ReferenceGrant
177
+ metadata:
178
+ name: allow-httproute-to-activator
179
+ namespace: aibrix-activator
180
+ spec:
181
+ from:
182
+ - group: gateway.networking.k8s.io
183
+ kind: HTTPRoute
184
+ namespace: aibrix-system
185
+ to:
186
+ - group: ""
187
+ kind: Service
188
+ name: aibrix-activator
189
+ ---
190
+ apiVersion: gateway.networking.k8s.io/v1
191
+ kind: HTTPRoute
192
+ metadata:
193
+ name: activator-mirror-sink
194
+ namespace: aibrix-system
195
+ spec:
196
+ parentRefs:
197
+ - group: gateway.networking.k8s.io
198
+ kind: Gateway
199
+ name: aibrix-eg
200
+ namespace: aibrix-system
201
+ rules:
202
+ - matches:
203
+ - path:
204
+ type: PathPrefix
205
+ value: /__activator_sink__
206
+ backendRefs:
207
+ - name: aibrix-activator
208
+ namespace: aibrix-activator
209
+ port: 8080
210
+ ---
211
+ apiVersion: gateway.envoyproxy.io/v1alpha1
212
+ kind: EnvoyPatchPolicy
213
+ metadata:
214
+ name: prewarm-completions-lua
215
+ namespace: aibrix-system
216
+ spec:
217
+ targetRef:
218
+ group: gateway.networking.k8s.io
219
+ kind: Gateway
220
+ name: aibrix-eg
221
+ type: JSONPatch
222
+ jsonPatches:
223
+ - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
224
+ name: "aibrix-system/aibrix-eg/http"
225
+ operation:
226
+ op: add
227
+ path: "/default_filter_chain/filters/0/typed_config/http_filters/0"
228
+ value:
229
+ name: envoy.filters.http.lua
230
+ typed_config:
231
+ "@type": type.googleapis.com/envoy.extensions.filters.http.lua.v3.Lua
232
+ inlineCode: |
233
+ function envoy_on_request(handle)
234
+ local path = handle:headers():get(":path") or ""
235
+ if string.find(path, "^/v1/completions") or string.find(path, "^/v1/chat/completions") then
236
+ -- Try to get model from header first
237
+ local model = handle:headers():get("model") or ""
238
+
239
+ -- If no model in header, try to extract from JSON body
240
+ if model == "" then
241
+ local ct = handle:headers():get("content-type") or ""
242
+ if string.find(ct:lower(), "application/json") then
243
+ local body = handle:body()
244
+ if body and body:length() > 0 then
245
+ local raw = body:getBytes(0, math.min(body:length(), 1024))
246
+ -- Simple Lua pattern (not a full JSON parse) to extract model from the first 1024 bytes of the body: "model":"value"
247
+ local model_match = raw:match('"model"%s*:%s*"([^"]+)"')
248
+ if model_match then
249
+ model = model_match
250
+ end
251
+ end
252
+ end
253
+ end
254
+
255
+ -- Only proceed if we have a model
256
+ if model ~= "" then
257
+ -- fire-and-forget wake signal; very short timeout
258
+ pcall(function()
259
+ handle:httpCall(
260
+ "httproute/aibrix-system/activator-mirror-sink/rule/0",
261
+ {
262
+ [":method"] = "POST",
263
+ [":path"] = "/v1/completions",
264
+ [":authority"] = "aibrix-activator.aibrix-activator.svc.cluster.local",
265
+ ["content-type"] = "application/json",
266
+ ["model"] = model
267
+ },
268
+ "{}",
269
+ 5 -- ms
270
+ )
271
+ end)
272
+ end
273
+ end
274
+ end
@@ -1,8 +1,25 @@
1
+ # Apoxy Setup (Part 1/2) - Core Infrastructure
2
+ #
3
+ # This file sets up the core Apoxy infrastructure for external access to deployments:
4
+ # 1. Apoxy system namespace and RBAC
5
+ # 2. Kubeconfig secret for cluster access (populated by CI)
6
+ # 3. Apoxy tunnel controller and proxy services
7
+ # 4. Network policies for cross-namespace access
8
+ #
9
+ # Split into 2 files because:
10
+ # - apoxy-setup.yaml: Core infrastructure (1 per cluster) (needs to be applied first)
11
+ # - apoxy-setup2.yaml: All routing rules for both deployment types
12
+
13
+ apiVersion: v1
14
+ kind: Namespace
15
+ metadata:
16
+ name: apoxy-system
17
+ ---
1
18
  apiVersion: v1
2
19
  kind: Secret
3
20
  metadata:
4
21
  name: trainy-kubeconfig
5
- namespace: default
22
+ namespace: apoxy-system
6
23
  type: Opaque
7
24
  data:
8
25
  # this gets replaced by buildkite CI secret APOXY_AUTH
@@ -13,7 +30,7 @@ apiVersion: v1
13
30
  kind: ServiceAccount
14
31
  metadata:
15
32
  name: kube-controller
16
- namespace: default
33
+ namespace: apoxy-system
17
34
  ---
18
35
  apiVersion: rbac.authorization.k8s.io/v1
19
36
  kind: ClusterRole
@@ -35,13 +52,13 @@ roleRef:
35
52
  subjects:
36
53
  - kind: ServiceAccount
37
54
  name: kube-controller
38
- namespace: default
55
+ namespace: apoxy-system
39
56
  ---
40
57
  apiVersion: apps/v1
41
58
  kind: Deployment
42
59
  metadata:
43
60
  name: kube-controller
44
- namespace: default
61
+ namespace: apoxy-system
45
62
  labels:
46
63
  app: kube-controller
47
64
  spec:
@@ -85,7 +102,7 @@ apiVersion: v1
85
102
  kind: Service
86
103
  metadata:
87
104
  name: kube-controller
88
- namespace: default
105
+ namespace: apoxy-system
89
106
  labels:
90
107
  app: kube-controller
91
108
  spec:
@@ -101,7 +118,7 @@ apiVersion: v1
101
118
  kind: ConfigMap
102
119
  metadata:
103
120
  name: apoxy-config
104
- namespace: default
121
+ namespace: apoxy-system
105
122
  data:
106
123
  config.yaml: |
107
124
  apiVersion: config.apoxy.dev/v1alpha1
@@ -118,7 +135,7 @@ apiVersion: apps/v1
118
135
  kind: Deployment
119
136
  metadata:
120
137
  name: apoxy
121
- namespace: default
138
+ namespace: apoxy-system
122
139
  labels:
123
140
  app: apoxy
124
141
  spec:
@@ -133,7 +150,7 @@ spec:
133
150
  spec:
134
151
  containers:
135
152
  - name: apoxy
136
- image: apoxy/apoxy:v0.11.14
153
+ image: apoxy/apoxy:v0.11.18
137
154
  command: ["apoxy", "tunnel", "run", "UNIQUE-TEMPNAME", "--insecure-skip-verify"]
138
155
  volumeMounts:
139
156
  - name: kubeconfig-volume
@@ -148,4 +165,20 @@ spec:
148
165
  secretName: trainy-kubeconfig
149
166
  - name: apoxy-config-volume
150
167
  configMap:
151
- name: apoxy-config
168
+ name: apoxy-config
169
+ ---
170
+ # NetworkPolicy to allow Apoxy to reach services in other namespaces
171
+ apiVersion: networking.k8s.io/v1
172
+ kind: NetworkPolicy
173
+ metadata:
174
+ name: apoxy-cross-namespace-access
175
+ namespace: apoxy-system
176
+ spec:
177
+ podSelector:
178
+ matchLabels:
179
+ app: apoxy
180
+ policyTypes:
181
+ - Egress
182
+ egress:
183
+ # Allow all egress traffic
184
+ - {}
@@ -1,3 +1,20 @@
1
+ # Apoxy Setup (Part 2/2) - Deployment Routing
2
+ #
3
+ # This file sets up Apoxy routing for both vLLM and general deployments:
4
+ # 1. TunnelNode for secure tunnel connection
5
+ # 2. Backend for vLLM pointing to Envoy Gateway
6
+ # 3. HTTPRoute for company.trainy.us -> vLLM deployments
7
+ # 4. Backend for general deployments pointing to nginx ingress
8
+ # 5. HTTPRoute for company2.trainy.us -> general deployments
9
+ # 6. KEDA proxy service for HTTP autoscaling
10
+ # 7. 60s timeout for all requests
11
+ #
12
+ # Split into 2 files because:
13
+ # - apoxy-setup.yaml: Core infrastructure (1 per cluster) (needs to be applied first)
14
+ # - apoxy-setup2.yaml: All routing rules for both deployment types
15
+
16
+ # NOTE: TunnelNode should technically be in the first apoxy-setup.yaml but it
17
+ # needs to be created after the core infrastructure is created, so we put it here.
1
18
  apiVersion: core.apoxy.dev/v1alpha
2
19
  kind: TunnelNode
3
20
  metadata:
@@ -6,7 +23,7 @@ spec:
6
23
  egressGateway:
7
24
  enabled: true
8
25
  ---
9
- # Add just your backend for aibrix
26
+ # Backend for vLLM deployments
10
27
  apiVersion: core.apoxy.dev/v1alpha
11
28
  kind: Backend
12
29
  metadata:
@@ -15,7 +32,7 @@ spec:
15
32
  endpoints:
16
33
  - fqdn: envoy-aibrix-system-aibrix-eg-903790dc.envoy-gateway-system.UNIQUE-TEMPNAME.tun.apoxy.net
17
34
  ---
18
- # Add just your route for aibrix
35
+ # HTTPRoute for vLLM deployments
19
36
  apiVersion: gateway.apoxy.dev/v1
20
37
  kind: HTTPRoute
21
38
  metadata:
@@ -29,6 +46,53 @@ spec:
29
46
  - 'TEMPNAME.trainy.us'
30
47
  rules:
31
48
  - backendRefs:
32
- - kind: Backend
33
- name: UNIQUE-TEMPNAME-backend
34
- port: 80
49
+ - kind: Backend
50
+ name: UNIQUE-TEMPNAME-backend
51
+ port: 80
52
+ timeouts:
53
+ request: "60s"
54
+ ---
55
+ # Backend for general deployments
56
+ apiVersion: core.apoxy.dev/v1alpha
57
+ kind: Backend
58
+ metadata:
59
+ name: UNIQUE-TEMPNAME-backend2
60
+ spec:
61
+ endpoints:
62
+ - fqdn: ingress-nginx-controller.keda.UNIQUE-TEMPNAME.tun.apoxy.net
63
+ ---
64
+ # HTTPRoute for general deployments
65
+ apiVersion: gateway.apoxy.dev/v1
66
+ kind: HTTPRoute
67
+ metadata:
68
+ name: UNIQUE-TEMPNAME-route2
69
+ spec:
70
+ parentRefs:
71
+ - name: default
72
+ kind: Gateway
73
+ port: 443
74
+ hostnames:
75
+ - 'TEMPNAME2.trainy.us'
76
+ rules:
77
+ - backendRefs:
78
+ - kind: Backend
79
+ name: UNIQUE-TEMPNAME-backend2
80
+ port: 80
81
+ timeouts:
82
+ request: "60s"
83
+
84
+ # KEDA proxy service (1 per cluster) (For general deployments)
85
+ ---
86
+ apiVersion: v1
87
+ kind: Service
88
+ metadata:
89
+ name: keda-proxy
90
+ namespace: default
91
+ spec:
92
+ type: ExternalName
93
+ externalName: keda-add-ons-http-interceptor-proxy.keda
94
+ ports:
95
+ - name: http
96
+ port: 8080
97
+ protocol: TCP
98
+ targetPort: 8080
konduktor/serving.py CHANGED
@@ -49,11 +49,15 @@ class Serving:
49
49
  if min_replicas is None:
50
50
  min_replicas = max_replicas
51
51
  if max_replicas is None:
52
- max_replicas = min_replicas
52
+ # Edge case: if min_replicas is 0, set max_replicas to 1
53
+ if min_replicas == 0:
54
+ max_replicas = 1
55
+ else:
56
+ max_replicas = min_replicas
53
57
 
54
- if min_replicas is not None and min_replicas <= 0:
58
+ if min_replicas is not None and min_replicas < 0:
55
59
  with ux_utils.print_exception_no_traceback():
56
- raise ValueError('min_replicas must be >= 1')
60
+ raise ValueError('min_replicas must be >= 0')
57
61
 
58
62
  if (
59
63
  max_replicas is not None
@@ -139,9 +143,9 @@ class Serving:
139
143
 
140
144
  def to_yaml_config(self) -> Dict[str, Union[int, str]]:
141
145
  config: Dict[str, Union[int, str]] = {
142
- 'min_replicas': self._min_replicas or 1,
143
- 'max_replicas': self._max_replicas or 1,
144
- 'ports': self._ports or 8000,
146
+ 'min_replicas': self._min_replicas if self._min_replicas is not None else 1,
147
+ 'max_replicas': self._max_replicas if self._max_replicas is not None else 1,
148
+ 'ports': self._ports if self._ports is not None else 8000,
145
149
  }
146
150
  # Only include probe if it's not None
147
151
  if self._probe is not None:
konduktor/task.py CHANGED
@@ -567,6 +567,13 @@ class Task:
567
567
  f'less than min_replicas ({serving.min_replicas})'
568
568
  )
569
569
 
570
+ if serving.max_replicas == 0 and serving.min_replicas == 0:
571
+ with ux_utils.print_exception_no_traceback():
572
+ raise ValueError(
573
+ f'max_replicas ({serving.max_replicas}) and '
574
+ f'min_replicas ({serving.min_replicas}) cannot both be 0'
575
+ )
576
+
570
577
  if isinstance(serving, konduktor.Serving):
571
578
  serving = serving
572
579
  self.serving = serving