konduktor-nightly 0.1.0.dev20250915104603__py3-none-any.whl → 0.1.0.dev20251107104752__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,430 @@
1
+ # Aibrix Setup - vLLM Deployment Infrastructure
2
+ #
3
+ # This file sets up the infrastructure needed for vLLM (Aibrix) deployments:
4
+ # 1. Envoy Gateway configuration for HTTP routing
5
+ # 2. Aibrix Activator service for request-based autoscaling (KPA)
6
+ # 3. HTTP route mirroring for prewarming vLLM models
7
+ # 4. Lua script for extracting model names from OpenAI-compatible requests
8
+ #
9
+ # The activator tracks incoming requests and provides metrics to scale
10
+ # vLLM deployments based on demand (requests per second).
11
+
12
+ # This file is kept separate from apoxy setup files because it is
13
+ # only used in actual clusters, not in the test kind clusters.
14
+
15
+ apiVersion: v1
16
+ kind: ConfigMap
17
+ metadata:
18
+ name: envoy-gateway-config
19
+ namespace: envoy-gateway-system
20
+ data:
21
+ envoy-gateway.yaml: |
22
+ apiVersion: gateway.envoyproxy.io/v1alpha1
23
+ kind: EnvoyGateway
24
+ provider:
25
+ type: Kubernetes
26
+ gateway:
27
+ controllerName: gateway.envoyproxy.io/gatewayclass-controller
28
+ extensionApis:
29
+ enableEnvoyPatchPolicy: true
30
+ ---
31
+ apiVersion: v1
32
+ kind: Namespace
33
+ metadata:
34
+ name: aibrix-activator
35
+ ---
36
+ apiVersion: v1
37
+ kind: ServiceAccount
38
+ metadata:
39
+ name: aibrix-activator
40
+ namespace: aibrix-activator
41
+ ---
42
+ apiVersion: rbac.authorization.k8s.io/v1
43
+ kind: ClusterRole
44
+ metadata:
45
+ name: aibrix-activator
46
+ rules:
47
+ - apiGroups: ["apps"]
48
+ resources: ["deployments"]
49
+ verbs: ["get", "list", "watch"]
50
+ ---
51
+ apiVersion: rbac.authorization.k8s.io/v1
52
+ kind: ClusterRoleBinding
53
+ metadata:
54
+ name: aibrix-activator
55
+ roleRef:
56
+ apiGroup: rbac.authorization.k8s.io
57
+ kind: ClusterRole
58
+ name: aibrix-activator
59
+ subjects:
60
+ - kind: ServiceAccount
61
+ name: aibrix-activator
62
+ namespace: aibrix-activator
63
+ ---
64
+ apiVersion: v1
65
+ kind: ConfigMap
66
+ metadata:
67
+ name: activator-code
68
+ namespace: aibrix-activator
69
+ data:
70
+ activator.py: |
71
+ import os, time, json
72
+ from collections import defaultdict, deque
73
+ from fastapi import FastAPI, Request
74
+ from fastapi.responses import PlainTextResponse, JSONResponse
75
+ import asyncio
76
+ from kubernetes import client, config
77
+
78
+ NAMESPACE = os.getenv("NAMESPACE", "default")
79
+ WINDOW_SEC = int(os.getenv("WINDOW_SEC", "30")) # demand lookback
80
+ CAPACITY_RPS = float(os.getenv("CAPACITY_RPS", "1.0")) # per-replica capacity
81
+ MIN_WAKE = int(os.getenv("MIN_REPLICA_ON_WAKE", "1"))
82
+ MAX_REPLICAS = int(os.getenv("MAX_REPLICAS", "8"))
83
+ CLEANUP_INTERVAL = int(os.getenv("CLEANUP_INTERVAL", "300")) # 5 minutes
84
+
85
+ app = FastAPI()
86
+ events = defaultdict(deque) # key=(ns,model) -> deque[timestamps]
87
+
88
# Initialize the Kubernetes client from the pod's in-cluster configuration.
# If that fails (for example when running outside a cluster), leave the client
# as None; the deployment lookup and stale-entry cleanup check for that and
# become no-ops.
try:
    config.load_incluster_config()
    k8s_apps_v1 = client.AppsV1Api()
except Exception as e:
    # Not a bare ``except``: KeyboardInterrupt / SystemExit must still propagate.
    print(f"Kubernetes client unavailable, stale-entry cleanup disabled: {e}")
    k8s_apps_v1 = None
94
+
95
def _prune(q, now):
    """Discard timestamps at the left end of ``q`` older than WINDOW_SEC.

    ``q`` is a deque of request timestamps in arrival order and ``now`` is the
    reference time; entries are removed in place until the oldest remaining
    one is within the window.
    """
    while q:
        if not (now - q[0] > WINDOW_SEC):
            break
        q.popleft()
97
+
98
def _bump(ns, model):
    """Record one request for ``(ns, model)`` at the current time.

    The timestamp is appended to that key's deque (created on first use by the
    defaultdict), after which entries outside the window are dropped.
    """
    stamp = time.time()
    window = events[(ns, model)]
    window.append(stamp)
    _prune(window, stamp)
103
+
104
def _desired(ns, model):
    """Return the suggested replica count for ``(ns, model)``.

    Returns 0 when no request was seen inside the lookback window. Otherwise
    the observed rate (requests in the window / WINDOW_SEC) is divided by the
    per-replica capacity CAPACITY_RPS, rounded up, raised to at least MIN_WAKE
    and capped at MAX_REPLICAS.
    """
    import math

    # Use .get() instead of indexing: ``events`` is a defaultdict, so
    # ``events[key]`` would insert an empty deque for every key that is merely
    # queried and the table would grow without bound.
    q = events.get((ns, model))
    if not q:
        return 0
    _prune(q, time.time())
    if not q:
        return 0
    rps = len(q) / max(WINDOW_SEC, 1)
    # Convert demand to desired replicas; the 1e-6 floor guards against a
    # zero or negative CAPACITY_RPS setting.
    d = max(MIN_WAKE, math.ceil(rps / max(CAPACITY_RPS, 1e-6)))
    return max(0, min(d, MAX_REPLICAS))
114
+
115
+ def _extract_model(headers, body_bytes):
116
+ # Prefer header (OpenAI-compatible)
117
+ m = headers.get("model") or headers.get("x-model")
118
+ if m: return m
119
+ # Try JSON body
120
+ try:
121
+ j = json.loads(body_bytes or b"{}")
122
+ if isinstance(j, dict):
123
+ # OpenAI schema: {"model": "...", ...}
124
+ if "model" in j and isinstance(j["model"], str):
125
+ return j["model"]
126
+ except Exception:
127
+ pass
128
+ return None
129
+
130
def _list_deployment_names():
    """Return the names of deployments in NAMESPACE labelled model.aibrix.ai/name.

    Unlike _get_existing_deployments, errors from the Kubernetes client are
    propagated so the caller can tell "no deployments" from "lookup failed".
    """
    deployments = k8s_apps_v1.list_namespaced_deployment(
        namespace=NAMESPACE,
        label_selector="model.aibrix.ai/name",
    )
    return {d.metadata.name for d in deployments.items}


def _get_existing_deployments():
    """Get the set of existing Aibrix deployment names from Kubernetes.

    Returns an empty set when no client is configured or the lookup fails.
    """
    if not k8s_apps_v1:
        return set()
    try:
        return _list_deployment_names()
    except Exception:
        return set()


def _cleanup_stale_entries():
    """Remove entries for deployments that no longer exist.

    Only keys in NAMESPACE are considered. If the deployment list cannot be
    fetched, nothing is removed: a failed lookup says nothing about which
    deployments exist, and treating it as "none exist" would wipe every
    tracked model on a transient API error.
    """
    if not k8s_apps_v1:
        return
    try:
        existing_deployments = _list_deployment_names()
    except Exception as e:
        print(f"Error during cleanup: {e}")
        return

    # NOTE(review): the model name taken from requests is compared with
    # deployment *names*; this assumes each deployment is named after the
    # model it serves - confirm.
    keys_to_remove = [
        (ns, model)
        for (ns, model) in list(events.keys())
        if ns == NAMESPACE and model not in existing_deployments
    ]
    for key in keys_to_remove:
        # pop() rather than del so an already-missing key cannot abort the pass.
        events.pop(key, None)
        print(f"Cleaned up stale entry for deployment: {key[1]}")
160
+
161
async def _cleanup_task():
    """Background task to periodically clean up stale entries.

    Sleeps CLEANUP_INTERVAL seconds before every pass, so the first cleanup
    happens one full interval after startup. Runs until cancelled.
    """
    while True:
        await asyncio.sleep(CLEANUP_INTERVAL)
        _cleanup_stale_entries()


@app.on_event("startup")
async def startup_event():
    """Start the background cleanup task when the application starts."""
    # The event loop keeps only a weak reference to tasks. Hold a strong
    # reference on the application state so the cleanup loop cannot be
    # garbage-collected while it is still running.
    app.state.cleanup_task = asyncio.create_task(_cleanup_task())
171
+
172
def _record_and_ack(headers, body):
    """Count one request for the model named in ``headers``/``body``.

    Returns an empty 204 response whether or not a model could be identified.
    """
    # Local import: the file-level imports do not include the plain Response class.
    from fastapi import Response

    model = _extract_model(headers, body)
    if model:
        _bump(NAMESPACE, model)
    # 204 No Content must not carry a payload, so no JSON body is attached.
    return Response(status_code=204)


# Mirror endpoints (same paths as the serving API); quick 204 response
@app.post("/v1/completions")
@app.post("/v1/chat/completions")
async def mirrored(request: Request):
    """Record demand for a mirrored completion / chat-completion request."""
    return _record_and_ack(request.headers, await request.body())


# Catch-all POST (safety net if the gateway uses different paths)
@app.post("/{full_path:path}")
async def mirrored_generic(request: Request, full_path: str):
    """Record demand for a POST to any other path."""
    return _record_and_ack(request.headers, await request.body())
190
+
191
def _label_value(value):
    """Escape ``value`` for use inside a double-quoted Prometheus label.

    Model names come from request headers and bodies, so they may contain
    backslashes, double quotes or newlines that would otherwise corrupt the
    exposition output.
    """
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


def _gauge_family(name, help_text, samples):
    """Return the exposition lines for one gauge: HELP, TYPE, then its samples.

    ``samples`` is an iterable of ``(label_string, value)`` pairs.
    """
    lines = [f"# HELP {name} {help_text}", f"# TYPE {name} gauge"]
    lines.extend(f"{name}{{{labels}}} {value}" for labels, value in samples)
    return lines


# Prometheus-friendly aggregate endpoint: export ALL (ns, model)
@app.get("/metrics", response_class=PlainTextResponse)
async def metrics_all():
    """Export suggested replicas and observed RPS for every tracked (ns, model).

    Each metric is emitted under the idiomatic ``vllm_*`` name and under the
    legacy ``vllm:*`` name kept for back-compat. Samples are grouped per
    metric family, directly after that family's HELP/TYPE lines.
    """
    now = time.time()
    replicas = []
    rates = []
    for (ns, model), q in list(events.items()):
        _prune(q, now)
        labels = f'namespace="{_label_value(ns)}",model_name="{_label_value(model)}"'
        rates.append((labels, f"{len(q) / max(WINDOW_SEC, 1):.6f}"))
        replicas.append((labels, _desired(ns, model)))

    lines = []
    for prefix in ("vllm_", "vllm:"):
        lines += _gauge_family(
            prefix + "deployment_replicas", "Number of suggested replicas.", replicas
        )
        lines += _gauge_family(
            prefix + "observed_rps", "Incoming requests per second.", rates
        )
    return "\n".join(lines) + "\n"


# Metrics for KPA and Debugging
@app.get("/metrics/{ns}/{model}", response_class=PlainTextResponse)
async def metrics(ns: str, model: str):
    """Export suggested replicas and observed RPS for a single (ns, model).

    An unknown pair reports 0 replicas and 0.00 RPS.
    """
    # Use .get(): indexing the defaultdict would create a permanent entry for
    # every namespace/model combination that appears in a URL.
    q = events.get((ns, model))
    if q is None:
        d, rps = 0, 0.0
    else:
        d = _desired(ns, model)
        _prune(q, time.time())
        rps = len(q) / max(WINDOW_SEC, 1)

    labels = f'namespace="{_label_value(ns)}",model_name="{_label_value(model)}"'
    lines = _gauge_family(
        "vllm:deployment_replicas", "Number of suggested replicas.", [(labels, d)]
    )
    lines += _gauge_family(
        "vllm:observed_rps", "Incoming requests per second.", [(labels, f"{rps:.2f}")]
    )
    return "\n".join(lines) + "\n"
238
+ ---
239
+ apiVersion: apps/v1
240
+ kind: Deployment
241
+ metadata:
242
+ name: aibrix-activator
243
+ namespace: aibrix-activator
244
+ spec:
245
+ replicas: 1
246
+ selector: { matchLabels: { app: aibrix-activator } }
247
+ template:
248
+ metadata: { labels: { app: aibrix-activator } }
249
+ spec:
250
+ containers:
251
+ - name: activator
252
+ image: python:3.11-slim
253
+ command: ["bash","-lc"]
254
+ args:
255
+ - |
256
+ pip install fastapi uvicorn kubernetes >/dev/null && \
257
+ uvicorn activator:app --host 0.0.0.0 --port 8080
258
+ env:
259
+ - { name: NAMESPACE, value: "default" }
260
+ - { name: WINDOW_SEC, value: "30" }
261
+ - { name: CAPACITY_RPS, value: "1.0" }
262
+ - { name: MIN_REPLICA_ON_WAKE, value: "1" }
263
+ - { name: MAX_REPLICAS, value: "8" }
264
+ - { name: CLEANUP_INTERVAL, value: "300" }
265
+ ports: [{containerPort: 8080}]
266
+ volumeMounts:
267
+ - { name: code, mountPath: /app/activator.py, subPath: activator.py }
268
+ workingDir: /app
269
+ serviceAccountName: aibrix-activator
270
+ volumes:
271
+ - name: code
272
+ configMap: { name: activator-code }
273
+ ---
274
+ apiVersion: v1
275
+ kind: Service
276
+ metadata:
277
+ name: aibrix-activator
278
+ namespace: aibrix-activator
279
+ annotations:
280
+ prometheus.io/scrape: "true"
281
+ prometheus.io/port: "8080"
282
+ prometheus.io/path: "/metrics"
283
+ labels:
284
+ app: aibrix-activator
285
+ prometheus-discovery: "true"
286
+ spec:
287
+ selector: { app: aibrix-activator }
288
+ ports:
289
+ - name: http
290
+ port: 8080
291
+ targetPort: 8080
292
+ protocol: TCP
293
+ type: ClusterIP
294
+ ---
295
+ apiVersion: monitoring.coreos.com/v1
296
+ kind: ServiceMonitor
297
+ metadata:
298
+ name: aibrix-activator
299
+ namespace: prometheus
300
+ labels:
301
+ app: aibrix-activator
302
+ spec:
303
+ selector:
304
+ matchLabels:
305
+ app: aibrix-activator
306
+ namespaceSelector:
307
+ matchNames:
308
+ - aibrix-activator
309
+ endpoints:
310
+ - port: http
311
+ path: /metrics
312
+ ---
313
+ apiVersion: monitoring.coreos.com/v1
314
+ kind: ServiceMonitor
315
+ metadata:
316
+ name: vllm-deployments
317
+ namespace: prometheus
318
+ labels:
319
+ app: vllm-deployments
320
+ spec:
321
+ selector:
322
+ matchLabels:
323
+ prometheus-discovery: "true"
324
+ namespaceSelector:
325
+ matchNames:
326
+ - default
327
+ endpoints:
328
+ - port: serve
329
+ path: /metrics
330
+ ---
331
+ apiVersion: gateway.networking.k8s.io/v1beta1
332
+ kind: ReferenceGrant
333
+ metadata:
334
+ name: allow-httproute-to-activator
335
+ namespace: aibrix-activator
336
+ spec:
337
+ from:
338
+ - group: gateway.networking.k8s.io
339
+ kind: HTTPRoute
340
+ namespace: aibrix-system
341
+ to:
342
+ - group: ""
343
+ kind: Service
344
+ name: aibrix-activator
345
+ ---
346
+ apiVersion: gateway.networking.k8s.io/v1
347
+ kind: HTTPRoute
348
+ metadata:
349
+ name: activator-mirror-sink
350
+ namespace: aibrix-system
351
+ spec:
352
+ parentRefs:
353
+ - group: gateway.networking.k8s.io
354
+ kind: Gateway
355
+ name: aibrix-eg
356
+ namespace: aibrix-system
357
+ rules:
358
+ - matches:
359
+ - path:
360
+ type: PathPrefix
361
+ value: /__activator_sink__
362
+ backendRefs:
363
+ - name: aibrix-activator
364
+ namespace: aibrix-activator
365
+ port: 8080
366
+ ---
367
+ apiVersion: gateway.envoyproxy.io/v1alpha1
368
+ kind: EnvoyPatchPolicy
369
+ metadata:
370
+ name: prewarm-completions-lua
371
+ namespace: aibrix-system
372
+ spec:
373
+ targetRef:
374
+ group: gateway.networking.k8s.io
375
+ kind: Gateway
376
+ name: aibrix-eg
377
+ type: JSONPatch
378
+ jsonPatches:
379
+ - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
380
+ name: "aibrix-system/aibrix-eg/http"
381
+ operation:
382
+ op: add
383
+ path: "/default_filter_chain/filters/0/typed_config/http_filters/0"
384
+ value:
385
+ name: envoy.filters.http.lua
386
+ typed_config:
387
+ "@type": type.googleapis.com/envoy.extensions.filters.http.lua.v3.Lua
388
+ inlineCode: |
389
-- Envoy Lua request hook: for OpenAI-style completion requests, determine the
-- requested model and send a wake signal to the activator so it can count
-- demand for that model. The original request is never modified.
function envoy_on_request(handle)
  local path = handle:headers():get(":path") or ""
  if string.find(path, "^/v1/completions") or string.find(path, "^/v1/chat/completions") then
    -- Try to get model from header first
    local model = handle:headers():get("model") or ""

    -- If no model in header, try to extract from JSON body
    if model == "" then
      local ct = handle:headers():get("content-type") or ""
      if string.find(ct:lower(), "application/json") then
        -- NOTE: handle:body() makes Envoy buffer the whole request body
        -- before the script continues.
        local body = handle:body()
        if body and body:length() > 0 then
          -- Only the first 1024 bytes are inspected.
          local raw = body:getBytes(0, math.min(body:length(), 1024))
          -- Simple Lua pattern to extract model from JSON: "model":"value"
          local model_match = raw:match('"model"%s*:%s*"([^"]+)"')
          if model_match then
            model = model_match
          end
        end
      end
    end

    -- Only proceed if we have a model
    if model ~= "" then
      -- Fire-and-forget wake signal. pcall keeps a failed call from failing
      -- the client request.
      pcall(function()
        handle:httpCall(
          "httproute/aibrix-system/activator-mirror-sink/rule/0",
          {
            [":method"] = "POST",
            [":path"] = "/v1/completions",
            [":authority"] = "aibrix-activator.aibrix-activator.svc.cluster.local",
            ["content-type"] = "application/json",
            ["model"] = model
          },
          "{}",
          5, -- timeout in ms
          true -- asynchronous: do not hold the client request while waiting for the activator
        )
      end)
    end
  end
end
@@ -1,8 +1,25 @@
1
+ # Apoxy Setup (Part 1/2) - Core Infrastructure
2
+ #
3
+ # This file sets up the core Apoxy infrastructure for external access to deployments:
4
+ # 1. Apoxy system namespace and RBAC
5
+ # 2. Kubeconfig secret for cluster access (populated by CI)
6
+ # 3. Apoxy tunnel controller and proxy services
7
+ # 4. Network policies for cross-namespace access
8
+ #
9
+ # Split into 2 files because:
10
+ # - apoxy-setup.yaml: Core infrastructure (1 per cluster) (needs to be applied first)
11
+ # - apoxy-setup2.yaml: All routing rules for both deployment types
12
+
13
+ apiVersion: v1
14
+ kind: Namespace
15
+ metadata:
16
+ name: apoxy-system
17
+ ---
1
18
  apiVersion: v1
2
19
  kind: Secret
3
20
  metadata:
4
21
  name: trainy-kubeconfig
5
- namespace: default
22
+ namespace: apoxy-system
6
23
  type: Opaque
7
24
  data:
8
25
  # this gets replaced by buildkite CI secret APOXY_AUTH
@@ -13,7 +30,7 @@ apiVersion: v1
13
30
  kind: ServiceAccount
14
31
  metadata:
15
32
  name: kube-controller
16
- namespace: default
33
+ namespace: apoxy-system
17
34
  ---
18
35
  apiVersion: rbac.authorization.k8s.io/v1
19
36
  kind: ClusterRole
@@ -35,13 +52,13 @@ roleRef:
35
52
  subjects:
36
53
  - kind: ServiceAccount
37
54
  name: kube-controller
38
- namespace: default
55
+ namespace: apoxy-system
39
56
  ---
40
57
  apiVersion: apps/v1
41
58
  kind: Deployment
42
59
  metadata:
43
60
  name: kube-controller
44
- namespace: default
61
+ namespace: apoxy-system
45
62
  labels:
46
63
  app: kube-controller
47
64
  spec:
@@ -85,7 +102,7 @@ apiVersion: v1
85
102
  kind: Service
86
103
  metadata:
87
104
  name: kube-controller
88
- namespace: default
105
+ namespace: apoxy-system
89
106
  labels:
90
107
  app: kube-controller
91
108
  spec:
@@ -101,7 +118,7 @@ apiVersion: v1
101
118
  kind: ConfigMap
102
119
  metadata:
103
120
  name: apoxy-config
104
- namespace: default
121
+ namespace: apoxy-system
105
122
  data:
106
123
  config.yaml: |
107
124
  apiVersion: config.apoxy.dev/v1alpha1
@@ -118,7 +135,7 @@ apiVersion: apps/v1
118
135
  kind: Deployment
119
136
  metadata:
120
137
  name: apoxy
121
- namespace: default
138
+ namespace: apoxy-system
122
139
  labels:
123
140
  app: apoxy
124
141
  spec:
@@ -133,7 +150,7 @@ spec:
133
150
  spec:
134
151
  containers:
135
152
  - name: apoxy
136
- image: apoxy/apoxy:v0.11.14
153
+ image: apoxy/apoxy:v0.11.18
137
154
  command: ["apoxy", "tunnel", "run", "UNIQUE-TEMPNAME", "--insecure-skip-verify"]
138
155
  volumeMounts:
139
156
  - name: kubeconfig-volume
@@ -148,4 +165,20 @@ spec:
148
165
  secretName: trainy-kubeconfig
149
166
  - name: apoxy-config-volume
150
167
  configMap:
151
- name: apoxy-config
168
+ name: apoxy-config
169
+ ---
170
+ # NetworkPolicy to allow Apoxy to reach services in other namespaces
171
+ apiVersion: networking.k8s.io/v1
172
+ kind: NetworkPolicy
173
+ metadata:
174
+ name: apoxy-cross-namespace-access
175
+ namespace: apoxy-system
176
+ spec:
177
+ podSelector:
178
+ matchLabels:
179
+ app: apoxy
180
+ policyTypes:
181
+ - Egress
182
+ egress:
183
+ # Allow all egress traffic
184
+ - {}
@@ -1,3 +1,20 @@
1
+ # Apoxy Setup (Part 2/2) - Deployment Routing
2
+ #
3
+ # This file sets up Apoxy routing for both vLLM and general deployments:
4
+ # 1. TunnelNode for secure tunnel connection
5
+ # 2. Backend for vLLM pointing to Envoy Gateway
6
+ # 3. HTTPRoute for company.trainy.us -> vLLM deployments
7
+ # 4. Backend for general deployments pointing to nginx ingress
8
+ # 5. HTTPRoute for company2.trainy.us -> general deployments
9
+ # 6. KEDA proxy service for HTTP autoscaling
10
+ # 7. 60s timeout for all requests
11
+ #
12
+ # Split into 2 files because:
13
+ # - apoxy-setup.yaml: Core infrastructure (1 per cluster) (needs to be applied first)
14
+ # - apoxy-setup2.yaml: All routing rules for both deployment types
15
+
16
+ # NOTE: TunnelNode should technically be in the first apoxy-setup.yaml but it
17
+ # needs to be created after the core infrastructure is created, so we put it here.
1
18
  apiVersion: core.apoxy.dev/v1alpha
2
19
  kind: TunnelNode
3
20
  metadata:
@@ -6,7 +23,7 @@ spec:
6
23
  egressGateway:
7
24
  enabled: true
8
25
  ---
9
- # Add just your backend for aibrix
26
+ # Backend for vLLM deployments
10
27
  apiVersion: core.apoxy.dev/v1alpha
11
28
  kind: Backend
12
29
  metadata:
@@ -15,7 +32,7 @@ spec:
15
32
  endpoints:
16
33
  - fqdn: envoy-aibrix-system-aibrix-eg-903790dc.envoy-gateway-system.UNIQUE-TEMPNAME.tun.apoxy.net
17
34
  ---
18
- # Add just your route for aibrix
35
+ # HTTPRoute for vLLM deployments
19
36
  apiVersion: gateway.apoxy.dev/v1
20
37
  kind: HTTPRoute
21
38
  metadata:
@@ -29,6 +46,53 @@ spec:
29
46
  - 'TEMPNAME.trainy.us'
30
47
  rules:
31
48
  - backendRefs:
32
- - kind: Backend
33
- name: UNIQUE-TEMPNAME-backend
34
- port: 80
49
+ - kind: Backend
50
+ name: UNIQUE-TEMPNAME-backend
51
+ port: 80
52
+ timeouts:
53
+ request: "60s"
54
+ ---
55
+ # Backend for general deployments
56
+ apiVersion: core.apoxy.dev/v1alpha
57
+ kind: Backend
58
+ metadata:
59
+ name: UNIQUE-TEMPNAME-backend2
60
+ spec:
61
+ endpoints:
62
+ - fqdn: keda-ingress-nginx-controller.keda.UNIQUE-TEMPNAME.tun.apoxy.net
63
+ ---
64
+ # HTTPRoute for general deployments
65
+ apiVersion: gateway.apoxy.dev/v1
66
+ kind: HTTPRoute
67
+ metadata:
68
+ name: UNIQUE-TEMPNAME-route2
69
+ spec:
70
+ parentRefs:
71
+ - name: default
72
+ kind: Gateway
73
+ port: 443
74
+ hostnames:
75
+ - 'TEMPNAME2.trainy.us'
76
+ rules:
77
+ - backendRefs:
78
+ - kind: Backend
79
+ name: UNIQUE-TEMPNAME-backend2
80
+ port: 80
81
+ timeouts:
82
+ request: "60s"
83
+
84
+ # KEDA proxy service (1 per cluster) (For general deployments)
85
+ ---
86
+ apiVersion: v1
87
+ kind: Service
88
+ metadata:
89
+ name: keda-proxy
90
+ namespace: default
91
+ spec:
92
+ type: ExternalName
93
+ externalName: keda-add-ons-http-interceptor-proxy.keda
94
+ ports:
95
+ - name: http
96
+ port: 8080
97
+ protocol: TCP
98
+ targetPort: 8080
konduktor/resource.py CHANGED
@@ -59,6 +59,8 @@ class Resources:
59
59
  # Internal use only.
60
60
  # pylint: disable=invalid-name
61
61
  _cluster_config_overrides: Optional[Dict[str, Any]] = None,
62
+ # used to prevent double validation of image (would happen from overrides)
63
+ _validate_image: bool = True,
62
64
  ):
63
65
  """Initialize a Resources object.
64
66
 
@@ -124,7 +126,8 @@ class Resources:
124
126
  if isinstance(image_id, str):
125
127
  self._image_id = image_id.strip()
126
128
  # Validate Docker image format and existence
127
- validator.validate_and_warn_image(self._image_id, 'task')
129
+ if _validate_image:
130
+ validator.validate_and_warn_image(self._image_id, 'task')
128
131
 
129
132
  self._labels = labels
130
133
  self._cluster_config_overrides = _cluster_config_overrides
@@ -435,15 +438,19 @@ class Resources:
435
438
 
436
439
  def copy(self, **override) -> 'Resources':
437
440
  """Returns a copy of the given Resources."""
441
+ # used to prevent double validation of image (would happen from overrides)
442
+ new_image_id = override.pop('image_id', self.image_id)
438
443
  resources = Resources(
439
444
  cloud=override.pop('cloud', self.cloud),
440
445
  cpus=override.pop('cpus', self._cpus),
441
446
  memory=override.pop('memory', self.memory),
442
447
  accelerators=override.pop('accelerators', self.accelerators),
443
448
  disk_size=override.pop('disk_size', self.disk_size),
444
- image_id=override.pop('image_id', self.image_id),
449
+ image_id=new_image_id,
445
450
  labels=override.pop('labels', self.labels),
446
451
  job_config=override.pop('job_config', self.job_config),
452
+ # used to prevent double validation of image (would happen from overrides)
453
+ _validate_image=(new_image_id != self.image_id),
447
454
  )
448
455
  assert len(override) == 0
449
456
  return resources
konduktor/serving.py CHANGED
@@ -49,11 +49,15 @@ class Serving:
49
49
  if min_replicas is None:
50
50
  min_replicas = max_replicas
51
51
  if max_replicas is None:
52
- max_replicas = min_replicas
52
+ # Edge case: if min_replicas is 0, set max_replicas to 1
53
+ if min_replicas == 0:
54
+ max_replicas = 1
55
+ else:
56
+ max_replicas = min_replicas
53
57
 
54
- if min_replicas is not None and min_replicas <= 0:
58
+ if min_replicas is not None and min_replicas < 0:
55
59
  with ux_utils.print_exception_no_traceback():
56
- raise ValueError('min_replicas must be >= 1')
60
+ raise ValueError('min_replicas must be >= 0')
57
61
 
58
62
  if (
59
63
  max_replicas is not None
@@ -139,9 +143,9 @@ class Serving:
139
143
 
140
144
  def to_yaml_config(self) -> Dict[str, Union[int, str]]:
141
145
  config: Dict[str, Union[int, str]] = {
142
- 'min_replicas': self._min_replicas or 1,
143
- 'max_replicas': self._max_replicas or 1,
144
- 'ports': self._ports or 8000,
146
+ 'min_replicas': self._min_replicas if self._min_replicas is not None else 1,
147
+ 'max_replicas': self._max_replicas if self._max_replicas is not None else 1,
148
+ 'ports': self._ports if self._ports is not None else 8000,
145
149
  }
146
150
  # Only include probe if it's not None
147
151
  if self._probe is not None: