konduktor-nightly 0.1.0.dev20251128104812__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. konduktor/__init__.py +49 -0
  2. konduktor/adaptors/__init__.py +0 -0
  3. konduktor/adaptors/aws.py +221 -0
  4. konduktor/adaptors/common.py +118 -0
  5. konduktor/adaptors/gcp.py +126 -0
  6. konduktor/authentication.py +124 -0
  7. konduktor/backends/__init__.py +6 -0
  8. konduktor/backends/backend.py +86 -0
  9. konduktor/backends/constants.py +21 -0
  10. konduktor/backends/deployment.py +204 -0
  11. konduktor/backends/deployment_utils.py +1351 -0
  12. konduktor/backends/jobset.py +225 -0
  13. konduktor/backends/jobset_utils.py +726 -0
  14. konduktor/backends/pod_utils.py +501 -0
  15. konduktor/check.py +184 -0
  16. konduktor/cli.py +1945 -0
  17. konduktor/config.py +420 -0
  18. konduktor/constants.py +36 -0
  19. konduktor/controller/__init__.py +0 -0
  20. konduktor/controller/constants.py +56 -0
  21. konduktor/controller/launch.py +44 -0
  22. konduktor/controller/node.py +116 -0
  23. konduktor/controller/parse.py +111 -0
  24. konduktor/dashboard/README.md +30 -0
  25. konduktor/dashboard/backend/main.py +169 -0
  26. konduktor/dashboard/backend/sockets.py +154 -0
  27. konduktor/dashboard/frontend/.eslintrc.json +3 -0
  28. konduktor/dashboard/frontend/.gitignore +36 -0
  29. konduktor/dashboard/frontend/app/api/jobs/route.js +71 -0
  30. konduktor/dashboard/frontend/app/api/namespaces/route.js +69 -0
  31. konduktor/dashboard/frontend/app/components/Grafana.jsx +66 -0
  32. konduktor/dashboard/frontend/app/components/JobsData.jsx +197 -0
  33. konduktor/dashboard/frontend/app/components/LogsData.jsx +139 -0
  34. konduktor/dashboard/frontend/app/components/NavMenu.jsx +39 -0
  35. konduktor/dashboard/frontend/app/components/NavTabs.jsx +73 -0
  36. konduktor/dashboard/frontend/app/components/NavTabs2.jsx +30 -0
  37. konduktor/dashboard/frontend/app/components/SelectBtn.jsx +27 -0
  38. konduktor/dashboard/frontend/app/components/lib/utils.js +6 -0
  39. konduktor/dashboard/frontend/app/components/ui/chip-select.jsx +78 -0
  40. konduktor/dashboard/frontend/app/components/ui/input.jsx +19 -0
  41. konduktor/dashboard/frontend/app/components/ui/navigation-menu.jsx +104 -0
  42. konduktor/dashboard/frontend/app/components/ui/select.jsx +120 -0
  43. konduktor/dashboard/frontend/app/favicon.ico +0 -0
  44. konduktor/dashboard/frontend/app/globals.css +120 -0
  45. konduktor/dashboard/frontend/app/jobs/page.js +10 -0
  46. konduktor/dashboard/frontend/app/layout.js +22 -0
  47. konduktor/dashboard/frontend/app/logs/page.js +11 -0
  48. konduktor/dashboard/frontend/app/page.js +12 -0
  49. konduktor/dashboard/frontend/jsconfig.json +7 -0
  50. konduktor/dashboard/frontend/next.config.mjs +4 -0
  51. konduktor/dashboard/frontend/package-lock.json +6687 -0
  52. konduktor/dashboard/frontend/package.json +37 -0
  53. konduktor/dashboard/frontend/postcss.config.mjs +8 -0
  54. konduktor/dashboard/frontend/server.js +64 -0
  55. konduktor/dashboard/frontend/tailwind.config.js +17 -0
  56. konduktor/data/__init__.py +9 -0
  57. konduktor/data/aws/__init__.py +15 -0
  58. konduktor/data/aws/s3.py +1138 -0
  59. konduktor/data/constants.py +7 -0
  60. konduktor/data/data_utils.py +268 -0
  61. konduktor/data/gcp/__init__.py +19 -0
  62. konduktor/data/gcp/constants.py +42 -0
  63. konduktor/data/gcp/gcs.py +994 -0
  64. konduktor/data/gcp/utils.py +9 -0
  65. konduktor/data/registry.py +19 -0
  66. konduktor/data/storage.py +812 -0
  67. konduktor/data/storage_utils.py +535 -0
  68. konduktor/execution.py +447 -0
  69. konduktor/kube_client.py +237 -0
  70. konduktor/logging.py +111 -0
  71. konduktor/manifests/aibrix-setup.yaml +430 -0
  72. konduktor/manifests/apoxy-setup.yaml +184 -0
  73. konduktor/manifests/apoxy-setup2.yaml +98 -0
  74. konduktor/manifests/controller_deployment.yaml +69 -0
  75. konduktor/manifests/dashboard_deployment.yaml +131 -0
  76. konduktor/manifests/dmesg_daemonset.yaml +57 -0
  77. konduktor/manifests/pod_cleanup_controller.yaml +129 -0
  78. konduktor/resource.py +546 -0
  79. konduktor/serving.py +153 -0
  80. konduktor/task.py +949 -0
  81. konduktor/templates/deployment.yaml.j2 +191 -0
  82. konduktor/templates/jobset.yaml.j2 +43 -0
  83. konduktor/templates/pod.yaml.j2 +563 -0
  84. konduktor/usage/__init__.py +0 -0
  85. konduktor/usage/constants.py +21 -0
  86. konduktor/utils/__init__.py +0 -0
  87. konduktor/utils/accelerator_registry.py +17 -0
  88. konduktor/utils/annotations.py +62 -0
  89. konduktor/utils/base64_utils.py +95 -0
  90. konduktor/utils/common_utils.py +426 -0
  91. konduktor/utils/constants.py +5 -0
  92. konduktor/utils/env_options.py +55 -0
  93. konduktor/utils/exceptions.py +234 -0
  94. konduktor/utils/kubernetes_enums.py +8 -0
  95. konduktor/utils/kubernetes_utils.py +763 -0
  96. konduktor/utils/log_utils.py +467 -0
  97. konduktor/utils/loki_utils.py +102 -0
  98. konduktor/utils/rich_utils.py +123 -0
  99. konduktor/utils/schemas.py +625 -0
  100. konduktor/utils/subprocess_utils.py +273 -0
  101. konduktor/utils/ux_utils.py +247 -0
  102. konduktor/utils/validator.py +461 -0
  103. konduktor_nightly-0.1.0.dev20251128104812.dist-info/LICENSE +91 -0
  104. konduktor_nightly-0.1.0.dev20251128104812.dist-info/METADATA +98 -0
  105. konduktor_nightly-0.1.0.dev20251128104812.dist-info/RECORD +107 -0
  106. konduktor_nightly-0.1.0.dev20251128104812.dist-info/WHEEL +4 -0
  107. konduktor_nightly-0.1.0.dev20251128104812.dist-info/entry_points.txt +3 -0
konduktor/logging.py ADDED
@@ -0,0 +1,111 @@
1
+ """Logging utilities."""
2
+
3
+ import contextlib
4
+ import logging
5
+ import os
6
+ import threading
7
+ from datetime import datetime
8
+
9
+ import colorama
10
+
11
+ from konduktor import constants
12
+
13
+ CHECK_MARK_EMOJI = '\U00002714' # Heavy check mark unicode
14
+ PARTY_POPPER_EMOJI = '\U0001f389' # Party popper unicode
15
+
16
+ _FORMAT = '[%(levelname).1s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
17
+ _DATE_FORMAT = '%m-%d %H:%M:%S'
18
+
19
+ _logging_config = threading.local()
20
+ _log_path = None
21
+
22
+
23
class NewLineFormatter(logging.Formatter):
    """Formatter that repeats the log prefix on every continuation line.

    A multi-line message is rewritten so that each embedded newline becomes
    a carriage return + newline followed by the same prefix (level, time,
    file, line) that precedes the first line.
    """

    def __init__(self, fmt, datefmt=None, dim=False):
        """Store the format strings and whether output should be dimmed.

        Args:
            fmt: logging format string.
            datefmt: optional date format string.
            dim: when true, wrap the formatted text in colorama DIM styling.
        """
        super().__init__(fmt, datefmt)
        self.dim = dim

    def format(self, record):
        """Render ``record``, prefixing continuation lines of the message."""
        rendered = super().format(record)
        if record.message != '':
            # Everything before the message text is the prefix to repeat.
            prefix, _, _ = rendered.partition(record.message)
            rendered = rendered.replace('\n', '\r\n' + prefix)
            # NOTE(review): the source view lost indentation; dimming is
            # assumed to apply only to non-empty messages -- confirm.
            if self.dim:
                rendered = (colorama.Style.DIM + rendered +
                            colorama.Style.RESET_ALL)
        return rendered
38
+
39
+
40
+ FORMATTER = NewLineFormatter(_FORMAT, datefmt=_DATE_FORMAT)
41
+
42
+
43
@contextlib.contextmanager
def set_logging_level(logger: str, level: int):
    """Temporarily set the level of the logger named ``logger``.

    The level that was in effect on entry is restored on exit, including
    when the managed block raises.

    Args:
        logger: name passed to ``logging.getLogger``.
        level: logging level to apply inside the ``with`` block.
    """
    target = logging.getLogger(logger)
    previous_level = target.level
    target.setLevel(level)
    try:
        yield
    finally:
        target.setLevel(previous_level)
52
+
53
+
54
def get_logger(name: str):
    """Return the logger ``name``, configuring it on first use.

    On first use the logger gets a DEBUG-level file handler writing to a
    per-process log file under ``~/.konduktor/logs`` and a console handler
    (INFO by default, DEBUG when ``KONDUKTOR_DEBUG=1``). Propagation to
    ancestor loggers is disabled so messages are not emitted twice.

    Args:
        name: logger name, passed to ``logging.getLogger``.

    Returns:
        The configured ``logging.Logger``.
    """
    global _log_path

    logger = logging.getLogger(name)

    # Avoid duplicate handlers. Inspect only this logger's own handlers:
    # Logger.hasHandlers() also walks up the hierarchy, so a handler on the
    # root logger (logging.basicConfig(), a test runner, ...) would make
    # this return a logger that never received the handlers below.
    if logger.handlers:
        return logger

    logger.setLevel(logging.DEBUG)  # Always capture all levels internally

    # --- File logging: Always enabled ---
    if not _log_path:
        # The log file path is chosen once and shared by every logger
        # created through this function afterwards.
        log_dir = os.path.expanduser('~/.konduktor/logs')
        os.makedirs(log_dir, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
        _log_path = os.path.join(log_dir, f'konduktor-logs-{timestamp}.log')
        print(f'Log file: {_log_path}')

    fh = logging.FileHandler(_log_path)
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(FORMATTER)
    logger.addHandler(fh)

    # --- Console logging: INFO level by default, DEBUG if KONDUKTOR_DEBUG=1 ---
    ch = logging.StreamHandler()
    if os.environ.get('KONDUKTOR_DEBUG') == '1':
        ch.setLevel(logging.DEBUG)
    else:
        ch.setLevel(logging.INFO)
    ch.setFormatter(FORMATTER)
    logger.addHandler(ch)

    logger.propagate = False
    return logger
89
+
90
+
91
def is_silent():
    """Return the calling thread's ``is_silent`` flag, defaulting to False.

    The flag lives on the thread-local ``_logging_config``. It is
    initialised lazily here rather than at import time because a
    module-level assignment would only run in the importing thread,
    leaving the attribute missing in every other thread.
    """
    try:
        return _logging_config.is_silent
    except AttributeError:
        _logging_config.is_silent = False
        return _logging_config.is_silent
99
+
100
+
101
def get_run_timestamp() -> str:
    """Return ``konduktor-`` followed by the current local time.

    The time is rendered down to microseconds, with every field separated
    by a hyphen.
    """
    stamp = datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    return 'konduktor-' + stamp
103
+
104
+
105
def generate_tmp_logging_file_path(file_name: str) -> str:
    """Build the path of a log file inside a fresh timestamped directory.

    The directory is ``constants.KONDUKTOR_LOGS_DIRECTORY`` joined with a
    new run timestamp; a leading ``~`` is expanded. Nothing is created on
    disk.

    Args:
        file_name: name of the log file within the run directory.

    Returns:
        The user-expanded path to ``file_name``.
    """
    run_dir = os.path.join(constants.KONDUKTOR_LOGS_DIRECTORY,
                           get_run_timestamp())
    return os.path.expanduser(os.path.join(run_dir, file_name))
@@ -0,0 +1,430 @@
1
+ # Aibrix Setup - vLLM Deployment Infrastructure
2
+ #
3
+ # This file sets up the infrastructure needed for vLLM (Aibrix) deployments:
4
+ # 1. Envoy Gateway configuration for HTTP routing
5
+ # 2. Aibrix Activator service for request-based autoscaling (KPA)
6
+ # 3. HTTP route mirroring for prewarming vLLM models
7
+ # 4. Lua script for extracting model names from OpenAI-compatible requests
8
+ #
9
+ # The activator tracks incoming requests and provides metrics to scale
10
+ # vLLM deployments based on demand (requests per second).
11
+
12
+ # This file is kept separate from apoxy setup files because it is
13
+ # only used in actual clusters, not in the test kind clusters.
14
+
15
+ apiVersion: v1
16
+ kind: ConfigMap
17
+ metadata:
18
+ name: envoy-gateway-config
19
+ namespace: envoy-gateway-system
20
+ data:
21
+ envoy-gateway.yaml: |
22
+ apiVersion: gateway.envoyproxy.io/v1alpha1
23
+ kind: EnvoyGateway
24
+ provider:
25
+ type: Kubernetes
26
+ gateway:
27
+ controllerName: gateway.envoyproxy.io/gatewayclass-controller
28
+ extensionApis:
29
+ enableEnvoyPatchPolicy: true
30
+ ---
31
+ apiVersion: v1
32
+ kind: Namespace
33
+ metadata:
34
+ name: aibrix-activator
35
+ ---
36
+ apiVersion: v1
37
+ kind: ServiceAccount
38
+ metadata:
39
+ name: aibrix-activator
40
+ namespace: aibrix-activator
41
+ ---
42
+ apiVersion: rbac.authorization.k8s.io/v1
43
+ kind: ClusterRole
44
+ metadata:
45
+ name: aibrix-activator
46
+ rules:
47
+ - apiGroups: ["apps"]
48
+ resources: ["deployments"]
49
+ verbs: ["get", "list", "watch"]
50
+ ---
51
+ apiVersion: rbac.authorization.k8s.io/v1
52
+ kind: ClusterRoleBinding
53
+ metadata:
54
+ name: aibrix-activator
55
+ roleRef:
56
+ apiGroup: rbac.authorization.k8s.io
57
+ kind: ClusterRole
58
+ name: aibrix-activator
59
+ subjects:
60
+ - kind: ServiceAccount
61
+ name: aibrix-activator
62
+ namespace: aibrix-activator
63
+ ---
64
+ apiVersion: v1
65
+ kind: ConfigMap
66
+ metadata:
67
+ name: activator-code
68
+ namespace: aibrix-activator
69
+ data:
70
+ activator.py: |
71
+ import os, time, json
72
+ from collections import defaultdict, deque
73
+ from fastapi import FastAPI, Request
74
+ from fastapi.responses import PlainTextResponse, JSONResponse
75
+ import asyncio
76
+ from kubernetes import client, config
77
+
78
+ NAMESPACE = os.getenv("NAMESPACE", "default")
79
+ WINDOW_SEC = int(os.getenv("WINDOW_SEC", "30")) # demand lookback
80
+ CAPACITY_RPS = float(os.getenv("CAPACITY_RPS", "1.0")) # per-replica capacity
81
+ MIN_WAKE = int(os.getenv("MIN_REPLICA_ON_WAKE", "1"))
82
+ MAX_REPLICAS = int(os.getenv("MAX_REPLICAS", "8"))
83
+ CLEANUP_INTERVAL = int(os.getenv("CLEANUP_INTERVAL", "300")) # 5 minutes
84
+
85
+ app = FastAPI()
86
+ events = defaultdict(deque) # key=(ns,model) -> deque[timestamps]
87
+
88
+ # Initialize Kubernetes client
89
try:
    config.load_incluster_config()
    k8s_apps_v1 = client.AppsV1Api()
except Exception as exc:
    # Catch Exception rather than using a bare ``except`` so SystemExit and
    # KeyboardInterrupt still propagate. Without a client the activator
    # keeps serving metrics; only stale-entry cleanup is skipped.
    print(f"Kubernetes client unavailable, stale-entry cleanup disabled: {exc}")
    k8s_apps_v1 = None
94
+
95
def _prune(q, now):
    """Pop timestamps older than WINDOW_SEC (relative to ``now``) off the left of ``q``."""
    while len(q) > 0 and now - q[0] > WINDOW_SEC:
        q.popleft()
97
+
98
def _bump(ns, model):
    """Record one request for ``(ns, model)`` at the current time and expire old ones."""
    stamp = time.time()
    bucket = events[(ns, model)]
    bucket.append(stamp)
    _prune(bucket, stamp)
103
+
104
def _desired(ns, model):
    """Return the suggested replica count for ``(ns, model)``.

    Demand is the number of requests recorded in the last WINDOW_SEC
    seconds, expressed as requests per second. With no recent requests the
    result is 0; otherwise it is at least MIN_WAKE and capped at
    MAX_REPLICAS.
    """
    import math

    # Use .get() instead of indexing: ``events`` is a defaultdict, so
    # ``events[key]`` would insert an empty deque for every key that is
    # merely queried and the map would grow without bound.
    q = events.get((ns, model))
    if q is None:
        return 0
    _prune(q, time.time())
    if len(q) == 0:
        return 0
    rps = len(q) / max(WINDOW_SEC, 1)
    # Convert demand to desired replicas; the 1e-6 floor guards against a
    # zero or negative CAPACITY_RPS setting.
    d = max(MIN_WAKE, math.ceil(rps / max(CAPACITY_RPS, 1e-6)))
    return max(0, min(d, MAX_REPLICAS))
114
+
115
+ def _extract_model(headers, body_bytes):
116
+ # Prefer header (OpenAI-compatible)
117
+ m = headers.get("model") or headers.get("x-model")
118
+ if m: return m
119
+ # Try JSON body
120
+ try:
121
+ j = json.loads(body_bytes or b"{}")
122
+ if isinstance(j, dict):
123
+ # OpenAI schema: {"model": "...", ...}
124
+ if "model" in j and isinstance(j["model"], str):
125
+ return j["model"]
126
+ except Exception:
127
+ pass
128
+ return None
129
+
130
def _get_existing_deployments():
    """Return the names of deployments in NAMESPACE that carry the
    ``model.aibrix.ai/name`` label.

    An empty set is returned when no Kubernetes client is available or
    when the API call (or reading its result) raises.
    """
    if not k8s_apps_v1:
        return set()
    names = set()
    try:
        listing = k8s_apps_v1.list_namespaced_deployment(
            namespace=NAMESPACE,
            label_selector="model.aibrix.ai/name",
        )
        for item in listing.items:
            names.add(item.metadata.name)
    except Exception:
        return set()
    return names
142
+
143
def _cleanup_stale_entries():
    """Drop ``events`` entries in NAMESPACE whose model name is not the
    name of an existing deployment.

    Does nothing without a Kubernetes client. Errors are printed, not
    raised.
    """
    if not k8s_apps_v1:
        return
    try:
        live = _get_existing_deployments()
        # NOTE(review): _get_existing_deployments() also returns an empty
        # set when the API call fails, in which case every entry for
        # NAMESPACE is treated as stale here.
        stale = [
            key for key in list(events.keys())
            if key[0] == NAMESPACE and key[1] not in live
        ]
        for key in stale:
            del events[key]
            print(f"Cleaned up stale entry for deployment: {key[1]}")
    except Exception as e:
        print(f"Error during cleanup: {e}")
160
+
161
async def _cleanup_task():
    """Run stale-entry cleanup forever, once every CLEANUP_INTERVAL seconds.

    The first cleanup happens only after one full interval has elapsed.
    """
    while True:
        await asyncio.sleep(CLEANUP_INTERVAL)
        _cleanup_stale_entries()
166
+
167
@app.on_event("startup")
async def startup_event():
    """Start the background cleanup task when the application starts."""
    # Keep a strong reference to the task. The event loop holds only weak
    # references, so a task whose handle is discarded may be
    # garbage-collected before it finishes.
    app.state.cleanup_task = asyncio.create_task(_cleanup_task())
171
+
172
+ # Mirror endpoints (same as your API paths); quick 204 response
173
@app.post("/v1/completions")
@app.post("/v1/chat/completions")
async def mirrored(request: Request):
    """Record demand for the model named in a mirrored completion request.

    The model name is taken from the request headers or JSON body; requests
    without one are ignored. Always answers 204 No Content.
    """
    # Imported locally because the module's import list does not include it.
    from fastapi import Response

    body = await request.body()
    model = _extract_model(request.headers, body)
    if model:
        _bump(NAMESPACE, model)
    # A 204 response must not carry a body, so return an empty Response
    # rather than a JSON payload.
    return Response(status_code=204)
181
+
182
+ # Catch-all POST (safety net if your gateway uses different paths)
183
@app.post("/{full_path:path}")
async def mirrored_generic(request: Request, full_path: str):
    """Catch-all POST handler that records demand like the completion routes.

    ``full_path`` is accepted only so the route matches any path; it is not
    used. Always answers 204 No Content.
    """
    # Imported locally because the module's import list does not include it.
    from fastapi import Response

    body = await request.body()
    model = _extract_model(request.headers, body)
    if model:
        _bump(NAMESPACE, model)
    # A 204 response must not carry a body, so return an empty Response
    # rather than a JSON payload.
    return Response(status_code=204)
190
+
191
+ # Prometheus-friendly aggregate endpoint: export ALL (ns, model)
192
@app.get("/metrics", response_class=PlainTextResponse)
async def metrics_all():
    """Export suggested replicas and observed RPS for every tracked (ns, model).

    Each value is published under an underscore-style name
    (``vllm_deployment_replicas``, ``vllm_observed_rps``) and under the
    legacy colon-style name (``vllm:...``) kept for back-compat. The
    samples of each metric family are emitted as one contiguous group
    after that family's HELP/TYPE lines.
    """

    def _label(value):
        # Escape backslash, newline and double quote so that a model name
        # taken from request data cannot break or inject exposition lines.
        return (str(value).replace("\\", "\\\\")
                .replace("\n", "\\n")
                .replace('"', '\\"'))

    now = time.time()
    samples = []
    for (ns, model), q in list(events.items()):
        _prune(q, now)
        rps = len(q) / max(WINDOW_SEC, 1)
        d = _desired(ns, model)
        labels = f'{{namespace="{_label(ns)}",model_name="{_label(model)}"}}'
        samples.append((labels, f"{d}", f"{rps:.6f}"))

    lines = []
    # Idiomatic names first, then the legacy names with colons.
    for prefix in ("vllm_", "vllm:"):
        for suffix, help_text, column in (
            ("deployment_replicas", "Number of suggested replicas.", 1),
            ("observed_rps", "Incoming requests per second.", 2),
        ):
            name = prefix + suffix
            lines.append(f"# HELP {name} {help_text}")
            lines.append(f"# TYPE {name} gauge")
            for sample in samples:
                lines.append(f"{name}{sample[0]} {sample[column]}")
    return "\n".join(lines) + "\n"
220
+
221
+
222
+ # Metrics for KPA and Debugging
223
@app.get("/metrics/{ns}/{model}", response_class=PlainTextResponse)
async def metrics(ns: str, model: str):
    """Export suggested replicas and observed RPS for one (ns, model) pair.

    A pair that has never been seen reports 0 replicas and 0.00 RPS.
    """

    def _label(value):
        # Escape backslash, newline and double quote; ns and model come
        # straight from the URL path.
        return (str(value).replace("\\", "\\\\")
                .replace("\n", "\\n")
                .replace('"', '\\"'))

    key = (ns, model)
    if key in events:
        d = _desired(ns, model)
        now = time.time()
        q = events[key]
        _prune(q, now)
        rps = len(q) / max(WINDOW_SEC, 1)
    else:
        # Do not index ``events`` for unknown keys: it is a defaultdict, so
        # doing so would permanently add an entry for every ns/model an
        # arbitrary caller puts in the URL.
        d, rps = 0, 0.0
    labels = f'{{namespace="{_label(ns)}",model_name="{_label(model)}"}}'
    return (
        "# HELP vllm:deployment_replicas Number of suggested replicas.\n"
        "# TYPE vllm:deployment_replicas gauge\n"
        f'vllm:deployment_replicas{labels} {d}\n'
        "# HELP vllm:observed_rps Incoming requests per second.\n"
        "# TYPE vllm:observed_rps gauge\n"
        f'vllm:observed_rps{labels} {rps:.2f}\n'
    )
238
+ ---
239
+ apiVersion: apps/v1
240
+ kind: Deployment
241
+ metadata:
242
+ name: aibrix-activator
243
+ namespace: aibrix-activator
244
+ spec:
245
+ replicas: 1
246
+ selector: { matchLabels: { app: aibrix-activator } }
247
+ template:
248
+ metadata: { labels: { app: aibrix-activator } }
249
+ spec:
250
+ containers:
251
+ - name: activator
252
+ image: python:3.11-slim
253
+ command: ["bash","-lc"]
254
+ args:
255
+ - |
256
+ pip install fastapi uvicorn kubernetes >/dev/null && \
257
+ uvicorn activator:app --host 0.0.0.0 --port 8080
258
+ env:
259
+ - { name: NAMESPACE, value: "default" }
260
+ - { name: WINDOW_SEC, value: "30" }
261
+ - { name: CAPACITY_RPS, value: "1.0" }
262
+ - { name: MIN_REPLICA_ON_WAKE, value: "1" }
263
+ - { name: MAX_REPLICAS, value: "8" }
264
+ - { name: CLEANUP_INTERVAL, value: "300" }
265
+ ports: [{containerPort: 8080}]
266
+ volumeMounts:
267
+ - { name: code, mountPath: /app/activator.py, subPath: activator.py }
268
+ workingDir: /app
269
+ serviceAccountName: aibrix-activator
270
+ volumes:
271
+ - name: code
272
+ configMap: { name: activator-code }
273
+ ---
274
+ apiVersion: v1
275
+ kind: Service
276
+ metadata:
277
+ name: aibrix-activator
278
+ namespace: aibrix-activator
279
+ annotations:
280
+ prometheus.io/scrape: "true"
281
+ prometheus.io/port: "8080"
282
+ prometheus.io/path: "/metrics"
283
+ labels:
284
+ app: aibrix-activator
285
+ prometheus-discovery: "true"
286
+ spec:
287
+ selector: { app: aibrix-activator }
288
+ ports:
289
+ - name: http
290
+ port: 8080
291
+ targetPort: 8080
292
+ protocol: TCP
293
+ type: ClusterIP
294
+ ---
295
+ apiVersion: monitoring.coreos.com/v1
296
+ kind: ServiceMonitor
297
+ metadata:
298
+ name: aibrix-activator
299
+ namespace: prometheus
300
+ labels:
301
+ app: aibrix-activator
302
+ spec:
303
+ selector:
304
+ matchLabels:
305
+ app: aibrix-activator
306
+ namespaceSelector:
307
+ matchNames:
308
+ - aibrix-activator
309
+ endpoints:
310
+ - port: http
311
+ path: /metrics
312
+ ---
313
+ apiVersion: monitoring.coreos.com/v1
314
+ kind: ServiceMonitor
315
+ metadata:
316
+ name: vllm-deployments
317
+ namespace: prometheus
318
+ labels:
319
+ app: vllm-deployments
320
+ spec:
321
+ selector:
322
+ matchLabels:
323
+ prometheus-discovery: "true"
324
+ namespaceSelector:
325
+ matchNames:
326
+ - default
327
+ endpoints:
328
+ - port: serve
329
+ path: /metrics
330
+ ---
331
+ apiVersion: gateway.networking.k8s.io/v1beta1
332
+ kind: ReferenceGrant
333
+ metadata:
334
+ name: allow-httproute-to-activator
335
+ namespace: aibrix-activator
336
+ spec:
337
+ from:
338
+ - group: gateway.networking.k8s.io
339
+ kind: HTTPRoute
340
+ namespace: aibrix-system
341
+ to:
342
+ - group: ""
343
+ kind: Service
344
+ name: aibrix-activator
345
+ ---
346
+ apiVersion: gateway.networking.k8s.io/v1
347
+ kind: HTTPRoute
348
+ metadata:
349
+ name: activator-mirror-sink
350
+ namespace: aibrix-system
351
+ spec:
352
+ parentRefs:
353
+ - group: gateway.networking.k8s.io
354
+ kind: Gateway
355
+ name: aibrix-eg
356
+ namespace: aibrix-system
357
+ rules:
358
+ - matches:
359
+ - path:
360
+ type: PathPrefix
361
+ value: /__activator_sink__
362
+ backendRefs:
363
+ - name: aibrix-activator
364
+ namespace: aibrix-activator
365
+ port: 8080
366
+ ---
367
+ apiVersion: gateway.envoyproxy.io/v1alpha1
368
+ kind: EnvoyPatchPolicy
369
+ metadata:
370
+ name: prewarm-completions-lua
371
+ namespace: aibrix-system
372
+ spec:
373
+ targetRef:
374
+ group: gateway.networking.k8s.io
375
+ kind: Gateway
376
+ name: aibrix-eg
377
+ type: JSONPatch
378
+ jsonPatches:
379
+ - type: "type.googleapis.com/envoy.config.listener.v3.Listener"
380
+ name: "aibrix-system/aibrix-eg/http"
381
+ operation:
382
+ op: add
383
+ path: "/default_filter_chain/filters/0/typed_config/http_filters/0"
384
+ value:
385
+ name: envoy.filters.http.lua
386
+ typed_config:
387
+ "@type": type.googleapis.com/envoy.extensions.filters.http.lua.v3.Lua
388
+ inlineCode: |
389
--- Envoy Lua filter hook: send a wake signal to the activator.
-- For requests whose path starts with /v1/completions or
-- /v1/chat/completions, the model name is taken from the "model" header
-- or, failing that, from the first bytes of a JSON body. When a model is
-- found, a small POST carrying it in the "model" header is sent to the
-- activator cluster. The client request itself is never modified.
-- @param handle Envoy stream handle for the current request.
function envoy_on_request(handle)
  -- The wake call is asynchronous, so this timeout bounds only the side
  -- request and adds no latency to the client request. A few milliseconds
  -- could expire before a new upstream connection is established.
  local WAKE_TIMEOUT_MS = 1000
  -- Only the head of the body is scanned for the "model" field.
  local MAX_BODY_SCAN_BYTES = 1024

  local path = handle:headers():get(":path") or ""
  local is_completion = string.find(path, "^/v1/completions")
    or string.find(path, "^/v1/chat/completions")
  if not is_completion then
    return
  end

  -- Try to get model from header first
  local model = handle:headers():get("model") or ""

  -- If no model in header, try to extract from JSON body
  if model == "" then
    local ct = handle:headers():get("content-type") or ""
    if string.find(ct:lower(), "application/json", 1, true) then
      local body = handle:body()
      if body and body:length() > 0 then
        local raw = body:getBytes(0, math.min(body:length(), MAX_BODY_SCAN_BYTES))
        -- Lua pattern (not a JSON parser): "model" : "value"
        model = raw:match('"model"%s*:%s*"([^"]+)"') or ""
      end
    end
  end

  -- Only proceed if we have a model
  if model == "" then
    return
  end

  -- Fire-and-forget wake signal. The trailing `true` makes the call
  -- asynchronous; without it httpCall suspends this request until the
  -- activator answers or the timeout expires.
  local ok, err = pcall(function()
    handle:httpCall(
      "httproute/aibrix-system/activator-mirror-sink/rule/0",
      {
        [":method"] = "POST",
        [":path"] = "/v1/completions",
        [":authority"] = "aibrix-activator.aibrix-activator.svc.cluster.local",
        ["content-type"] = "application/json",
        ["model"] = model
      },
      "{}",
      WAKE_TIMEOUT_MS,
      true
    )
  end)
  if not ok then
    -- Best effort: never fail the client request, but leave a trace.
    handle:logWarn("activator wake call failed: " .. tostring(err))
  end
end