konduktor-nightly 0.1.0.dev20250919104536__py3-none-any.whl → 0.1.0.dev20250921104307__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of konduktor-nightly might be problematic. Click here for more details.

@@ -12,6 +12,10 @@ metadata:
12
12
  {{ deployment_num_accelerators_label }}: "{{ num_accelerators }}"
13
13
  trainy.ai/has-autoscaler: "{{ autoscaler }}"
14
14
  trainy.ai/konduktor-managed: "true"
15
+ {% if autoscaler == 'true' %}
16
+ trainy.ai/original-min-replicas: "{{ min_replicas }}"
17
+ trainy.ai/original-max-replicas: "{{ max_replicas }}"
18
+ {% endif %}
15
19
  name: {{ name }}
16
20
  namespace: default
17
21
  spec:
@@ -61,18 +65,15 @@ spec:
61
65
  {{ model_name_label }}: {{ name }}
62
66
  {% endif %}
63
67
  {{ deployment_name_label }}: "{{ name }}"
64
- {% if general %}
65
- type: LoadBalancer
66
- {% else %}
67
68
  type: ClusterIP
68
- {% endif %}
69
69
 
70
- {% if not general %}
70
+ # AIBRIX PODAUTOSCALER STUFF (KPA)
71
+ {% if not general and autoscaler == 'true' %}
71
72
  ---
72
73
  apiVersion: autoscaling.aibrix.ai/v1alpha1
73
74
  kind: PodAutoscaler
74
75
  metadata:
75
- name: {{ name }}-apa
76
+ name: {{ name }}-pa
76
77
  namespace: default
77
78
  labels:
78
79
  {{ model_name_label }}: {{ name }}
@@ -80,63 +81,109 @@ metadata:
80
81
  app.kubernetes.io/managed-by: kustomize
81
82
  {{ deployment_name_label }}: "{{ name }}"
82
83
  {{ deployment_user_label }}: "{{ user }}"
83
- annotations:
84
- autoscaling.aibrix.ai/up-fluctuation-tolerance: '0.1'
85
- autoscaling.aibrix.ai/down-fluctuation-tolerance: '0.2'
86
- apa.autoscaling.aibrix.ai/window: 30s
87
84
  spec:
88
- scalingStrategy: APA
85
+ scalingStrategy: KPA
89
86
  minReplicas: {{ min_replicas }}
90
87
  maxReplicas: {{ max_replicas }}
91
88
  metricsSources:
92
- - metricSourceType: pod
89
+ - metricSourceType: domain
93
90
  protocolType: http
94
- port: "{{ ports }}"
95
- path: metrics
96
- targetMetric: gpu_cache_usage_perc
97
- targetValue: '0.5'
91
+ endpoint: aibrix-activator.aibrix-activator.svc.cluster.local:8080
92
+ path: /metrics/default/{{ name }}
93
+ targetMetric: vllm:deployment_replicas
94
+ targetValue: "1"
98
95
  scaleTargetRef:
99
96
  apiVersion: apps/v1
100
97
  kind: Deployment
101
98
  name: {{ name }}
102
99
  {% endif %}
103
100
 
101
+ # KEDA HTTP ADD-ON STUFF (1 per deployment)
104
102
  {% if general %}
103
+ {% if autoscaler == 'true' %}
104
+ # HTTPScaledObject (1 per deployment) - only when autoscaling enabled
105
105
  ---
106
- apiVersion: autoscaling/v2
107
- kind: HorizontalPodAutoscaler
106
+ apiVersion: http.keda.sh/v1alpha1
107
+ kind: HTTPScaledObject
108
108
  metadata:
109
- name: {{ name }}-hpa
109
+ name: {{ name }}-httpscaledobject
110
110
  namespace: default
111
111
  labels:
112
112
  {{ deployment_name_label }}: "{{ name }}"
113
113
  {{ deployment_user_label }}: "{{ user }}"
114
- trainy.ai/has-autoscaler: "{{ autoscaler }}"
115
114
  spec:
115
+ hosts:
116
+ - {{ name }}
117
+ pathPrefixes:
118
+ - "/"
119
+ {% if probe_path %}
120
+ - "{{ probe_path }}"
121
+ {% endif %}
116
122
  scaleTargetRef:
117
- apiVersion: apps/v1
123
+ name: "{{ name }}"
118
124
  kind: Deployment
119
- name: {{ name }}
120
- minReplicas: {{ min_replicas }}
121
- maxReplicas: {{ max_replicas }}
122
- metrics:
123
- - type: Resource
124
- resource:
125
- name: cpu
126
- target:
127
- type: Utilization
128
- averageUtilization: 50
129
- behavior:
130
- scaleDown:
131
- stabilizationWindowSeconds: 60
132
- policies:
133
- - type: Percent
134
- value: 100
135
- periodSeconds: 15
136
- scaleUp:
137
- stabilizationWindowSeconds: 20
138
- policies:
139
- - type: Percent
140
- value: 100
141
- periodSeconds: 15
125
+ apiVersion: apps/v1
126
+ service: "{{ name }}"
127
+ port: {{ ports }}
128
+ replicas:
129
+ min: {{ min_replicas }}
130
+ max: {{ max_replicas }}
131
+ scaledownPeriod: 1200 # 20 minutes
132
+ scalingMetric:
133
+ requestRate:
134
+ targetValue: 4
135
+ granularity: "1s"
136
+ window: "30s"
137
+ {% endif %}
138
+
139
+ # INGRESS (1 per deployment)
140
+ ---
141
+ apiVersion: networking.k8s.io/v1
142
+ kind: Ingress
143
+ metadata:
144
+ name: {{ name }}-ingress
145
+ annotations:
146
+ nginx.ingress.kubernetes.io/use-regex: "true"
147
+ nginx.ingress.kubernetes.io/rewrite-target: /$1
148
+ {% if autoscaler == 'true' %}
149
+ nginx.ingress.kubernetes.io/upstream-vhost: "{{ name }}"
150
+ {% endif %}
151
+ spec:
152
+ ingressClassName: nginx
153
+ rules:
154
+ - host: {{ general_base_host }}
155
+ http:
156
+ paths:
157
+ - path: /{{ name }}(.*)
158
+ pathType: ImplementationSpecific
159
+ backend:
160
+ service:
161
+ {% if autoscaler == 'true' %}
162
+ # Use KEDA interceptor for autoscaling
163
+ name: keda-proxy
164
+ port:
165
+ number: 8080
166
+ {% else %}
167
+ # Direct to app service for fixed replicas
168
+ name: {{ name }}
169
+ port:
170
+ number: {{ ports }}
171
+ {% endif %}
172
+ # Direct access convenience rule (via LB IP + Host: {{ name }})
173
+ - host: {{ name }}
174
+ http:
175
+ paths:
176
+ - path: /(.*)
177
+ pathType: ImplementationSpecific
178
+ backend:
179
+ service:
180
+ {% if autoscaler == 'true' %}
181
+ name: keda-proxy
182
+ port:
183
+ number: 8080
184
+ {% else %}
185
+ name: {{ name }}
186
+ port:
187
+ number: {{ ports }}
188
+ {% endif %}
142
189
  {% endif %}
@@ -389,7 +389,7 @@ def get_serving_schema():
389
389
  'properties': {
390
390
  'min_replicas': {
391
391
  'type': 'integer',
392
- 'minimum': 1,
392
+ 'minimum': 0,
393
393
  'description': 'Minimum number of replicas for autoscaling.',
394
394
  },
395
395
  'max_replicas': {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: konduktor-nightly
3
- Version: 0.1.0.dev20250919104536
3
+ Version: 0.1.0.dev20250921104307
4
4
  Summary: GPU Cluster Health Management
5
5
  Author: Andrew Aikawa
6
6
  Author-email: asai@berkeley.edu
@@ -1,4 +1,4 @@
1
- konduktor/__init__.py,sha256=jxyJKjEi7MB605Q6QUD49c52B7T2f4EpMyNrkDcMnao,1574
1
+ konduktor/__init__.py,sha256=i-3wiQO7jODykrQYHnkmj7qoFXpZYAp1NRSWS2HzVA4,1574
2
2
  konduktor/adaptors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  konduktor/adaptors/aws.py,sha256=s47Ra-GaqCQibzVfmD0pmwEWHif1EGO5opMbwkLxTCU,8244
4
4
  konduktor/adaptors/common.py,sha256=ZIqzjx77PIHUwpjfAQ1uX8B2aX78YMuGj4Bppd-MdyM,4183
@@ -7,13 +7,13 @@ konduktor/authentication.py,sha256=_mVy3eqoKohicHostFiGwG1-2ybxP-l7ouofQ0LRlCY,4
7
7
  konduktor/backends/__init__.py,sha256=usWJ8HdZJEyg7MIsN8Zcz9rk9e2Lq5dWJ8dv6hCN3ys,199
8
8
  konduktor/backends/backend.py,sha256=qh0bp94lzoTYZkzyQv2-CVrB5l91FkG2vclXg24UFC0,2910
9
9
  konduktor/backends/constants.py,sha256=nt9G9AmFCOMwO4GuKgRQSzJJuKapOmaROp4_Y0tMF5A,732
10
- konduktor/backends/deployment.py,sha256=fswN9hX_7NwcEogYmo1xn3WgWF8XCcGDvV5yx54_CA0,5860
11
- konduktor/backends/deployment_utils.py,sha256=qcuoLPeMvEVqgD_h71hQZXAp4ZCdXsFeSBxhtXW6pAA,39846
10
+ konduktor/backends/deployment.py,sha256=VXz0GMvFzqq8kd1-pXaNaTsEtSyMmVO2OSd-_-T9nIE,6171
11
+ konduktor/backends/deployment_utils.py,sha256=rfVpiM0KlXma8SxA029TczrpbY-gRG3T0pj2eeARkE8,49257
12
12
  konduktor/backends/jobset.py,sha256=drt8Gc0iYQx18JWXBU6XfhUvC2xCKd8szSJ2JC4O20Q,8640
13
13
  konduktor/backends/jobset_utils.py,sha256=If4pv5peB_yXrJJwjkySgVzbjcxDEDWfkOQxUkwSlOk,26386
14
14
  konduktor/backends/pod_utils.py,sha256=KP_PAgsdNHFgt4Od-5gAtpifAKIL7DMBg7NJ44uqikg,14885
15
15
  konduktor/check.py,sha256=JennyWoaqSKhdyfUldd266KwVXTPJpcYQa4EED4a_BA,7569
16
- konduktor/cli.py,sha256=OwVdT4ibAQoAJO79YzlThQv_VKlHpsD-CHRwOzehGQ8,57613
16
+ konduktor/cli.py,sha256=HOL_O7BpYCl7uF5mKkMvXLjFNMTeAh2hYkraaJ6zi3w,57674
17
17
  konduktor/config.py,sha256=9upqgCCYvcu6fKw7tovEYC1MWTkAAir0_WHPdayylbI,15536
18
18
  konduktor/constants.py,sha256=T3AeXXxuQHINW_bAWyztvDeS8r4g8kXBGIwIq13cys0,1814
19
19
  konduktor/controller/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -68,17 +68,17 @@ konduktor/data/storage_utils.py,sha256=n4GivkN0KMqmyOTDznF0Z-hzsJvm7KCEh5i5HgFAT
68
68
  konduktor/execution.py,sha256=d0EP79iSrW2uFsoqn0YV_4kgIupPIqpMOParXx0y3kg,18519
69
69
  konduktor/kube_client.py,sha256=HtM3d-_GigHnfGINRANchApR9_OigqczBgeYJ6Dj4j0,8504
70
70
  konduktor/logging.py,sha256=xtcCdnecmC3rqMTyunK-klQRINojI7NI4Apag78i9jM,3221
71
- konduktor/manifests/apoxy-setup.yaml,sha256=HDZu7Evm_siIpK1E4tNZ9WVTnFV2LhBXwjJlFOYSTcU,3319
72
- konduktor/manifests/apoxy-setup2.yaml,sha256=BhXsgcVrLBruLXnF7xlj0Ej6YVJFYMABJIpYtwakQMo,731
71
+ konduktor/manifests/aibrix-setup.yaml,sha256=cUi7ToRRasRt2bXq53htV4QoCDEcxp2pvnCi-esJ4yk,9109
72
+ konduktor/manifests/apoxy-setup.yaml,sha256=EipknCq33aBdxu9BIo6y5novjO0B_d_DCWqY44zYNuU,4262
73
+ konduktor/manifests/apoxy-setup2.yaml,sha256=Benmg-io8audt_vi5T_T7vK8MYnaTnpX-bNDgRXM5t0,2512
73
74
  konduktor/manifests/controller_deployment.yaml,sha256=6p3oSLkEVONZsvKZGqVop0Dhn4bo3lrigRmhf8NXBHE,1730
74
75
  konduktor/manifests/dashboard_deployment.yaml,sha256=xJLd4FbPMAosI0fIv5_8y7dV9bw0Vsf81l-w4MB_aU8,2837
75
76
  konduktor/manifests/dmesg_daemonset.yaml,sha256=pSWt7YOeTYjS0l0iki1fvHOs7MhY-sH-RQfVW6JJyno,1391
76
77
  konduktor/manifests/pod_cleanup_controller.yaml,sha256=hziL1Ka1kCAEL9R7Tjvpb80iw1vcq9_3gwHCu75Bi0A,3939
77
78
  konduktor/resource.py,sha256=kfdhnUR_9kDLSm2sUAkv1sLQXyAkI08p3wIzCz1p7-M,20791
78
- konduktor/serving.py,sha256=sh8TPAUXg23Bkt0ByatIMdxFFqzRm18HJTEkt3wHzdo,5147
79
- konduktor/task.py,sha256=oFRHdMevg7lGYkHugCHl89FUREfq9M-l0Qd3N-rjHMA,37727
80
- konduktor/templates/apoxy-deployment.yaml.j2,sha256=_EdT7w0rBK3if1INHT6GGUEugy0mOkRfYOWRgBcKLdo,942
81
- konduktor/templates/deployment.yaml.j2,sha256=uXFjDQaimbpFdAn2RJGaIvS_PzDY136cw_L3QMjz3ZA,3452
79
+ konduktor/serving.py,sha256=4s8cQhsVjf-HByZF65pbMxuqaV319hUSQE9pC8gP4Sg,5405
80
+ konduktor/task.py,sha256=LcTblvTNCSFNBHNUIOGN-i43whR8B6ccBm3HZTDoCMs,38050
81
+ konduktor/templates/deployment.yaml.j2,sha256=ByA33oaBgWnSx_QZ6yKeaMcCjktphcwF8R5KiOlM4pE,4859
82
82
  konduktor/templates/jobset.yaml.j2,sha256=NQcVeRNsTLLmTnJRnkL1vr45mSeth-b11YShXn_RoSg,1323
83
83
  konduktor/templates/pod.yaml.j2,sha256=p9yE-AQkCF2Tgjd1QQiOLtzgI6Gbpps-MZYJZ9fQWIs,19159
84
84
  konduktor/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -96,12 +96,12 @@ konduktor/utils/kubernetes_utils.py,sha256=XleYxzG64hciZb-CjzBDjX8BOMhFATIIHZlXD
96
96
  konduktor/utils/log_utils.py,sha256=VUyTtN819BJnSwm33-73-h8aaD51Y5Gawt6ek2kU1tk,18181
97
97
  konduktor/utils/loki_utils.py,sha256=eOGiD7dZNuwzmyXKiifyqz00EVh2nwcUPFSiPkac9y0,4050
98
98
  konduktor/utils/rich_utils.py,sha256=ycADW6Ij3wX3uT8ou7T8qxX519RxlkJivsLvUahQaJo,3583
99
- konduktor/utils/schemas.py,sha256=X2q-Nuk71EfMQXl4QFOtFWlQgd8tC_jFAo5dScmlEQc,19067
99
+ konduktor/utils/schemas.py,sha256=cr39nEAgjluhXoUYnvIwCwLBH8rLds37MBsF1uQv1rw,19067
100
100
  konduktor/utils/subprocess_utils.py,sha256=WoFkoFhGecPR8-rF8WJxbIe-YtV94LXz9UG64SDhCY4,9448
101
101
  konduktor/utils/ux_utils.py,sha256=LSH4b5lckD157qDF4keThxtkGdxNrAfGKmH1ewhZkm4,8646
102
102
  konduktor/utils/validator.py,sha256=gCB5v9Up9bCWD_92fS5ChfRRXj_m56Ky9uzd_77wXGI,16927
103
- konduktor_nightly-0.1.0.dev20250919104536.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
104
- konduktor_nightly-0.1.0.dev20250919104536.dist-info/METADATA,sha256=Iu3P8ik5D0tOYXoP5LMArmXo-JqSjbRsw2EMWiVRPRs,4247
105
- konduktor_nightly-0.1.0.dev20250919104536.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
106
- konduktor_nightly-0.1.0.dev20250919104536.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
107
- konduktor_nightly-0.1.0.dev20250919104536.dist-info/RECORD,,
103
+ konduktor_nightly-0.1.0.dev20250921104307.dist-info/LICENSE,sha256=MuuqTZbHvmqXR_aNKAXzggdV45ANd3wQ5YI7tnpZhm0,6586
104
+ konduktor_nightly-0.1.0.dev20250921104307.dist-info/METADATA,sha256=-KRFEgnGTyI9F7gXRZmm7Yu1fsXMigW2Uo0JotzLwAw,4247
105
+ konduktor_nightly-0.1.0.dev20250921104307.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
106
+ konduktor_nightly-0.1.0.dev20250921104307.dist-info/entry_points.txt,sha256=k3nG5wDFIJhNqsZWrHk4d0irIB2Ns9s47cjRWYsTCT8,48
107
+ konduktor_nightly-0.1.0.dev20250921104307.dist-info/RECORD,,
@@ -1,33 +0,0 @@
1
- ---
2
- # Apoxy Backend for general deployment
3
- apiVersion: core.apoxy.dev/v1alpha
4
- kind: Backend
5
- metadata:
6
- name: {{ unique_cluster_name }}-backend-{{ deployment_number }}
7
- labels:
8
- task_name: {{ name }}
9
- endpoint_name: {{ cluster_name }}-{{ deployment_number }}.trainy.us
10
- spec:
11
- endpoints:
12
- - fqdn: {{ name }}.default.{{ unique_cluster_name }}.tun.apoxy.net
13
- ---
14
- # Apoxy Route for general deployment
15
- apiVersion: gateway.apoxy.dev/v1
16
- kind: HTTPRoute
17
- metadata:
18
- name: {{ unique_cluster_name }}-route-{{ deployment_number }}
19
- labels:
20
- task_name: {{ name }}
21
- endpoint_name: {{ cluster_name }}-{{ deployment_number }}.trainy.us
22
- spec:
23
- parentRefs:
24
- - name: default
25
- kind: Gateway
26
- port: 443
27
- hostnames:
28
- - '{{ cluster_name }}-{{ deployment_number }}.trainy.us'
29
- rules:
30
- - backendRefs:
31
- - kind: Backend
32
- name: {{ unique_cluster_name }}-backend-{{ deployment_number }}
33
- port: {{ ports }}