kalavai-client 0.6.22__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
1
 
2
- __version__ = "0.6.22"
2
+ __version__ = "0.7.0"
@@ -2,6 +2,13 @@ helmDefaults:
2
2
  timeout: 1200
3
3
 
4
4
  repositories:
5
+ # amd gpu operator: https://github.com/ROCm/gpu-operator
6
+ - name: rocm
7
+ url: https://rocm.github.io/gpu-operator
8
+ # required by rocm
9
+ - name: jetstacks
10
+ url: https://charts.jetstack.io
11
+ ###
5
12
  - name: kuberay
6
13
  url: https://ray-project.github.io/kuberay-helm/
7
14
  - name: nvidia
@@ -24,6 +31,20 @@ repositories:
24
31
  url: https://project-hami.github.io/HAMi
25
32
 
26
33
  releases:
34
+ - name: certificates
35
+ namespace: kalavai
36
+ chart: jetstacks/cert-manager
37
+ version: v1.15.1
38
+ installed: true
39
+ set:
40
+ - name: crds.enabled
41
+ value: true
42
+ - name: rocm
43
+ needs:
44
+ - kalavai/certificates
45
+ namespace: kalavai
46
+ chart: rocm/gpu-operator-charts
47
+ installed: true
27
48
  - name: datashim
28
49
  namespace: dlf
29
50
  chart: kalavai/kalavai-datashim
@@ -156,7 +177,7 @@ releases:
156
177
  - name: replicas
157
178
  value: 1
158
179
  - name: image_tag
159
- value: "v2025.07.34"
180
+ value: "v2025.08.3" #"v2025.07.34"
160
181
  - name: deployment.in_cluster
161
182
  value: "True"
162
183
  - name: deployment.kalavai_username_key
@@ -186,28 +207,7 @@ releases:
186
207
  - name: nvidia-gpu-operator
187
208
  namespace: kalavai
188
209
  chart: kalavai/gpu
189
- installed: false
190
- # - name: hami-vgpu
191
- # namespace: kalavai
192
- # chart: kalavai/hami
193
- # installed: true
194
- # set:
195
- # - name: resourceCores
196
- # value: "nvidia.com/gpucores"
197
- # - name: devicePlugin.runtimeClassName
198
- # value: "nvidia"
199
- # - name: scheduler.defaultSchedulerPolicy.nodeSchedulerPolicy
200
- # value: "binpack"
201
- # - name: scheduler.defaultSchedulerPolicy.gpuSchedulerPolicy
202
- # value: "binpack"
203
- # - name: scheduler.defaultCores
204
- # value: "100"
205
- # - name: scheduler.kubeScheduler.imageTag
206
- # value: v1.31.1
207
- # - name: devicePlugin.deviceMemoryScaling
208
- # value: "1"
209
- # - name: devicePlugin.deviceSplitCount
210
- # value: "1"
210
+ installed: false
211
211
  - name: hami-vgpu
212
212
  namespace: kalavai
213
213
  chart: hami-charts/hami
kalavai_client/cli.py CHANGED
@@ -1166,38 +1166,47 @@ def job__delete(name, *others, force_namespace: str=None):
1166
1166
 
1167
1167
 
1168
1168
  @arguably.command
1169
- def job__estimate(billion_parameters, *others, precision=32):
1170
- """Guesstimate of resources needed based on required memory and current resources"""
1171
- try:
1172
- CLUSTER.validate_cluster()
1173
- except Exception as e:
1174
- console.log(f"[red]Problems with your pool: {str(e)}")
1175
- return
1176
-
1177
- average_vram = 8
1178
- required_memory = float(billion_parameters) * (precision / 8) / 1.2
1179
- available_gpus = load_gpu_models()
1180
- vrams = []
1181
- for _, gpus in available_gpus:
1182
- for model in gpus["gpus"]:
1183
- vrams.extend([int(model["memory"])/1000] * int(gpus["capacity"]) )
1184
- vrams = sorted(vrams, reverse=False)
1169
+ def job__estimate(
1170
+ *others,
1171
+ model_size: float,
1172
+ precision: str = "fp16",
1173
+ context_window: int = 2048,
1174
+ batch_size: int = 1,
1175
+ num_layers: int = 32, # total layers, or num_key_value_heads, whatever is minimum (impacts KV cache)
1176
+ hidden_dim: int = 4096,
1177
+ overhead_factor: float = 0.15
1178
+ ):
1179
+ # Bytes per parameter based on precision
1180
+ precision_bytes = {
1181
+ "fp32": 4,
1182
+ "fp16": 2,
1183
+ "int8": 1,
1184
+ "int4": 0.5
1185
+ }
1185
1186
 
1186
- console.log(f"There are {len(vrams)} GPUs available ({sum(vrams)}GBs)")
1187
- console.log(f"A [yellow]{billion_parameters}B[white] model requires [yellow]~{required_memory:.2f}GB vRAM[white] at {precision}bits precision")
1187
+ if precision not in precision_bytes:
1188
+ raise ValueError(f"Unsupported precision: {precision}. Choose from {list(precision_bytes.keys())}")
1188
1189
 
1189
- if sum(vrams) < required_memory:
1190
- console.log("Current capacity is insufficient to host the model, but it can be scheduled for when it is!")
1191
- console.log(f"Average devices have {average_vram}GB vRAM, use {math.ceil(required_memory/(average_vram))} GPU workers")
1192
- else:
1193
- current_vram = 0
1194
- n_devices = 0
1195
- for mem in vrams:
1196
- current_vram += mem
1197
- n_devices += 1
1198
- if current_vram > required_memory:
1199
- break
1200
- console.log(f"Looking at current capacity, use [green]{n_devices} GPU workers[white] for a total [green]{current_vram:.2f} GB vRAM")
1190
+ # Model parameters
1191
+ total_params = model_size * 1e9
1192
+ model_weights_vram = total_params * precision_bytes[precision] / 1e9 # GB
1193
+
1194
+ # KV Cache memory
1195
+ # Approximation: KV cache = batch × layers × hidden_dim × 2 (K/V) × context × bytes
1196
+ kv_cache_vram = (
1197
+ batch_size * num_layers * hidden_dim * 2 * context_window * precision_bytes[precision]
1198
+ ) / 1e9 # GB
1199
+
1200
+ # Total VRAM including overhead
1201
+ total_vram = model_weights_vram + kv_cache_vram
1202
+ total_vram *= (1 + overhead_factor)
1203
+
1204
+ result = {
1205
+ "model_weights_vram_gb": round(model_weights_vram, 2),
1206
+ "kv_cache_vram_gb": round(kv_cache_vram, 2),
1207
+ "estimated_total_vram_gb": round(total_vram, 2)
1208
+ }
1209
+ console.log(f"[green]{result}")
1201
1210
 
1202
1211
  @arguably.command
1203
1212
  def job__status(name, *others):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.6.22
3
+ Version: 0.7.0
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -1,7 +1,7 @@
1
- kalavai_client/__init__.py,sha256=v2m8n1AiRAfLYkbEyWtMgln2mUtzHOjwrvGiSwSCHCg,23
1
+ kalavai_client/__init__.py,sha256=vNC3xHXqvZzke4pHwX0fLN2HjCBaYwJV_rhNXqxKX6I,22
2
2
  kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
3
3
  kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- kalavai_client/assets/apps.yaml,sha256=HT1Yl_tPp5ysjn9TRhL7PlP1w67QOYZGs0ge-bQlvF4,6891
4
+ kalavai_client/assets/apps.yaml,sha256=ylXUApravPGFRIFitx60Bx7EqQi8K6xMKcIb51pDevI,6730
5
5
  kalavai_client/assets/apps_values.yaml,sha256=LeSNd3PwkIx0wkTIlEk2KNz3Yy4sXSaHALQEkopdhKE,2165
6
6
  kalavai_client/assets/docker-compose-gui.yaml,sha256=OAVO0ohaCpDB9FGeih0yAbVNwUfDtaCzssZ25uiuJyA,787
7
7
  kalavai_client/assets/docker-compose-template.yaml,sha256=vW7GhOl_PaUodehJk8qajOlE0deZXrPc7qizg5SeYyc,1859
@@ -13,13 +13,13 @@ kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKM
13
13
  kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
14
14
  kalavai_client/bridge_api.py,sha256=qiN0jleaooj2vYVYxHrG_nfdgY2rKpZyvFE4uz6hkoA,27088
15
15
  kalavai_client/bridge_models.py,sha256=mUh67hzhudqGxJEFHx2KGrf-Sjrt_CwkzLno8Xsm7hk,3043
16
- kalavai_client/cli.py,sha256=ZRNOv1oUvU7Freu47PotrwqJRrBMSFgmoCHg620UdZM,48146
16
+ kalavai_client/cli.py,sha256=QihLex32dFmneV8-JhMA7J1BXqOp1mI-IJH37O6-EzE,48007
17
17
  kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
18
18
  kalavai_client/core.py,sha256=JVXSMmYvbNBl9ggVPGNJRryK54doySTrGDj-WhAlkfY,35760
19
19
  kalavai_client/env.py,sha256=t6dfjg5GY6lbprbmlr9dVOP_KouPwdN94wnDL5zCgIM,2902
20
20
  kalavai_client/utils.py,sha256=1mz-dzoJhZ9GJKU7jiGYBC1tP37SXHvxToMqqEir8R0,13438
21
- kalavai_client-0.6.22.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
- kalavai_client-0.6.22.dist-info/METADATA,sha256=uquNcK5cPCexquqwajy7XvqBC1LEE2USs677qtcFqMg,12776
23
- kalavai_client-0.6.22.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
- kalavai_client-0.6.22.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
- kalavai_client-0.6.22.dist-info/RECORD,,
21
+ kalavai_client-0.7.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
+ kalavai_client-0.7.0.dist-info/METADATA,sha256=4cWmWAY7QZHbrVVsTIfI_d7yp3thrAXagOVBr1hJ9fk,12775
23
+ kalavai_client-0.7.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
+ kalavai_client-0.7.0.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
+ kalavai_client-0.7.0.dist-info/RECORD,,