kalavai-client 0.6.21.tar.gz → 0.7.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/PKG-INFO +1 -1
  2. kalavai_client-0.7.0/kalavai_client/__init__.py +2 -0
  3. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/apps.yaml +26 -11
  4. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/cli.py +39 -30
  5. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/pyproject.toml +1 -1
  6. kalavai_client-0.6.21/kalavai_client/__init__.py +0 -2
  7. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/LICENSE +0 -0
  8. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/README.md +0 -0
  9. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/__main__.py +0 -0
  10. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/__init__.py +0 -0
  11. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/apps_values.yaml +0 -0
  12. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/docker-compose-gui.yaml +0 -0
  13. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/docker-compose-template.yaml +0 -0
  14. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/nginx.conf +0 -0
  15. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/pool_config_template.yaml +0 -0
  16. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/pool_config_values.yaml +0 -0
  17. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/user_workspace.yaml +0 -0
  18. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/user_workspace_values.yaml +0 -0
  19. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/auth.py +0 -0
  20. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/bridge_api.py +0 -0
  21. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/bridge_models.py +0 -0
  22. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/cluster.py +0 -0
  23. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/core.py +0 -0
  24. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/env.py +0 -0
  25. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: kalavai-client
-Version: 0.6.21
+Version: 0.7.0
 Summary: Client app for kalavai platform
 License: Apache-2.0
 Keywords: LLM,platform
kalavai_client-0.7.0/kalavai_client/__init__.py (new file)
@@ -0,0 +1,2 @@
+
+__version__ = "0.7.0"
kalavai_client/assets/apps.yaml
@@ -2,6 +2,13 @@ helmDefaults:
   timeout: 1200
 
 repositories:
+# amd gpu operator: https://github.com/ROCm/gpu-operator
+- name: rocm
+  url: https://rocm.github.io/gpu-operator
+# required by rocm
+- name: jetstacks
+  url: https://charts.jetstack.io
+###
 - name: kuberay
   url: https://ray-project.github.io/kuberay-helm/
 - name: nvidia
@@ -20,8 +27,24 @@ repositories:
   url: https://charts.min.io/
 - name: langfuse
   url: https://langfuse.github.io/langfuse-k8s
+- name: hami-charts
+  url: https://project-hami.github.io/HAMi
 
 releases:
+- name: certificates
+  namespace: kalavai
+  chart: jetstacks/cert-manager
+  version: v1.15.1
+  installed: true
+  set:
+  - name: crds.enabled
+    value: true
+- name: rocm
+  needs:
+  - kalavai/certificates
+  namespace: kalavai
+  chart: rocm/gpu-operator-charts
+  installed: true
 - name: datashim
   namespace: dlf
   chart: kalavai/kalavai-datashim
@@ -154,7 +177,7 @@ releases:
   - name: replicas
     value: 1
   - name: image_tag
-    value: "v2025.07.34"
+    value: "v2025.08.3" #"v2025.07.34"
   - name: deployment.in_cluster
     value: "True"
   - name: deployment.kalavai_username_key
@@ -184,10 +207,10 @@ releases:
 - name: nvidia-gpu-operator
   namespace: kalavai
   chart: kalavai/gpu
-  installed: false
+  installed: false
 - name: hami-vgpu
   namespace: kalavai
-  chart: kalavai/hami
+  chart: hami-charts/hami
   installed: true
   set:
   - name: resourceCores
@@ -206,13 +229,5 @@ releases:
     value: "1"
   - name: devicePlugin.deviceSplitCount
     value: "1"
-  # - name: scheduler.customWebhook.port
-  #   value: "30498"
-  # - name: scheduler.service.schedulerPort
-  #   value: "30498"
-  # - name: scheduler.service.monitorPort
-  #   value: "30493"
-  # - name: devicePlugin.service.httpPort
-  #   value: "30492"
 
 
kalavai_client/cli.py
@@ -1166,38 +1166,47 @@ def job__delete(name, *others, force_namespace: str=None):
 
 
 @arguably.command
-def job__estimate(billion_parameters, *others, precision=32):
-    """Guesstimate of resources needed based on required memory and current resources"""
-    try:
-        CLUSTER.validate_cluster()
-    except Exception as e:
-        console.log(f"[red]Problems with your pool: {str(e)}")
-        return
-
-    average_vram = 8
-    required_memory = float(billion_parameters) * (precision / 8) / 1.2
-    available_gpus = load_gpu_models()
-    vrams = []
-    for _, gpus in available_gpus:
-        for model in gpus["gpus"]:
-            vrams.extend([int(model["memory"])/1000] * int(gpus["capacity"]) )
-    vrams = sorted(vrams, reverse=False)
+def job__estimate(
+    *others,
+    model_size: float,
+    precision: str = "fp16",
+    context_window: int = 2048,
+    batch_size: int = 1,
+    num_layers: int = 32, # total layers, or num_key_value_heads, whatever is minimum (impacts KV cache)
+    hidden_dim: int = 4096,
+    overhead_factor: float = 0.15
+):
+    # Bytes per parameter based on precision
+    precision_bytes = {
+        "fp32": 4,
+        "fp16": 2,
+        "int8": 1,
+        "int4": 0.5
+    }
 
-    console.log(f"There are {len(vrams)} GPUs available ({sum(vrams)}GBs)")
-    console.log(f"A [yellow]{billion_parameters}B[white] model requires [yellow]~{required_memory:.2f}GB vRAM[white] at {precision}bits precision")
+    if precision not in precision_bytes:
+        raise ValueError(f"Unsupported precision: {precision}. Choose from {list(precision_bytes.keys())}")
 
-    if sum(vrams) < required_memory:
-        console.log("Current capacity is insufficient to host the model, but it can be scheduled for when it is!")
-        console.log(f"Average devices have {average_vram}GB vRAM, use {math.ceil(required_memory/(average_vram))} GPU workers")
-    else:
-        current_vram = 0
-        n_devices = 0
-        for mem in vrams:
-            current_vram += mem
-            n_devices += 1
-            if current_vram > required_memory:
-                break
-        console.log(f"Looking at current capacity, use [green]{n_devices} GPU workers[white] for a total [green]{current_vram:.2f} GB vRAM")
+    # Model parameters
+    total_params = model_size * 1e9
+    model_weights_vram = total_params * precision_bytes[precision] / 1e9 # GB
+
+    # KV Cache memory
+    # Approximation: KV cache = batch × layers × hidden_dim × 2 (K/V) × context × bytes
+    kv_cache_vram = (
+        batch_size * num_layers * hidden_dim * 2 * context_window * precision_bytes[precision]
+    ) / 1e9 # GB
+
+    # Total VRAM including overhead
+    total_vram = model_weights_vram + kv_cache_vram
+    total_vram *= (1 + overhead_factor)
+
+    result = {
+        "model_weights_vram_gb": round(model_weights_vram, 2),
+        "kv_cache_vram_gb": round(kv_cache_vram, 2),
+        "estimated_total_vram_gb": round(total_vram, 2)
+    }
+    console.log(f"[green]{result}")
 
 @arguably.command
 def job__status(name, *others):
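The refactored job__estimate no longer inspects live pool capacity; it returns a purely analytical estimate (model weights plus KV cache, scaled by an overhead factor). As a sanity check, here is a minimal standalone sketch of the same formula run on hypothetical inputs (an 8B model at fp16 with an 8192-token context; these numbers are illustrative and not taken from the package):

# Standalone sketch of the VRAM formula used by the new job__estimate command.
# The 8B / fp16 / 8192-token inputs below are hypothetical examples.
def estimate_vram_gb(model_size_b, precision="fp16", context_window=2048,
                     batch_size=1, num_layers=32, hidden_dim=4096,
                     overhead_factor=0.15):
    precision_bytes = {"fp32": 4, "fp16": 2, "int8": 1, "int4": 0.5}
    # Model weights: parameters x bytes per parameter, in GB
    weights = model_size_b * 1e9 * precision_bytes[precision] / 1e9
    # KV cache: batch x layers x hidden_dim x 2 (K and V) x context x bytes, in GB
    kv_cache = (batch_size * num_layers * hidden_dim * 2
                * context_window * precision_bytes[precision]) / 1e9
    return round((weights + kv_cache) * (1 + overhead_factor), 2)

print(estimate_vram_gb(8, precision="fp16", context_window=8192))
# -> 23.34 (16 GB weights + ~4.29 GB KV cache, plus 15% overhead)

Note that the KV-cache term grows linearly with context_window and batch_size, which is why the new command exposes both as parameters.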
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "kalavai-client"
-version = "0.6.21"
+version = "0.7.0"
 authors = [
     {name = "Carlos Fernandez Musoles", email = "carlos@kalavai.net"}
 ]
kalavai_client-0.6.21/kalavai_client/__init__.py (removed file)
@@ -1,2 +0,0 @@
-
-__version__ = "0.6.21"