kalavai-client 0.6.21.tar.gz → 0.7.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/PKG-INFO +1 -1
  2. kalavai_client-0.7.0/kalavai_client/__init__.py +2 -0
  3. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/apps.yaml +26 -11
  4. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/cli.py +39 -30
  5. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/pyproject.toml +1 -1
  6. kalavai_client-0.6.21/kalavai_client/__init__.py +0 -2
  7. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/LICENSE +0 -0
  8. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/README.md +0 -0
  9. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/__main__.py +0 -0
  10. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/__init__.py +0 -0
  11. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/apps_values.yaml +0 -0
  12. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/docker-compose-gui.yaml +0 -0
  13. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/docker-compose-template.yaml +0 -0
  14. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/nginx.conf +0 -0
  15. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/pool_config_template.yaml +0 -0
  16. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/pool_config_values.yaml +0 -0
  17. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/user_workspace.yaml +0 -0
  18. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/assets/user_workspace_values.yaml +0 -0
  19. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/auth.py +0 -0
  20. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/bridge_api.py +0 -0
  21. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/bridge_models.py +0 -0
  22. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/cluster.py +0 -0
  23. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/core.py +0 -0
  24. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/env.py +0 -0
  25. {kalavai_client-0.6.21 → kalavai_client-0.7.0}/kalavai_client/utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: kalavai-client
-Version: 0.6.21
+Version: 0.7.0
 Summary: Client app for kalavai platform
 License: Apache-2.0
 Keywords: LLM,platform
kalavai_client-0.7.0/kalavai_client/__init__.py (new file)
@@ -0,0 +1,2 @@
+
+__version__ = "0.7.0"
kalavai_client/assets/apps.yaml
@@ -2,6 +2,13 @@ helmDefaults:
   timeout: 1200
 
 repositories:
+# amd gpu operator: https://github.com/ROCm/gpu-operator
+- name: rocm
+  url: https://rocm.github.io/gpu-operator
+# required by rocm
+- name: jetstacks
+  url: https://charts.jetstack.io
+###
 - name: kuberay
   url: https://ray-project.github.io/kuberay-helm/
 - name: nvidia
@@ -20,8 +27,24 @@ repositories:
   url: https://charts.min.io/
 - name: langfuse
   url: https://langfuse.github.io/langfuse-k8s
+- name: hami-charts
+  url: https://project-hami.github.io/HAMi
 
 releases:
+- name: certificates
+  namespace: kalavai
+  chart: jetstacks/cert-manager
+  version: v1.15.1
+  installed: true
+  set:
+  - name: crds.enabled
+    value: true
+- name: rocm
+  needs:
+  - kalavai/certificates
+  namespace: kalavai
+  chart: rocm/gpu-operator-charts
+  installed: true
 - name: datashim
   namespace: dlf
   chart: kalavai/kalavai-datashim
@@ -154,7 +177,7 @@ releases:
   - name: replicas
     value: 1
   - name: image_tag
-    value: "v2025.07.34"
+    value: "v2025.08.3" #"v2025.07.34"
   - name: deployment.in_cluster
     value: "True"
   - name: deployment.kalavai_username_key
@@ -184,10 +207,10 @@ releases:
 - name: nvidia-gpu-operator
   namespace: kalavai
   chart: kalavai/gpu
-  installed: false
+  installed: false
 - name: hami-vgpu
   namespace: kalavai
-  chart: kalavai/hami
+  chart: hami-charts/hami
   installed: true
   set:
   - name: resourceCores
@@ -206,13 +229,5 @@ releases:
     value: "1"
   - name: devicePlugin.deviceSplitCount
     value: "1"
-  # - name: scheduler.customWebhook.port
-  #   value: "30498"
-  # - name: scheduler.service.schedulerPort
-  #   value: "30498"
-  # - name: scheduler.service.monitorPort
-  #   value: "30493"
-  # - name: devicePlugin.service.httpPort
-  #   value: "30492"
 
 
kalavai_client/cli.py
@@ -1166,38 +1166,47 @@ def job__delete(name, *others, force_namespace: str=None):
 
 
 @arguably.command
-def job__estimate(billion_parameters, *others, precision=32):
-    """Guesstimate of resources needed based on required memory and current resources"""
-    try:
-        CLUSTER.validate_cluster()
-    except Exception as e:
-        console.log(f"[red]Problems with your pool: {str(e)}")
-        return
-
-    average_vram = 8
-    required_memory = float(billion_parameters) * (precision / 8) / 1.2
-    available_gpus = load_gpu_models()
-    vrams = []
-    for _, gpus in available_gpus:
-        for model in gpus["gpus"]:
-            vrams.extend([int(model["memory"])/1000] * int(gpus["capacity"]) )
-    vrams = sorted(vrams, reverse=False)
+def job__estimate(
+    *others,
+    model_size: float,
+    precision: str = "fp16",
+    context_window: int = 2048,
+    batch_size: int = 1,
+    num_layers: int = 32, # total layers, or num_key_value_heads, whatever is minimum (impacts KV cache)
+    hidden_dim: int = 4096,
+    overhead_factor: float = 0.15
+):
+    # Bytes per parameter based on precision
+    precision_bytes = {
+        "fp32": 4,
+        "fp16": 2,
+        "int8": 1,
+        "int4": 0.5
+    }
 
-    console.log(f"There are {len(vrams)} GPUs available ({sum(vrams)}GBs)")
-    console.log(f"A [yellow]{billion_parameters}B[white] model requires [yellow]~{required_memory:.2f}GB vRAM[white] at {precision}bits precision")
+    if precision not in precision_bytes:
+        raise ValueError(f"Unsupported precision: {precision}. Choose from {list(precision_bytes.keys())}")
 
-    if sum(vrams) < required_memory:
-        console.log("Current capacity is insufficient to host the model, but it can be scheduled for when it is!")
-        console.log(f"Average devices have {average_vram}GB vRAM, use {math.ceil(required_memory/(average_vram))} GPU workers")
-    else:
-        current_vram = 0
-        n_devices = 0
-        for mem in vrams:
-            current_vram += mem
-            n_devices += 1
-            if current_vram > required_memory:
-                break
-        console.log(f"Looking at current capacity, use [green]{n_devices} GPU workers[white] for a total [green]{current_vram:.2f} GB vRAM")
+    # Model parameters
+    total_params = model_size * 1e9
+    model_weights_vram = total_params * precision_bytes[precision] / 1e9 # GB
+
+    # KV Cache memory
+    # Approximation: KV cache = batch × layers × hidden_dim × 2 (K/V) × context × bytes
+    kv_cache_vram = (
+        batch_size * num_layers * hidden_dim * 2 * context_window * precision_bytes[precision]
+    ) / 1e9 # GB
+
+    # Total VRAM including overhead
+    total_vram = model_weights_vram + kv_cache_vram
+    total_vram *= (1 + overhead_factor)
+
+    result = {
+        "model_weights_vram_gb": round(model_weights_vram, 2),
+        "kv_cache_vram_gb": round(kv_cache_vram, 2),
+        "estimated_total_vram_gb": round(total_vram, 2)
+    }
+    console.log(f"[green]{result}")
 
 @arguably.command
 def job__status(name, *others):
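The refactored job__estimate no longer inspects live pool capacity; it returns a purely analytical estimate (model weights plus KV cache, scaled by an overhead factor). As a sanity check, here is a minimal standalone sketch of the same formula run on hypothetical inputs (an 8B model at fp16 with an 8192-token context; these numbers are illustrative and not taken from the package):

# Standalone sketch of the VRAM formula used by the new job__estimate command.
# The 8B / fp16 / 8192-token inputs below are hypothetical examples.
def estimate_vram_gb(model_size_b, precision="fp16", context_window=2048,
                     batch_size=1, num_layers=32, hidden_dim=4096,
                     overhead_factor=0.15):
    precision_bytes = {"fp32": 4, "fp16": 2, "int8": 1, "int4": 0.5}
    # Model weights: parameters x bytes per parameter, in GB
    weights = model_size_b * 1e9 * precision_bytes[precision] / 1e9
    # KV cache: batch x layers x hidden_dim x 2 (K and V) x context x bytes, in GB
    kv_cache = (batch_size * num_layers * hidden_dim * 2
                * context_window * precision_bytes[precision]) / 1e9
    return round((weights + kv_cache) * (1 + overhead_factor), 2)

print(estimate_vram_gb(8, precision="fp16", context_window=8192))
# -> 23.34 (16 GB weights + ~4.29 GB KV cache, plus 15% overhead)

Note that the KV-cache term grows linearly with context_window and batch_size, which is why the new command exposes both as parameters.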
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "kalavai-client"
-version = "0.6.21"
+version = "0.7.0"
 authors = [
     {name = "Carlos Fernandez Musoles", email = "carlos@kalavai.net"}
 ]
kalavai_client-0.6.21/kalavai_client/__init__.py (removed file)
@@ -1,2 +0,0 @@
-
-__version__ = "0.6.21"