PyPI - kalavai-client - Versions diffs - 0.6.22__py3-none-any.whl → 0.7.0__py3-none-any.whl - Mend

kalavai-client 0.6.22__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

kalavai_client/__init__.py CHANGED Viewed

@@ -1,2 +1,2 @@
-__version__ = "0.6.22"
+__version__ = "0.7.0"

kalavai_client/assets/apps.yaml CHANGED Viewed

@@ -2,6 +2,13 @@ helmDefaults:
   timeout: 1200
 repositories:
+  # amd gpu operator: https://github.com/ROCm/gpu-operator
+  - name: rocm
+    url: https://rocm.github.io/gpu-operator
+  # required by rocm
+  - name: jetstacks
+    url: https://charts.jetstack.io
+  ###
   - name: kuberay
     url: https://ray-project.github.io/kuberay-helm/
   - name: nvidia
@@ -24,6 +31,20 @@ repositories:
     url: https://project-hami.github.io/HAMi
 releases:
+  - name: certificates
+    namespace: kalavai
+    chart: jetstacks/cert-manager
+    version: v1.15.1
+    installed: true
+    set:
+    - name: crds.enabled
+      value: true
+  - name: rocm
+    needs:
+    - kalavai/certificates
+    namespace: kalavai
+    chart: rocm/gpu-operator-charts
+    installed: true
   - name: datashim
     namespace: dlf
     chart: kalavai/kalavai-datashim
@@ -156,7 +177,7 @@ releases:
     - name: replicas
       value: 1
     - name: image_tag
-      value: "v2025.07.34"
+      value: "v2025.08.3" #"v2025.07.34"
     - name: deployment.in_cluster
       value: "True"
     - name: deployment.kalavai_username_key
@@ -186,28 +207,7 @@ releases:
   - name: nvidia-gpu-operator
     namespace: kalavai
     chart: kalavai/gpu
-    installed: false
-  # - name: hami-vgpu
-  #   namespace: kalavai
-  #   chart: kalavai/hami
-  #   installed: true
-  #   set:
-  #   - name: resourceCores
-  #     value: "nvidia.com/gpucores"
-  #   - name: devicePlugin.runtimeClassName
-  #     value: "nvidia"
-  #   - name: scheduler.defaultSchedulerPolicy.nodeSchedulerPolicy
-  #     value: "binpack"
-  #   - name: scheduler.defaultSchedulerPolicy.gpuSchedulerPolicy
-  #     value: "binpack"
-  #   - name: scheduler.defaultCores
-  #     value: "100"
-  #   - name: scheduler.kubeScheduler.imageTag
-  #     value: v1.31.1
-  #   - name: devicePlugin.deviceMemoryScaling
-  #     value: "1"
-  #   - name: devicePlugin.deviceSplitCount
-  #     value: "1"
+    installed: false
   - name: hami-vgpu
     namespace: kalavai
     chart: hami-charts/hami

kalavai_client/cli.py CHANGED Viewed

@@ -1166,38 +1166,47 @@ def job__delete(name, *others, force_namespace: str=None):
 @arguably.command
-def job__estimate(billion_parameters, *others, precision=32):
-    """Guesstimate of resources needed based on required memory and current resources"""
-    try:
-        CLUSTER.validate_cluster()
-    except Exception as e:
-        console.log(f"[red]Problems with your pool: {str(e)}")
-        return
-    average_vram = 8
-    required_memory = float(billion_parameters) * (precision / 8) / 1.2
-    available_gpus = load_gpu_models()
-    vrams = []
-    for _, gpus in available_gpus:
-        for model in gpus["gpus"]:
-            vrams.extend([int(model["memory"])/1000] * int(gpus["capacity"]) )
-    vrams = sorted(vrams, reverse=False)
+def job__estimate(
+    *others,
+    model_size: float,
+    precision: str = "fp16",
+    context_window: int = 2048,
+    batch_size: int = 1,
+    num_layers: int = 32, # total layers, or num_key_value_heads, whatever is minimum (impacts KV cache)
+    hidden_dim: int = 4096,
+    overhead_factor: float = 0.15
+):
+    # Bytes per parameter based on precision
+    precision_bytes = {
+        "fp32": 4,
+        "fp16": 2,
+        "int8": 1,
+        "int4": 0.5
+    }
-    console.log(f"There are {len(vrams)} GPUs available ({sum(vrams)}GBs)")
-    console.log(f"A [yellow]{billion_parameters}B[white] model requires [yellow]~{required_memory:.2f}GB vRAM[white] at {precision}bits precision")
+    if precision not in precision_bytes:
+        raise ValueError(f"Unsupported precision: {precision}. Choose from {list(precision_bytes.keys())}")
-    if sum(vrams) < required_memory:
-        console.log("Current capacity is insufficient to host the model, but it can be scheduled for when it is!")
-        console.log(f"Average devices have {average_vram}GB vRAM, use {math.ceil(required_memory/(average_vram))} GPU workers")
-    else:
-        current_vram = 0
-        n_devices = 0
-        for mem in vrams:
-            current_vram += mem
-            n_devices += 1
-            if current_vram > required_memory:
-                break
-        console.log(f"Looking at current capacity, use [green]{n_devices} GPU workers[white] for a total [green]{current_vram:.2f} GB vRAM")
+    # Model parameters
+    total_params = model_size * 1e9
+    model_weights_vram = total_params * precision_bytes[precision] / 1e9  # GB
+    # KV Cache memory
+    # Approximation: KV cache = batch × layers × hidden_dim × 2 (K/V) × context × bytes
+    kv_cache_vram = (
+        batch_size * num_layers * hidden_dim * 2 * context_window * precision_bytes[precision]
+    ) / 1e9  # GB
+    # Total VRAM including overhead
+    total_vram = model_weights_vram + kv_cache_vram
+    total_vram *= (1 + overhead_factor)
+    result = {
+        "model_weights_vram_gb": round(model_weights_vram, 2),
+        "kv_cache_vram_gb": round(kv_cache_vram, 2),
+        "estimated_total_vram_gb": round(total_vram, 2)
+    }
+    console.log(f"[green]{result}")
 @arguably.command
 def job__status(name, *others):

{kalavai_client-0.6.22.dist-info → kalavai_client-0.7.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: kalavai-client
-Version: 0.6.22
+Version: 0.7.0
 Summary: Client app for kalavai platform
 License: Apache-2.0
 Keywords: LLM,platform

{kalavai_client-0.6.22.dist-info → kalavai_client-0.7.0.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
-kalavai_client/__init__.py,sha256=v2m8n1AiRAfLYkbEyWtMgln2mUtzHOjwrvGiSwSCHCg,23
+kalavai_client/__init__.py,sha256=vNC3xHXqvZzke4pHwX0fLN2HjCBaYwJV_rhNXqxKX6I,22
 kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
 kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-kalavai_client/assets/apps.yaml,sha256=HT1Yl_tPp5ysjn9TRhL7PlP1w67QOYZGs0ge-bQlvF4,6891
+kalavai_client/assets/apps.yaml,sha256=ylXUApravPGFRIFitx60Bx7EqQi8K6xMKcIb51pDevI,6730
 kalavai_client/assets/apps_values.yaml,sha256=LeSNd3PwkIx0wkTIlEk2KNz3Yy4sXSaHALQEkopdhKE,2165
 kalavai_client/assets/docker-compose-gui.yaml,sha256=OAVO0ohaCpDB9FGeih0yAbVNwUfDtaCzssZ25uiuJyA,787
 kalavai_client/assets/docker-compose-template.yaml,sha256=vW7GhOl_PaUodehJk8qajOlE0deZXrPc7qizg5SeYyc,1859
@@ -13,13 +13,13 @@ kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKM
 kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
 kalavai_client/bridge_api.py,sha256=qiN0jleaooj2vYVYxHrG_nfdgY2rKpZyvFE4uz6hkoA,27088
 kalavai_client/bridge_models.py,sha256=mUh67hzhudqGxJEFHx2KGrf-Sjrt_CwkzLno8Xsm7hk,3043
-kalavai_client/cli.py,sha256=ZRNOv1oUvU7Freu47PotrwqJRrBMSFgmoCHg620UdZM,48146
+kalavai_client/cli.py,sha256=QihLex32dFmneV8-JhMA7J1BXqOp1mI-IJH37O6-EzE,48007
 kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
 kalavai_client/core.py,sha256=JVXSMmYvbNBl9ggVPGNJRryK54doySTrGDj-WhAlkfY,35760
 kalavai_client/env.py,sha256=t6dfjg5GY6lbprbmlr9dVOP_KouPwdN94wnDL5zCgIM,2902
 kalavai_client/utils.py,sha256=1mz-dzoJhZ9GJKU7jiGYBC1tP37SXHvxToMqqEir8R0,13438
-kalavai_client-0.6.22.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-kalavai_client-0.6.22.dist-info/METADATA,sha256=uquNcK5cPCexquqwajy7XvqBC1LEE2USs677qtcFqMg,12776
-kalavai_client-0.6.22.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-kalavai_client-0.6.22.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
-kalavai_client-0.6.22.dist-info/RECORD,,
+kalavai_client-0.7.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kalavai_client-0.7.0.dist-info/METADATA,sha256=4cWmWAY7QZHbrVVsTIfI_d7yp3thrAXagOVBr1hJ9fk,12775
+kalavai_client-0.7.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+kalavai_client-0.7.0.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
+kalavai_client-0.7.0.dist-info/RECORD,,

{kalavai_client-0.6.22.dist-info → kalavai_client-0.7.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{kalavai_client-0.6.22.dist-info → kalavai_client-0.7.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{kalavai_client-0.6.22.dist-info → kalavai_client-0.7.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

kalavai-client 0.6.22__py3-none-any.whl → 0.7.0__py3-none-any.whl

kalavai-client 0.6.22__py3-none-any.whl → 0.7.0__py3-none-any.whl