kalavai-client 0.7.6__tar.gz → 0.7.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kalavai-client might be problematic.

Files changed (25)
  1. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/PKG-INFO +1 -1
  2. kalavai_client-0.7.8/kalavai_client/__init__.py +2 -0
  3. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/apps.yaml +1 -1
  4. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/default_pool_config.yaml +1 -0
  5. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/bridge_api.py +0 -15
  6. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/cli.py +45 -13
  7. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/core.py +11 -67
  8. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/utils.py +40 -2
  9. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/pyproject.toml +1 -1
  10. kalavai_client-0.7.6/kalavai_client/__init__.py +0 -2
  11. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/LICENSE +0 -0
  12. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/README.md +0 -0
  13. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/__main__.py +0 -0
  14. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/__init__.py +0 -0
  15. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/docker-compose-gui.yaml +0 -0
  16. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/docker-compose-template.yaml +0 -0
  17. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/model_deployment_values.yaml +0 -0
  18. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/nginx.conf +0 -0
  19. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/pool_config_template.yaml +0 -0
  20. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/user_workspace.yaml +0 -0
  21. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/user_workspace_values.yaml +0 -0
  22. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/auth.py +0 -0
  23. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/bridge_models.py +0 -0
  24. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/cluster.py +0 -0
  25. {kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/env.py +0 -0
{kalavai_client-0.7.6 → kalavai_client-0.7.8}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: kalavai-client
- Version: 0.7.6
+ Version: 0.7.8
  Summary: Client app for kalavai platform
  License-Expression: Apache-2.0
  License-File: LICENSE
kalavai_client-0.7.8/kalavai_client/__init__.py
@@ -0,0 +1,2 @@
+
+ __version__ = "0.7.8"
{kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/apps.yaml
@@ -174,7 +174,7 @@ releases:
  - name: replicas
    value: 1
  - name: image_tag
-   value: "v2025.10.1" #"v2025.07.34"
+   value: "{{watcher_image_tag}}" #"v2025.07.34"
  - name: deployment.in_cluster
    value: "True"
  - name: deployment.kalavai_username_key
{kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/assets/default_pool_config.yaml
@@ -4,6 +4,7 @@ server:
    location: null
    name: "kalavai_cluster"
    mtu: 1280
+   watcher_image_tag: "v2025.10.3"

  core:
  # Deploy systems
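
Together, these two asset changes replace the hardcoded watcher image tag in apps.yaml with a {{watcher_image_tag}} placeholder and move the concrete default ("v2025.10.3") into the pool config. A minimal sketch of the resulting precedence, mirroring the fallback added to create_pool in core.py further down; the function name and file path are illustrative, not part of the package:

import yaml

def resolve_watcher_image_tag(explicit_tag, pool_config_file="default_pool_config.yaml"):
    # An explicit value (CLI flag or API argument) wins; otherwise fall back to the
    # server.watcher_image_tag default shipped with the pool config.
    with open(pool_config_file, "r") as f:
        config_values = yaml.safe_load(f)
    return config_values["server"]["watcher_image_tag"] if explicit_tag is None else explicit_tag

# resolve_watcher_image_tag(None) -> "v2025.10.3"
# resolve_watcher_image_tag("v2025.11.0") -> "v2025.11.0"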
{kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/bridge_api.py
@@ -59,7 +59,6 @@ from kalavai_client.core import (
      add_node_labels,
      get_node_labels,
      generate_worker_package,
-     get_deployment_values,
      TokenType
  )
  from kalavai_client.utils import (
@@ -447,20 +446,6 @@ def job_deploy(request: DeployJobRequest, api_key: str = Depends(verify_api_key)
      )
      return result

- @app.get("/get_deployment_values",
-     operation_id="get_deployment_values",
-     summary="Get deployment template values for a given model",
-     description="Given a model id from Huggingface, return the deployment template values required to load the model instance in the pool, including number of workers, number of gpus and gpu backend.",
-     tags=["job_management"],
-     response_description="Deployment template values")
- def get_deployment_template_values(model_id: str, api_key: str = Depends(verify_api_key)):
-     """
-     Get the deployment template values for a given model id:
-
-     - **model_id**: Model id from Huggingface type mode
-     """
-     return get_deployment_values(model_id=model_id)
-
  @app.post("/delete_job",
      operation_id="delete_job",
      summary="Terminate and remove a job from the pool",
{kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/cli.py
@@ -6,6 +6,7 @@ import uuid
  import time
  import socket
  from pathlib import Path
+ from typing import Annotated

  import yaml

@@ -62,8 +63,7 @@ from kalavai_client.core import (
      uncordon_nodes,
      TokenType,
      unregister_pool,
-     update_pool,
-     get_deployment_values
+     update_pool
  )
  from kalavai_client.utils import (
      check_gpu_drivers,
@@ -78,7 +78,8 @@ from kalavai_client.utils import (
      load_user_id,
      SERVER_IP_KEY,
      CLUSTER_NAME_KEY,
-     KALAVAI_AUTH
+     KALAVAI_AUTH,
+     parse_key_value_pairs
  )


@@ -397,7 +398,20 @@ def pool__list(*others, user_only=False):


  @arguably.command
- def pool__start(*others, pool_config_file=None, apps: list=None, mtu: str=None, platform="amd64", ip_address: str=None, location: str=None, app_values: str=None, pool_config_values: str=None, non_interactive: bool=False):
+ def pool__start(
+     *others,
+     pool_config_file=None,
+     apps: list=None,
+     mtu: str=None,
+     watcher_image_tag: str=None,
+     platform="amd64",
+     ip_address: str=None,
+     location: str=None,
+     app_values: str=None,
+     pool_config_values: str=None,
+     non_interactive: bool=False,
+     node_labels: Annotated[dict, arguably.arg.handler(parse_key_value_pairs)] = {}
+ ):

      """
      Start Kalavai pool and start/resume sharing resources.
@@ -409,6 +423,9 @@ def pool__start(*others, pool_config_file=None, apps: list=None, mtu: str=None,
      if CLUSTER.is_cluster_init():
          console.log(f"[white] You are already connected to {load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE)}. Enter [yellow]kalavai pool stop[white] to exit and join another one.")
          return
+
+     if node_labels:
+         console.log(f"[blue]Configuration received: {node_labels}")

      # User acknowledgement
      if not non_interactive:
@@ -438,10 +455,12 @@
          ip_address=ip_address,
          location=location,
          target_platform=platform,
+         watcher_image_tag=watcher_image_tag,
          pool_config_file=pool_config_file,
          apps=apps,
          num_gpus=input_gpus(non_interactive=non_interactive),
-         mtu=mtu
+         mtu=mtu,
+         node_labels=node_labels
      )

      if "warning" in result:
@@ -499,14 +518,32 @@ def pool__check_token(token, *others, public=False, verbose=False):
      return True

  @arguably.command
- def pool__join(token, *others, mtu=None, platform="amd64", node_name=None, non_interactive=False):
+ def pool__join(
+     token,
+     *others,
+     mtu=None,
+     platform="amd64",
+     node_name=None,
+     non_interactive=False,
+     node_labels: Annotated[dict, arguably.arg.handler(parse_key_value_pairs)] = {}
+ ):
      """
      Join Kalavai pool and start/resume sharing resources.

      Args:
+         token: Pool join token
          *others: all the other positional arguments go here
+         mtu: Maximum transmission unit
+         platform: Target platform (default: amd64)
+         node_name: Name for this node
+         non_interactive: Run in non-interactive mode
+         node_labels: Node labels as key=value pairs (e.g., "key1=value1,key2=value2")
      """

+     # Process node labels if provided
+     if node_labels:
+         console.log(f"[blue]Configuration received: {node_labels}")
+
      # check that k3s is not running already in the host
      # k3s service running or preinstalled
      if CLUSTER.is_agent_running():
@@ -554,7 +591,8 @@ def pool__join(token, *others, mtu=None, platform="amd64", node_name=None, non_i
          node_name=node_name,
          num_gpus=num_gpus,
          ip_address=ip_address,
-         mtu=mtu
+         mtu=mtu,
+         node_labels=node_labels
      )
      if "error" in result:
          console.log(f"[red]Error when connecting: {result}")
@@ -1170,12 +1208,6 @@ def job__delete(name, *others, force_namespace: str=None):
      console.log(f"{result}")


- @arguably.command
- def job__model_requirements(model_id: str, *others):
-     values = get_deployment_values(model_id=model_id)
-     console.log(values)
-
-
  @arguably.command
  def job__estimate(
      *others,
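
Both commands pick up node labels through the same pattern: the parameter is typed as Annotated[dict, arguably.arg.handler(parse_key_value_pairs)], so arguably runs the raw "key=value,key=value" string through the parser before the command body sees it. A minimal standalone sketch of that pattern, assuming the arguably framework already used by cli.py; the command and flag below are illustrative and not part of the package:

from typing import Annotated

import arguably

from kalavai_client.utils import parse_key_value_pairs

@arguably.command
def demo__labels(
    *others,
    node_labels: Annotated[dict, arguably.arg.handler(parse_key_value_pairs)] = {}
):
    # Invoked as e.g. `demo labels --node-labels "gpu=rtx4090,zone=home"`,
    # node_labels arrives here already parsed: {"gpu": "rtx4090", "zone": "home"}
    print(node_labels)

if __name__ == "__main__":
    arguably.run()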
{kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/core.py
@@ -47,6 +47,7 @@ from kalavai_client.utils import (
      WRITE_AUTH_KEY,
      WATCHER_PORT_KEY,
      WATCHER_SERVICE_KEY,
+     WATCHER_IMAGE_TAG_KEY,
      USER_NODE_LABEL_KEY,
      ALLOW_UNREGISTERED_USER_KEY,
      KALAVAI_AUTH
@@ -104,70 +105,6 @@ class TokenType(Enum):
      WORKER = 2


- def get_deployment_values(model_id: str):
-     """
-     Given a model ID and the resources in the pool, identify key
-     computing values required to deploy the model.
-     - GPU_BACKEND: rocm or cuda
-     - WORKERS: number of nodes to use
-     -
-     """
-     # get hardcoded deployment values (per model)
-     with open(MODEL_DEPLOYMENT_VALUES_MAPPING, "r") as f:
-         mapping = yaml.safe_load(f)
-
-     def _parse_memory_str(memory: str):
-         memory = memory.replace("G", "")
-         return int(memory)
-
-     def _get_num_workers(memory_values: list[int], size):
-         workers = 0
-         available_memory = 0
-         for gpu_mem in memory_values:
-             available_memory += gpu_mem
-             workers += 1
-             if available_memory >= size:
-                 break
-         return workers
-
-     # get resources
-     if model_id in mapping:
-         model_size = mapping[model_id]["size"]
-         # get gpus and extract available memory
-         nvidia_gpu_mems = []
-         amd_gpu_mems = []
-         backends = set()
-         for node_name, gpus in load_gpu_models():
-             for gpu in gpus["gpus"]:
-                 if "nvidia" in gpu["model"].lower():
-                     nvidia_gpu_mems.append(_parse_memory_str(gpu["memory"]))
-                     backends.add("cuda")
-                 else:
-                     amd_gpu_mems.append(_parse_memory_str(gpu["memory"]))
-                     backends.add("rocm")
-         nvidia_gpu_mems = sorted(nvidia_gpu_mems, reverse=False)
-         amd_gpu_mems = sorted(amd_gpu_mems, reverse=False)
-         # calculate num workers required
-         if sum(nvidia_gpu_mems) >= model_size and sum(amd_gpu_mems) < model_size:
-             gpu_backend = "cuda"
-             num_workers = _get_num_workers(memory_values=nvidia_gpu_mems, size=model_size)
-         elif sum(amd_gpu_mems) >= model_size and sum(nvidia_gpu_mems) < model_size:
-             gpu_backend = "rocm"
-             num_workers = _get_num_workers(memory_values=amd_gpu_mems, size=model_size)
-         else:
-             gpu_backend = random.choice(list(backends))
-             num_workers = _get_num_workers(
-                 memory_values=amd_gpu_mems if gpu_backend == "rocm" else nvidia_gpu_mems,
-                 size=model_size
-             )
-         # populate selected template
-         mapping[model_id][gpu_backend]["values"]["workers"] = num_workers
-         mapping[model_id][gpu_backend]["values"]["pipeline_parallel_size"] = num_workers
-
-         return mapping[model_id][gpu_backend]
-     return None
-
-
  def set_schedulable(schedulable, node_names):
      """
      Delete job in the cluster
@@ -735,7 +672,8 @@ def join_pool(
      node_name=None,
      ip_address=None,
      target_platform="amd64",
-     mtu="1420"
+     mtu="1420",
+     node_labels={}
  ):
      compatibility = check_worker_compatibility()
      if len(compatibility["issues"]) > 0:
@@ -768,6 +706,7 @@

      # join private network if provided
      node_labels = {
+         **node_labels,
          STORAGE_CLASS_LABEL: is_storage_compatible(),
          NODE_ROLE_LABEL: "worker"
      }
@@ -826,13 +765,15 @@ def create_pool(
      ip_address: str=None,
      location: str=None,
      target_platform: str="amd64",
+     watcher_image_tag: str=None,
      pool_config_file: str=None,
      description: str="",
      token_mode: TokenType=TokenType.USER,
      num_gpus: int=-1,
      node_name: str=None,
      mtu: str=None,
-     apps: list=[]
+     apps: list=[],
+     node_labels: dict={}
  ):

      if not check_seed_compatibility():
@@ -847,6 +788,7 @@
      user_id = load_user_id()

      node_labels = {
+         **node_labels,
          STORAGE_CLASS_LABEL: is_storage_compatible(),
          NODE_ROLE_LABEL: "server"
      }
@@ -859,6 +801,7 @@
          config_values = yaml.safe_load(f)
          # use default values if not provided
          try:
+             watcher_image_tag = config_values["server"]["watcher_image_tag"] if watcher_image_tag is None else watcher_image_tag
              cluster_name = config_values["server"]["name"] if cluster_name is None else cluster_name
              ip_address = config_values["server"]["ip_address"] if ip_address is None else ip_address
              location = config_values["server"]["location"] if location is None else location
@@ -916,7 +859,8 @@
          WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
          WATCHER_SERVICE_KEY: watcher_service,
          USER_NODE_LABEL_KEY: USER_NODE_LABEL,
-         ALLOW_UNREGISTERED_USER_KEY: True, # Change this if only registered users are allowed
+         WATCHER_IMAGE_TAG_KEY: watcher_image_tag,
+         ALLOW_UNREGISTERED_USER_KEY: True # Change this if only registered users are allowed
      }

      store_server_info(
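
In both join_pool and create_pool the caller-supplied node_labels are spread first and the built-in labels second, so the reserved storage and role labels always win on a key clash. A small sketch of that merge order using ordinary dict semantics; the literal keys stand in for STORAGE_CLASS_LABEL and NODE_ROLE_LABEL and are illustrative only:

user_labels = {"zone": "home", "kalavai.node_role": "server"}  # tries to override a reserved label
node_labels = {
    **user_labels,                       # user labels first...
    "kalavai.storage_enabled": "True",   # ...then the reserved ones, which take precedence
    "kalavai.node_role": "worker",
}
# node_labels == {"zone": "home", "kalavai.storage_enabled": "True", "kalavai.node_role": "worker"}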
{kalavai_client-0.7.6 → kalavai_client-0.7.8}/kalavai_client/utils.py
@@ -44,6 +44,7 @@ USER_API_KEY = "user_api_key"
  READONLY_AUTH_KEY = "watcher_readonly_key"
  WATCHER_SERVICE_KEY = "watcher_service"
  WATCHER_PORT_KEY = "watcher_port"
+ WATCHER_IMAGE_TAG_KEY = "watcher_image_tag"
  ENDPOINT_PORTS_KEY = "endpoint_ports"
  TEMPLATE_ID_FIELD = "id_field"
  TEMPLATE_ID_KEY = "deployment_id"
@@ -106,6 +107,32 @@ def is_storage_compatible():
      return False
  ################

+ def parse_key_value_pairs(input_str: str) -> dict:
+     """Parse key=value pairs from a string into a dictionary.
+
+     Args:
+         input_str: String containing key=value pairs separated by commas
+
+     Returns:
+         Dictionary with parsed key-value pairs
+
+     Raises:
+         ValueError: If any pair is not in key=value format
+     """
+     if not input_str.strip():
+         return {}
+
+     result = {}
+     for pair in input_str.split(','):
+         pair = pair.strip()
+         if not pair:
+             continue
+         if '=' not in pair:
+             raise ValueError(f"Invalid key=value pair: '{pair}'. Expected format: key=value")
+         key, value = pair.split('=', 1)
+         result[key.strip()] = value.strip()
+     return result
+
  def extract_auth_token(headers):
      """
      Extract auth token. Valid headers:
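
Since parse_key_value_pairs is added in full above, its behaviour can be pinned down with a few calls; a short usage sketch:

from kalavai_client.utils import parse_key_value_pairs

parse_key_value_pairs("gpu=rtx4090, zone=home")  # {'gpu': 'rtx4090', 'zone': 'home'}
parse_key_value_pairs("")                        # {} (blank input is accepted)
parse_key_value_pairs("not-a-pair")              # raises ValueError: Invalid key=value pair: 'not-a-pair'. ...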
@@ -129,8 +156,19 @@ def extract_auth_token(headers):
      return {"error": str(e)}


- def generate_compose_config(role, node_name, mtu="1420", target_platform="amd64", write_to_file=True, node_ip_address="0.0.0.0", num_gpus=0, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
-
+ def generate_compose_config(
+     role,
+     node_name,
+     mtu="1420",
+     target_platform="amd64",
+     write_to_file=True,
+     node_ip_address="0.0.0.0",
+     num_gpus=0,
+     node_labels=None,
+     pool_ip=None,
+     vpn_token=None,
+     pool_token=None
+ ):
      if node_labels is not None:
          node_labels = " ".join([f"--node-label {key}={value}" for key, value in node_labels.items()])
      rand_suffix = uuid.uuid4().hex[:8]
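
The hunk above also shows where the labels end up: generate_compose_config flattens the dict into repeated --node-label flags for the generated compose file (cli.py refers to a k3s agent, which accepts that flag, though that mapping is an inference here). A tiny sketch of the transformation, with illustrative label values:

labels = {"kalavai.node_role": "worker", "zone": "home"}
flag_string = " ".join([f"--node-label {key}={value}" for key, value in labels.items()])
# flag_string == "--node-label kalavai.node_role=worker --node-label zone=home"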
{kalavai_client-0.7.6 → kalavai_client-0.7.8}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "kalavai-client"
- version = "0.7.6"
+ version = "0.7.8"
  authors = [
      {name = "Carlos Fernandez Musoles", email = "carlos@kalavai.net"}
  ]
kalavai_client-0.7.6/kalavai_client/__init__.py
@@ -1,2 +0,0 @@
-
- __version__ = "0.7.6"
The remaining files (11 to 25 in the list above) are unchanged between versions.