kalavai-client 0.5.21__tar.gz → 0.5.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/PKG-INFO +1 -1
  2. kalavai_client-0.5.22/kalavai_client/__init__.py +2 -0
  3. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/apps.yaml +1 -1
  4. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/docker-compose-template.yaml +3 -3
  5. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/bridge_api.py +2 -1
  6. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/bridge_models.py +1 -0
  7. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/cli.py +7 -2
  8. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/core.py +11 -3
  9. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/pyproject.toml +1 -1
  10. kalavai_client-0.5.21/kalavai_client/__init__.py +0 -2
  11. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/LICENSE +0 -0
  12. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/README.md +0 -0
  13. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/__main__.py +0 -0
  14. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/__init__.py +0 -0
  15. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/apps_values.yaml +0 -0
  16. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/docker-compose-gui.yaml +0 -0
  17. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/nginx.conf +0 -0
  18. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/pool_config_template.yaml +0 -0
  19. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/pool_config_values.yaml +0 -0
  20. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/user_workspace.yaml +0 -0
  21. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/assets/user_workspace_values.yaml +0 -0
  22. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/auth.py +0 -0
  23. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/cluster.py +0 -0
  24. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/env.py +0 -0
  25. {kalavai_client-0.5.21 → kalavai_client-0.5.22}/kalavai_client/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.5.21
3
+ Version: 0.5.22
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -0,0 +1,2 @@
1
+
2
+ __version__ = "0.5.22"
@@ -139,7 +139,7 @@ releases:
139
139
  - name: replicas
140
140
  value: 2
141
141
  - name: image_tag
142
- value: "v2025.01.10"
142
+ value: "v2025.03.1"
143
143
  - name: deployment.in_cluster
144
144
  value: "True"
145
145
  - name: deployment.use_auth_key
@@ -103,7 +103,7 @@ services:
103
103
  {% endif %}
104
104
  {% endif %}
105
105
 
106
- networks:
107
- custom-network:
108
- driver: bridge
106
+ # networks:
107
+ # custom-network:
108
+ # driver: bridge
109
109
 
@@ -64,7 +64,8 @@ def pool_join(request: JoinPoolRequest):
64
64
  result = join_pool(
65
65
  token=request.token,
66
66
  num_gpus=request.num_gpus,
67
- node_name=request.node_name
67
+ node_name=request.node_name,
68
+ ip_address=request.ip_address
68
69
  )
69
70
  return result
70
71
 
@@ -17,6 +17,7 @@ class DeleteNodesRequest(BaseModel):
17
17
 
18
18
  class JoinPoolRequest(BaseModel):
19
19
  token: str
20
+ ip_address: str = None
20
21
  node_name: str = None
21
22
  num_gpus: int = None
22
23
 
@@ -511,11 +511,16 @@ def pool__join(token, *others, node_name=None):
511
511
  console.log("[red]Installation aborted")
512
512
  return
513
513
 
514
+ # select IP address (for external discovery)
515
+ console.log(f"Scanning for valid IPs")
516
+ ip_address = select_ip_address()
517
+
514
518
  console.log("Connecting worker to the pool...")
515
519
  result = join_pool(
516
520
  token=token,
517
521
  node_name=node_name,
518
- num_gpus=num_gpus
522
+ num_gpus=num_gpus,
523
+ ip_address=ip_address
519
524
  )
520
525
  if "error" in result:
521
526
  console.log(f"[red]Error when connecting: {result}")
@@ -1046,7 +1051,7 @@ def job__run(template_name, *others, values: str=None, force_namespace: str=None
1046
1051
  )
1047
1052
 
1048
1053
  if "error" in result:
1049
- console.log(f"[red]Error when deploying job: {str(e)}")
1054
+ console.log(f"[red]Error when deploying job: {str(result['error'])}")
1050
1055
  else:
1051
1056
  console.log(f"[green]{template_name} job deployed")
1052
1057
 
@@ -267,6 +267,8 @@ def fetch_job_details(jobs: list[Job]):
267
267
  if ns != namespace: # same job name, different namespace
268
268
  continue
269
269
  for _, values in ss.items():
270
+ # TODO get 'restart_count' from values['conditions'][-1]["restart_count"]
271
+ # TODO: get nodes involved in deployment (needs kubewatcher)
270
272
  workers_status[values["status"]] += 1
271
273
  workers = "\n".join([f"{k}: {v}" for k, v in workers_status.items()])
272
274
  # get URL details
@@ -287,10 +289,12 @@ def fetch_job_details(jobs: list[Job]):
287
289
  urls = [f"http://{load_server_info(data_key=SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)}:{node_port}" for node_port in node_ports]
288
290
  if "Ready" in workers_status and len(workers_status) == 1:
289
291
  status = "running"
290
- elif any([st in workers_status for st in ["Failed", "Completed"]]):
292
+ elif any([st in workers_status for st in ["Failed"]]):
291
293
  status = "error"
292
- elif any([st in workers_status for st in ["Pending"]]):
294
+ elif any([st in workers_status for st in ["Pending"]]) or len(workers_status) == 0:
293
295
  status = "pending"
296
+ elif any([st in workers_status for st in ["Succeeded", "Completed"]]):
297
+ status = "completed"
294
298
  else:
295
299
  status = "working"
296
300
  job_details.append(
@@ -569,7 +573,7 @@ def get_max_gpus():
569
573
  except:
570
574
  return 0
571
575
 
572
- def join_pool(token, num_gpus=None, node_name=None):
576
+ def join_pool(token, num_gpus=None, node_name=None, ip_address=None):
573
577
  compatibility = check_worker_compatibility()
574
578
  if len(compatibility["issues"]) > 0:
575
579
  return {"error": compatibility["issues"]}
@@ -626,6 +630,7 @@ def join_pool(token, num_gpus=None, node_name=None):
626
630
  # Generate docker compose recipe
627
631
  generate_compose_config(
628
632
  role="agent",
633
+ node_ip_address=ip_address,
629
634
  pool_ip=f"https://{kalavai_seed_ip}:6443",
630
635
  pool_token=kalavai_token,
631
636
  num_gpus=num_gpus,
@@ -701,6 +706,9 @@ def create_pool(cluster_name: str, ip_address: str, app_values: str=None, pool_c
701
706
  node_labels[USER_NODE_LABEL] = user["username"]
702
707
  except Exception as e:
703
708
  return {"error": f"[red]Error when joining network: {str(e)}"}
709
+
710
+ if num_gpus is None:
711
+ num_gpus = get_max_gpus()
704
712
 
705
713
  # Generate docker compose recipe
706
714
  generate_compose_config(
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "kalavai-client"
3
- version = "0.5.21"
3
+ version = "0.5.22"
4
4
  authors = [
5
5
  {name = "Carlos Fernandez Musoles", email = "carlos@kalavai.net"}
6
6
  ]
@@ -1,2 +0,0 @@
1
-
2
- __version__ = "0.5.21"
File without changes