kalavai-client 0.6.20__py3-none-any.whl → 0.6.22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
1
 
2
- __version__ = "0.6.20"
2
+ __version__ = "0.6.22"
@@ -20,6 +20,8 @@ repositories:
20
20
  url: https://charts.min.io/
21
21
  - name: langfuse
22
22
  url: https://langfuse.github.io/langfuse-k8s
23
+ - name: hami-charts
24
+ url: https://project-hami.github.io/HAMi
23
25
 
24
26
  releases:
25
27
  - name: datashim
@@ -154,7 +156,7 @@ releases:
154
156
  - name: replicas
155
157
  value: 1
156
158
  - name: image_tag
157
- value: "v2025.07.33"
159
+ value: "v2025.07.34"
158
160
  - name: deployment.in_cluster
159
161
  value: "True"
160
162
  - name: deployment.kalavai_username_key
@@ -185,9 +187,30 @@ releases:
185
187
  namespace: kalavai
186
188
  chart: kalavai/gpu
187
189
  installed: false
190
+ # - name: hami-vgpu
191
+ # namespace: kalavai
192
+ # chart: kalavai/hami
193
+ # installed: true
194
+ # set:
195
+ # - name: resourceCores
196
+ # value: "nvidia.com/gpucores"
197
+ # - name: devicePlugin.runtimeClassName
198
+ # value: "nvidia"
199
+ # - name: scheduler.defaultSchedulerPolicy.nodeSchedulerPolicy
200
+ # value: "binpack"
201
+ # - name: scheduler.defaultSchedulerPolicy.gpuSchedulerPolicy
202
+ # value: "binpack"
203
+ # - name: scheduler.defaultCores
204
+ # value: "100"
205
+ # - name: scheduler.kubeScheduler.imageTag
206
+ # value: v1.31.1
207
+ # - name: devicePlugin.deviceMemoryScaling
208
+ # value: "1"
209
+ # - name: devicePlugin.deviceSplitCount
210
+ # value: "1"
188
211
  - name: hami-vgpu
189
212
  namespace: kalavai
190
- chart: kalavai/hami
213
+ chart: hami-charts/hami
191
214
  installed: true
192
215
  set:
193
216
  - name: resourceCores
@@ -206,13 +229,5 @@ releases:
206
229
  value: "1"
207
230
  - name: devicePlugin.deviceSplitCount
208
231
  value: "1"
209
- # - name: scheduler.customWebhook.port
210
- # value: "30498"
211
- # - name: scheduler.service.schedulerPort
212
- # value: "30498"
213
- # - name: scheduler.service.monitorPort
214
- # value: "30493"
215
- # - name: devicePlugin.service.httpPort
216
- # value: "30492"
217
232
 
218
233
 
@@ -3,7 +3,7 @@ services:
3
3
  {{vpn_name}}:
4
4
  image: gravitl/netclient:v0.90.0
5
5
  container_name: {{vpn_name}}
6
- platform: linux/amd64
6
+ platform: linux/{{target_platform}}
7
7
  cap_add:
8
8
  - NET_ADMIN
9
9
  - SYS_MODULE
@@ -21,6 +21,7 @@ services:
21
21
  image: docker.io/bundenth/kalavai-runner:{{target_platform}}-latest
22
22
  pull_policy: always
23
23
  container_name: {{service_name}}
24
+ platform: linux/{{target_platform}}
24
25
  {% if vpn %}
25
26
  depends_on:
26
27
  - {{vpn_name}}
@@ -9,6 +9,10 @@ from starlette.requests import Request
9
9
  import uvicorn
10
10
 
11
11
  from kalavai_client.core import Job
12
+ from kalavai_client.env import (
13
+ KALAVAI_SERVICE_LABEL,
14
+ KALAVAI_SERVICE_LABEL_VALUE
15
+ )
12
16
  from kalavai_client.bridge_models import (
13
17
  CreatePoolRequest,
14
18
  InvitesRequest,
@@ -18,7 +22,8 @@ from kalavai_client.bridge_models import (
18
22
  DeleteJobRequest,
19
23
  JobDetailsRequest,
20
24
  NodesActionRequest,
21
- NodeLabelsRequest
25
+ NodeLabelsRequest,
26
+ WorkerConfigRequest
22
27
  )
23
28
  from kalavai_client.core import (
24
29
  create_pool,
@@ -34,6 +39,7 @@ from kalavai_client.core import (
34
39
  fetch_job_logs,
35
40
  fetch_job_templates,
36
41
  fetch_job_defaults,
42
+ fetch_pod_logs,
37
43
  deploy_job,
38
44
  delete_job,
39
45
  authenticate_user,
@@ -52,6 +58,7 @@ from kalavai_client.core import (
52
58
  uncordon_nodes,
53
59
  add_node_labels,
54
60
  get_node_labels,
61
+ generate_worker_package,
55
62
  TokenType
56
63
  )
57
64
  from kalavai_client.utils import (
@@ -241,6 +248,21 @@ def get_token(mode: int, api_key: str = Depends(verify_api_key)):
241
248
  """
242
249
  return get_pool_token(mode=TokenType(mode))
243
250
 
251
+ @app.post("/generate_worker_config",
252
+ operation_id="generate_worker_config",
253
+ summary="Generate a config file for a remote worker to connect to the pool",
254
+ description="Generate a config file for a remote worker to connect to the pool. Different token types provide different levels of access - join tokens allow nodes to contribute resources, while attach tokens allow management access.",
255
+ tags=["pool_management"],
256
+ response_description="Worker config file")
257
+ def generate_worker_config(request: WorkerConfigRequest, api_key: str = Depends(verify_api_key)):
258
+ return generate_worker_package(
259
+ node_name=request.node_name,
260
+ mode=TokenType(request.mode),
261
+ target_platform=request.target_platform,
262
+ num_gpus=request.num_gpus,
263
+ ip_address=request.ip_address,
264
+ storage_compatible=request.storage_compatible)
265
+
244
266
  @app.get("/fetch_devices",
245
267
  operation_id="fetch_devices",
246
268
  summary="Get list of all compute devices in the pool",
@@ -251,6 +273,15 @@ def get_devices(api_key: str = Depends(verify_api_key)):
251
273
  """Get list of available devices"""
252
274
  return fetch_devices()
253
275
 
276
+ @app.get("/fetch_service_logs",
277
+ operation_id="fetch_service_logs",
278
+ summary="Get logs for the kalavai API service",
279
+ description="Get logs for the kalavai API service, including internal logs, debugging messages and status of the service.",
280
+ tags=["info"],
281
+ response_description="Logs")
282
+ def get_service_logs(api_key: str = Depends(verify_api_key)):
283
+ return fetch_pod_logs(label_key=KALAVAI_SERVICE_LABEL, label_value=KALAVAI_SERVICE_LABEL_VALUE, force_namespace="kalavai")
284
+
254
285
  @app.post("/send_pool_invites",
255
286
  operation_id="send_pool_invites",
256
287
  summary="Send invitations to join the pool",
@@ -17,6 +17,15 @@ class CreatePoolRequest(BaseModel):
17
17
  token_mode: TokenType = Field(TokenType.USER, description="Token type for authentication")
18
18
  description: str = Field("", description="Description of the pool")
19
19
 
20
+ class WorkerConfigRequest(BaseModel):
21
+ node_name: str = Field(None, description="Name for the worker node")
22
+ mode: int = Field(2, description="Access mode for the worker (admin, worker or user)")
23
+ target_platform: str = Field("amd64", description="Target platform architecture for the worker (amd64 or arm64)")
24
+ num_gpus: int = Field(0, description="Number of GPUs to use on the worker node")
25
+ ip_address: str = Field("0.0.0.0", description="IP address of the worker node")
26
+ storage_compatible: bool = Field(True, description="Whether to use the node's storage capacity for volumes")
27
+
28
+
20
29
  class NodesActionRequest(BaseModel):
21
30
  nodes: list[str] = Field(None, description="List of node names to perform the action on, defaults to None")
22
31
 
kalavai_client/cli.py CHANGED
@@ -27,6 +27,8 @@ from kalavai_client.env import (
27
27
  USER_TEMPLATES_FOLDER,
28
28
  DOCKER_COMPOSE_GUI,
29
29
  USER_GUI_COMPOSE_FILE,
30
+ KALAVAI_SERVICE_LABEL,
31
+ KALAVAI_SERVICE_LABEL_VALUE,
30
32
  user_path,
31
33
  resource_path,
32
34
  )
@@ -37,6 +39,7 @@ from kalavai_client.core import (
37
39
  fetch_job_details,
38
40
  fetch_devices,
39
41
  fetch_job_logs,
42
+ fetch_pod_logs,
40
43
  fetch_gpus,
41
44
  generate_worker_package,
42
45
  load_gpu_models,
@@ -689,6 +692,25 @@ def pool__update(*others):
689
692
  else:
690
693
  console.log(f"[green]{result}")
691
694
 
695
+ @arguably.command
696
+ def pool__logs(*others):
697
+ """
698
+ Get the logs for the Kalavai API
699
+ """
700
+ logs = []
701
+
702
+ logs.append("Getting Kalavai API logs...")
703
+
704
+ logs = fetch_pod_logs(
705
+ label_key=KALAVAI_SERVICE_LABEL,
706
+ label_value=KALAVAI_SERVICE_LABEL_VALUE,
707
+ force_namespace="kalavai"
708
+ )
709
+ for name, log in logs.items():
710
+ console.log(f"[yellow]LOGS for service: {name}")
711
+ for key, value in log.items():
712
+ console.log(f"[yellow]{key}")
713
+ console.log(json.dumps(value, indent=2))
692
714
 
693
715
  @arguably.command
694
716
  def pool__status(*others, log_file=None):
kalavai_client/core.py CHANGED
@@ -313,7 +313,7 @@ def fetch_job_details(jobs: list[Job]):
313
313
  restart_counts = sum([c["restart_count"] for c in values["conditions"]])
314
314
  workers_status[values["status"]] += 1
315
315
  # get nodes involved in deployment (needs kubewatcher)
316
- if "node_name" in values:
316
+ if "node_name" in values and values["node_name"] is not None:
317
317
  host_nodes.add(values["node_name"])
318
318
 
319
319
  workers = "\n".join([f"{k}: {v}" for k, v in workers_status.items()])
@@ -454,9 +454,18 @@ def fetch_devices():
454
454
  return {"error": str(e)}
455
455
 
456
456
  def fetch_job_logs(job_name, force_namespace=None, pod_name=None, tail=100):
457
+ return fetch_pod_logs(
458
+ label_key=TEMPLATE_LABEL,
459
+ label_value=job_name,
460
+ pod_name=pod_name,
461
+ force_namespace=force_namespace,
462
+ tail=tail
463
+ )
464
+
465
+ def fetch_pod_logs(label_key, label_value, force_namespace=None, pod_name=None, tail=100):
457
466
  data = {
458
- "label": TEMPLATE_LABEL,
459
- "value": job_name,
467
+ "label": label_key,
468
+ "value": label_value,
460
469
  "tail": tail
461
470
  }
462
471
  if force_namespace is not None:
@@ -609,9 +618,16 @@ def attach_to_pool(token, node_name=None):
609
618
 
610
619
  return cluster_name
611
620
 
612
- def generate_worker_package(target_platform="amd64", num_gpus=0, node_name=None, ip_address="0.0.0.0", storage_compatible=True):
621
+ def generate_worker_package(
622
+ target_platform="amd64",
623
+ num_gpus=0,
624
+ node_name=None,
625
+ ip_address="0.0.0.0",
626
+ storage_compatible=True,
627
+ mode=TokenType.WORKER
628
+ ):
613
629
  # get pool data from token
614
- token = get_pool_token(mode=TokenType.WORKER)
630
+ token = get_pool_token(mode=mode)
615
631
  if "error" in token:
616
632
  return {"error": f"[red]Error when getting pool token: {token['error']}"}
617
633
 
kalavai_client/env.py CHANGED
@@ -32,6 +32,8 @@ def resource_path(relative_path: str):
32
32
 
33
33
 
34
34
  TEMPLATE_LABEL = "kalavai.job.name"
35
+ KALAVAI_SERVICE_LABEL = "app"
36
+ KALAVAI_SERVICE_LABEL_VALUE = "kube-watcher-api"
35
37
  STORAGE_CLASS_LABEL = "kalavai.storage.enabled"
36
38
  USER_NODE_LABEL = "kalavai.cluster.user"
37
39
  SERVER_IP_KEY = "server_ip"
kalavai_client/utils.py CHANGED
@@ -175,7 +175,6 @@ def is_watcher_alive(server_creds, user_cookie, timeout=30):
175
175
  timeout=timeout
176
176
  )
177
177
  except Exception as e:
178
- print(str(e))
179
178
  return False
180
179
  return True
181
180
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.6.20
3
+ Version: 0.6.22
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -1,25 +1,25 @@
1
- kalavai_client/__init__.py,sha256=sCg7mMwXeCTDe1p0b_ZZO5PQl1hoCmZ7zXqDv1j8bfY,23
1
+ kalavai_client/__init__.py,sha256=v2m8n1AiRAfLYkbEyWtMgln2mUtzHOjwrvGiSwSCHCg,23
2
2
  kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
3
3
  kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- kalavai_client/assets/apps.yaml,sha256=63sO_MJcUcSap4Dt3ADZWc7dUGTiYW5eczqNEbDnMSA,6410
4
+ kalavai_client/assets/apps.yaml,sha256=HT1Yl_tPp5ysjn9TRhL7PlP1w67QOYZGs0ge-bQlvF4,6891
5
5
  kalavai_client/assets/apps_values.yaml,sha256=LeSNd3PwkIx0wkTIlEk2KNz3Yy4sXSaHALQEkopdhKE,2165
6
6
  kalavai_client/assets/docker-compose-gui.yaml,sha256=OAVO0ohaCpDB9FGeih0yAbVNwUfDtaCzssZ25uiuJyA,787
7
- kalavai_client/assets/docker-compose-template.yaml,sha256=KHIwJ2WWX7Y7wQKiXRr82Jqd3IKRyls5zhTyl8mSmrc,1805
7
+ kalavai_client/assets/docker-compose-template.yaml,sha256=vW7GhOl_PaUodehJk8qajOlE0deZXrPc7qizg5SeYyc,1859
8
8
  kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
9
9
  kalavai_client/assets/pool_config_template.yaml,sha256=MhBZQsEMKrBgbUVSKgIGmXWhybeGKG6l5XvJb38y5GI,577
10
10
  kalavai_client/assets/pool_config_values.yaml,sha256=_iAnugramLiwJaaDcPSetThvOdR7yFiCffdMri-SQCU,68
11
11
  kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
12
12
  kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
13
13
  kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
14
- kalavai_client/bridge_api.py,sha256=Hd7whTX2TAiNYX1G237hv2rqtKUBGRJkzUoWOMZm44A,25562
15
- kalavai_client/bridge_models.py,sha256=3mHCqIHVysLLkQvGT-DKqKOrtAlQSfEOdrwSq2yTRRU,2439
16
- kalavai_client/cli.py,sha256=UC2aRUvNI53Xq-ubyoKDoVaRS1VDRmKtU9sVMf9C5Sg,47522
14
+ kalavai_client/bridge_api.py,sha256=qiN0jleaooj2vYVYxHrG_nfdgY2rKpZyvFE4uz6hkoA,27088
15
+ kalavai_client/bridge_models.py,sha256=mUh67hzhudqGxJEFHx2KGrf-Sjrt_CwkzLno8Xsm7hk,3043
16
+ kalavai_client/cli.py,sha256=ZRNOv1oUvU7Freu47PotrwqJRrBMSFgmoCHg620UdZM,48146
17
17
  kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
18
- kalavai_client/core.py,sha256=haNLna0TWzxmGx9cEhJjnV3r9YSOS3Fhtr4dt70LnwQ,35390
19
- kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
20
- kalavai_client/utils.py,sha256=5cUpMVsADF3JdDW0wbu-f38MURkhQz9pPngg0WxssJw,13460
21
- kalavai_client-0.6.20.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
- kalavai_client-0.6.20.dist-info/METADATA,sha256=COeOSfLyxsuzCteQJZYYzT3lFp1Lxexpe84A2UNcVx4,12776
23
- kalavai_client-0.6.20.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
- kalavai_client-0.6.20.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
- kalavai_client-0.6.20.dist-info/RECORD,,
18
+ kalavai_client/core.py,sha256=JVXSMmYvbNBl9ggVPGNJRryK54doySTrGDj-WhAlkfY,35760
19
+ kalavai_client/env.py,sha256=t6dfjg5GY6lbprbmlr9dVOP_KouPwdN94wnDL5zCgIM,2902
20
+ kalavai_client/utils.py,sha256=1mz-dzoJhZ9GJKU7jiGYBC1tP37SXHvxToMqqEir8R0,13438
21
+ kalavai_client-0.6.22.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
+ kalavai_client-0.6.22.dist-info/METADATA,sha256=uquNcK5cPCexquqwajy7XvqBC1LEE2USs677qtcFqMg,12776
23
+ kalavai_client-0.6.22.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
+ kalavai_client-0.6.22.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
+ kalavai_client-0.6.22.dist-info/RECORD,,