kalavai-client 0.6.16__py3-none-any.whl → 0.6.17__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
1
 
2
- __version__ = "0.6.16"
2
+ __version__ = "0.6.17"
@@ -18,6 +18,8 @@ repositories:
18
18
  url: https://opencost.github.io/opencost-helm-chart
19
19
  - name: minio
20
20
  url: https://charts.min.io/
21
+ - name: langfuse
22
+ url: https://langfuse.github.io/langfuse-k8s
21
23
 
22
24
  releases:
23
25
  - name: datashim
@@ -28,7 +30,7 @@ releases:
28
30
  - name: lago
29
31
  namespace: kalavai
30
32
  chart: kalavai/lago
31
- installed: {{deploy_llm_sidecars}}
33
+ installed: {{deploy_lago}}
32
34
  set:
33
35
  - name: external.api.nodePort
34
36
  value: 32000
@@ -79,7 +81,7 @@ releases:
79
81
  namespace: kalavai
80
82
  chart: kalavai/kalavai-helios
81
83
  version: "0.1.11"
82
- installed: false #{{deploy_llm_sidecars}}
84
+ installed: false
83
85
  set:
84
86
  - name: deployment.watcher_endpoint
85
87
  value: "http://{{watcher_service}}"
@@ -94,11 +96,11 @@ releases:
94
96
  - name: opencost
95
97
  namespace: opencost
96
98
  chart: opencost-charts/opencost
97
- installed: {{deploy_llm_sidecars}}
99
+ installed: {{deploy_opencost}}
98
100
  - name: prometheus
99
101
  namespace: prometheus-system
100
102
  chart: prometheus/prometheus
101
- installed: {{deploy_llm_sidecars}}
103
+ installed: {{deploy_prometheus}}
102
104
  set:
103
105
  - name: prometheus-pushgateway.enabled
104
106
  value: false
@@ -152,7 +154,7 @@ releases:
152
154
  - name: replicas
153
155
  value: 1
154
156
  - name: image_tag
155
- value: "v2025.06.7"
157
+ value: "v2025.06.15"
156
158
  - name: deployment.in_cluster
157
159
  value: "True"
158
160
  - name: deployment.kalavai_username_key
@@ -1,3 +1,24 @@
1
+ ### APS ###
2
+ - name: deploy_lago
3
+ default: "False"
4
+ description: "Deploy Lago payment system"
5
+
6
+ - name: deploy_opencost
7
+ default: "False"
8
+ description: "Deploy Opencost cost monitoring system"
9
+
10
+ - name: deploy_prometheus
11
+ default: "True"
12
+ description: "Deploy Prometheus system monitoring system"
13
+
14
+ - name: deploy_langfuse
15
+ default: "False"
16
+ description: "Deploy Langfuse LLM tracing system"
17
+
18
+ ######
19
+
20
+ ### VARIABLES ###
21
+
1
22
  - name: kalavai_api_endpoint
2
23
  default: https://platform.kalavai.net/_/api
3
24
  description: ""
@@ -78,4 +99,4 @@
78
99
 
79
100
  - name: minio_rootPassword
80
101
  default: "password"
81
- description: ""
102
+ description: ""
@@ -17,3 +17,4 @@
17
17
  # unmapMarkSnapChainRemoved: "ignored"
18
18
  # disableRevisionCounter: "true"
19
19
  # dataEngine: "v1"
20
+
@@ -1,12 +1,4 @@
1
- # STORAGE #
2
- - name: storage_label_selector
3
- default: "kalavai.storage.enabled:True"
4
- description: ""
5
-
6
1
  - name: storage_class_name
7
- default: "longhorn-model"
2
+ default: "longhorn"
8
3
  description: ""
9
4
 
10
- - name: storage_replicas
11
- default: 1
12
- description: ""
@@ -244,9 +244,9 @@ def send_pool_invites(request: InvitesRequest, api_key: str = Depends(verify_api
244
244
  summary="Fetch resources",
245
245
  description="Get available resources",
246
246
  response_description="Resource information")
247
- def resources(api_key: str = Depends(verify_api_key)):
247
+ def resources(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
248
248
  """Get available resources"""
249
- return fetch_resources()
249
+ return fetch_resources(node_names=request.nodes)
250
250
 
251
251
  @app.get("/fetch_job_names",
252
252
  summary="Fetch job names",
@@ -317,7 +317,7 @@ def job_templates(api_key: str = Depends(verify_api_key)):
317
317
  @app.get("/fetch_job_defaults",
318
318
  summary="Fetch job defaults",
319
319
  description="Get default values for a job template",
320
- response_description="Job default values")
320
+ response_description="Job metadata values")
321
321
  def job_templates(name: str, api_key: str = Depends(verify_api_key)):
322
322
  """
323
323
  Get job defaults with the following parameters:
@@ -18,7 +18,7 @@ class CreatePoolRequest(BaseModel):
18
18
  description: str = Field("", description="Description of the pool")
19
19
 
20
20
  class NodesActionRequest(BaseModel):
21
- nodes: list[str] = Field(description="List of node names to perform the action on")
21
+ nodes: list[str] = Field(None, description="List of node names to perform the action on")
22
22
 
23
23
  class JoinPoolRequest(BaseModel):
24
24
  token: str = Field(description="Token to join the pool")
kalavai_client/cli.py CHANGED
@@ -207,28 +207,43 @@ def input_gpus(non_interactive=False):
207
207
  @arguably.command
208
208
  def gui__start(
209
209
  *others,
210
- gui_frontend_port=3000,
211
- gui_backend_port=8000,
212
- bridge_port=8001,
213
210
  log_level="critical",
214
211
  backend_only=False
215
212
  ):
216
213
  """Run GUI (docker) and kalavai core backend (api)"""
217
- if len(set([gui_frontend_port, gui_backend_port, bridge_port])) < 3:
218
- console.log("[red]Error: ports must be unique")
219
- return
214
+ ports_needed = 1 if backend_only else 3
215
+ # find 3 available ports
216
+ ip = socket.gethostbyname (socket.gethostname())
217
+ ports = []
218
+ for port in range(49152,65535):
219
+ try:
220
+ serv = socket.socket(socket.AF_INET,socket.SOCK_STREAM) # create a new socket
221
+ serv.bind((ip, port)) # bind socket with address
222
+ serv.close()
223
+ ports.append(port)
224
+ except:
225
+ #port closed
226
+ pass
227
+ if len(ports) >= ports_needed:
228
+ break
220
229
 
230
+ if len(ports) < ports_needed:
231
+ # if not found, error
232
+ console.log(f"[red]Cannot initialise GUI: Could not find {ports_needed} free ports in your machine")
233
+ return
234
+ console.log(f"Using ports: {ports}")
235
+
221
236
  user_key = load_user_id()
222
237
  if user_key is not None:
223
238
  console.log(f"[green]Using user key: {user_key}")
224
239
  if not backend_only:
225
240
  values = {
226
- "gui_frontend_port": gui_frontend_port,
227
- "gui_backend_port": gui_backend_port,
228
- "bridge_port": bridge_port,
241
+ "gui_frontend_port": ports[1],
242
+ "gui_backend_port": ports[2],
243
+ "bridge_port": ports[0],
229
244
  "path": user_path("", create_path=True),
230
245
  "protected_access": user_key
231
- }
246
+ }
232
247
  compose_yaml = load_template(
233
248
  template_path=DOCKER_COMPOSE_GUI,
234
249
  values=values)
@@ -237,11 +252,11 @@ def gui__start(
237
252
 
238
253
  run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} up -d")
239
254
 
240
- console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{gui_frontend_port}")
255
+ console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{ports[1]}")
241
256
  print(
242
257
  "Deploying bridge API"
243
258
  )
244
- run_api(port=bridge_port, log_level=log_level)
259
+ run_api(port=ports[0], log_level=log_level)
245
260
 
246
261
  if not backend_only:
247
262
  run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} down")
@@ -1096,12 +1111,22 @@ def job__defaults(template_name, *others):
1096
1111
  return
1097
1112
 
1098
1113
  # deploy template with kube-watcher
1099
- defaults = fetch_job_defaults(name=template_name)
1114
+ data = fetch_job_defaults(name=template_name)
1115
+ metadata = data["metadata"]
1116
+ defaults = data["defaults"]
1100
1117
  if "error" in defaults:
1101
1118
  console.log(f"[red]Error when fetching job defaults: {defaults}")
1102
1119
  print(
1103
1120
  json.dumps(defaults, indent=3)
1104
1121
  )
1122
+ print(
1123
+ "*****************",
1124
+ "Metadata",
1125
+ "*****************"
1126
+ )
1127
+ print(
1128
+ json.dumps(metadata, indent=3)
1129
+ )
1105
1130
 
1106
1131
 
1107
1132
  @arguably.command
kalavai_client/core.py CHANGED
@@ -13,7 +13,6 @@ import re
13
13
 
14
14
  from kalavai_client.cluster import CLUSTER
15
15
  from kalavai_client.utils import (
16
- DEPLOY_LLM_SIDECARS_KEY,
17
16
  NODE_ROLE_LABEL,
18
17
  check_gpu_drivers,
19
18
  generate_join_token,
@@ -80,6 +79,7 @@ class Job(BaseModel):
80
79
  workers: Optional[str] = None
81
80
  endpoint: Optional[str] = None
82
81
  status: Optional[str] = None
82
+ host_nodes: Optional[str] = None
83
83
 
84
84
  class DeviceStatus(BaseModel):
85
85
  name: str
@@ -198,19 +198,22 @@ def get_ip_addresses(subnet=None):
198
198
  raise ValueError(f"No IPs available on subnet {subnet}")
199
199
  return ips
200
200
 
201
- def fetch_resources():
201
+ def fetch_resources(node_names: list[str]=None):
202
+ data = {}
203
+ if node_names is not None:
204
+ data["node_names"] = node_names
202
205
  try:
203
206
  total = request_to_server(
204
207
  method="get",
205
208
  endpoint="/v1/get_cluster_total_resources",
206
- data={},
209
+ data=data,
207
210
  server_creds=USER_LOCAL_SERVER_FILE,
208
211
  user_cookie=USER_COOKIE
209
212
  )
210
213
  available = request_to_server(
211
214
  method="get",
212
215
  endpoint="/v1/get_cluster_available_resources",
213
- data={},
216
+ data=data,
214
217
  server_creds=USER_LOCAL_SERVER_FILE,
215
218
  user_cookie=USER_COOKIE
216
219
  )
@@ -224,14 +227,14 @@ def fetch_job_defaults(name):
224
227
  "template": name
225
228
  }
226
229
  try:
227
- defaults = request_to_server(
230
+ metadata = request_to_server(
228
231
  method="get",
229
232
  endpoint="/v1/job_defaults",
230
233
  data=data,
231
234
  server_creds=USER_LOCAL_SERVER_FILE,
232
235
  user_cookie=USER_COOKIE
233
236
  )
234
- return defaults
237
+ return metadata
235
238
  except Exception as e:
236
239
  return {"error": str(e)}
237
240
 
@@ -296,14 +299,18 @@ def fetch_job_details(jobs: list[Job]):
296
299
  )
297
300
  workers_status = defaultdict(int)
298
301
  restart_counts = 0
302
+ host_nodes = set()
299
303
  for ns, ss in result.items():
300
304
  if ns != namespace: # same job name, different namespace
301
305
  continue
302
306
  for _, values in ss.items():
303
- # TODO: get nodes involved in deployment (needs kubewatcher)
304
307
  if "conditions" in values and values["conditions"] is not None:
305
308
  restart_counts = sum([c["restart_count"] for c in values["conditions"]])
306
309
  workers_status[values["status"]] += 1
310
+ # get nodes involved in deployment (needs kubewatcher)
311
+ if "node_name" in values:
312
+ host_nodes.add(values["node_name"])
313
+
307
314
  workers = "\n".join([f"{k}: {v}" for k, v in workers_status.items()])
308
315
  if restart_counts > 0:
309
316
  workers += f"\n({restart_counts} restart)"
@@ -320,7 +327,8 @@ def fetch_job_details(jobs: list[Job]):
320
327
  server_creds=USER_LOCAL_SERVER_FILE,
321
328
  user_cookie=USER_COOKIE
322
329
  )
323
- node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
330
+ #node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
331
+ node_ports = [f"{p['node_port']}" for s in result.values() for p in s["ports"]]
324
332
 
325
333
  urls = [f"http://{load_server_info(data_key=SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)}:{node_port}" for node_port in node_ports]
326
334
  if "Ready" in workers_status and len(workers_status) == 1:
@@ -338,7 +346,8 @@ def fetch_job_details(jobs: list[Job]):
338
346
  name=deployment,
339
347
  workers=workers,
340
348
  endpoint="\n".join(urls),
341
- status=str(status))
349
+ status=str(status),
350
+ host_nodes=" ".join(host_nodes))
342
351
  )
343
352
 
344
353
  except Exception as e:
@@ -802,8 +811,7 @@ def create_pool(
802
811
  WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
803
812
  WATCHER_SERVICE_KEY: watcher_service,
804
813
  USER_NODE_LABEL_KEY: USER_NODE_LABEL,
805
- ALLOW_UNREGISTERED_USER_KEY: True, # Change this if only registered users are allowed,
806
- DEPLOY_LLM_SIDECARS_KEY: location is not None
814
+ ALLOW_UNREGISTERED_USER_KEY: True, # Change this if only registered users are allowed
807
815
  }
808
816
 
809
817
  store_server_info(
kalavai_client/utils.py CHANGED
@@ -38,7 +38,6 @@ CLUSTER_NAME_KEY = "cluster_name"
38
38
  AUTH_KEY = "watcher_admin_key"
39
39
  WRITE_AUTH_KEY = "watcher_write_key"
40
40
  ALLOW_UNREGISTERED_USER_KEY = "watcher_allow_unregistered_user"
41
- DEPLOY_LLM_SIDECARS_KEY = "deploy_llm_sidecars"
42
41
  NODE_ROLE_LABEL = "kalavai.node_role"
43
42
  USER_API_KEY = "user_api_key"
44
43
  READONLY_AUTH_KEY = "watcher_readonly_key"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.6.16
3
+ Version: 0.6.17
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -0,0 +1,25 @@
1
+ kalavai_client/__init__.py,sha256=6D1LEuHzwFI-n2PmIawhX9C0K4e_BwIIJUAUnpkzCWE,23
2
+ kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
3
+ kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ kalavai_client/assets/apps.yaml,sha256=zVtfPqesNhoBLpNlhIPAVtxgXLqEQU2pK1GTzKGEqiQ,6395
5
+ kalavai_client/assets/apps_values.yaml,sha256=dvsAnMC1uk9oDsnITIYJc5CIg9LTwGzXldjPZTwRQyE,2069
6
+ kalavai_client/assets/docker-compose-gui.yaml,sha256=shqN78YLw0QP7bqTKveI4ppz5E-5b1JowmsSB4OG3nA,778
7
+ kalavai_client/assets/docker-compose-template.yaml,sha256=KHIwJ2WWX7Y7wQKiXRr82Jqd3IKRyls5zhTyl8mSmrc,1805
8
+ kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
9
+ kalavai_client/assets/pool_config_template.yaml,sha256=MhBZQsEMKrBgbUVSKgIGmXWhybeGKG6l5XvJb38y5GI,577
10
+ kalavai_client/assets/pool_config_values.yaml,sha256=_iAnugramLiwJaaDcPSetThvOdR7yFiCffdMri-SQCU,68
11
+ kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
12
+ kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
13
+ kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
14
+ kalavai_client/bridge_api.py,sha256=5tYqI8UdG7K1Qskywk97kC0TpvYruUZxqxvbn-2nve4,15405
15
+ kalavai_client/bridge_models.py,sha256=t1fJGaF6YDMQdOnEU3XT8zTBHU8eUWJ1yhM5s7c6vMo,2546
16
+ kalavai_client/cli.py,sha256=SzKG7_ZG0ehMQsECQRWSvqj2Fju2Gd5O7uBa60bFBAY,47830
17
+ kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
18
+ kalavai_client/core.py,sha256=dJVX5mhFzIshazCfAzb-AqpqWjkp_djgbMyNXzuAF48,34650
19
+ kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
20
+ kalavai_client/utils.py,sha256=bhvQzF12q7L2hGVrbcmXRDXXIsAdlzcsTms6RQRxGU4,12733
21
+ kalavai_client-0.6.17.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
+ kalavai_client-0.6.17.dist-info/METADATA,sha256=J5TEqnwPm3ZopGEi2MRn_ddy_VV101JcB8sl6MGL4iI,12655
23
+ kalavai_client-0.6.17.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
+ kalavai_client-0.6.17.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
+ kalavai_client-0.6.17.dist-info/RECORD,,
@@ -1,25 +0,0 @@
1
- kalavai_client/__init__.py,sha256=1--FABNdIxbiNn1wQox38stjIswkk1wPeOgoYJXMsNU,23
2
- kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
3
- kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- kalavai_client/assets/apps.yaml,sha256=Qe3RtY4kQbzZnF9K724FUbtqnkuCGfNUfK-WWtamATg,6365
5
- kalavai_client/assets/apps_values.yaml,sha256=WRew3bS1MztjzcJfphuJcKn0n2T1ICRupPpr_Csjt_s,1644
6
- kalavai_client/assets/docker-compose-gui.yaml,sha256=shqN78YLw0QP7bqTKveI4ppz5E-5b1JowmsSB4OG3nA,778
7
- kalavai_client/assets/docker-compose-template.yaml,sha256=KHIwJ2WWX7Y7wQKiXRr82Jqd3IKRyls5zhTyl8mSmrc,1805
8
- kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
9
- kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
10
- kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
11
- kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
12
- kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
13
- kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
14
- kalavai_client/bridge_api.py,sha256=O65aIh5lUl0KldRekHzLC-xdv1YJmrR14kt5-3UgCco,15351
15
- kalavai_client/bridge_models.py,sha256=5ALGbkb6cxKwXbrzeTa9ja0kiZkJBvnY3J1IsmXTn0U,2540
16
- kalavai_client/cli.py,sha256=_LK5OrCM5PYcYZo7lwXyfI3mlNzLFhL-BicKYbJkxeY,47123
17
- kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
18
- kalavai_client/core.py,sha256=R8UBTTzMHVPHuM9nB70cIxUxVCHyBspEq1cAWH1OyOQ,34304
19
- kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
20
- kalavai_client/utils.py,sha256=kGtfEuXVG5LgMJk289ksFgYrsMHwKXN7yvS5wCIou8s,12781
21
- kalavai_client-0.6.16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
- kalavai_client-0.6.16.dist-info/METADATA,sha256=K5mzqy8pSDdK6WWFSt8YZNTJLENfeV3OOGELq417dYs,12655
23
- kalavai_client-0.6.16.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
- kalavai_client-0.6.16.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
- kalavai_client-0.6.16.dist-info/RECORD,,