kalavai-client 0.5.27__py3-none-any.whl → 0.5.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
1
 
2
- __version__ = "0.5.27"
2
+ __version__ = "0.5.29"
@@ -25,6 +25,19 @@ releases:
25
25
  chart: kalavai/kalavai-datashim
26
26
  version: "0.4.0"
27
27
  installed: false
28
+ - name: lago
29
+ namespace: kalavai
30
+ chart: kalavai/lago
31
+ installed: true
32
+ set:
33
+ - name: external.api.nodePort
34
+ value: 32000
35
+ - name: external.front.nodePort
36
+ value: 30080
37
+ - name: apiUrl
38
+ value: http://{{cluster_ip}}:32000
39
+ - name: frontUrl
40
+ value: http://{{cluster_ip}}:30080
28
41
  - name: minio
29
42
  needs:
30
43
  - kalavai/longhorn
@@ -139,7 +152,7 @@ releases:
139
152
  - name: replicas
140
153
  value: 2
141
154
  - name: image_tag
142
- value: "v2025.03.3"
155
+ value: "v2025.03.11"
143
156
  - name: deployment.in_cluster
144
157
  value: "True"
145
158
  - name: deployment.use_auth_key
@@ -2,7 +2,26 @@ services:
2
2
  kalavai_gui:
3
3
  container_name: kalavai_gui
4
4
  image: bundenth/kalavai-gui:latest
5
- network_mode: host
5
+ #network_mode: host
6
+ extra_hosts:
7
+ - "host.docker.internal:host-gateway"
8
+ networks:
9
+ - kalavai-net
10
+ environment:
11
+ - KALAVAI_BRIDGE_URL=http://host.docker.internal
12
+ - KALAVAI_BRIDGE_PORT={{bridge_port}}
13
+ entrypoint: ["reflex"]
14
+ command: >
15
+ run
16
+ --backend-port {{gui_backend_port}}
17
+ --frontend-port {{gui_frontend_port}}
18
+ ports:
19
+ - "{{gui_backend_port}}:{{gui_backend_port}}"
20
+ - "{{gui_frontend_port}}:{{gui_frontend_port}}"
6
21
  volumes:
7
22
  - "{{path}}:/root/.cache/kalavai"
8
23
  restart: always
24
+
25
+ networks:
26
+ kalavai-net:
27
+ driver: bridge
@@ -162,7 +162,8 @@ def job_deploy(request: DeployJobRequest):
162
162
  result = deploy_job(
163
163
  template_name=request.template_name,
164
164
  values_dict=request.values,
165
- force_namespace=request.force_namespace
165
+ force_namespace=request.force_namespace,
166
+ target_labels=request.target_labels
166
167
  )
167
168
  return result
168
169
 
@@ -37,6 +37,7 @@ class DeployJobRequest(BaseModel):
37
37
  template_name: str
38
38
  values: dict
39
39
  force_namespace: str = None
40
+ target_labels: dict[str, str] = None
40
41
 
41
42
  class DeleteJobRequest(BaseModel):
42
43
  name: str
kalavai_client/cli.py CHANGED
@@ -34,6 +34,7 @@ from kalavai_client.env import (
34
34
  resource_path,
35
35
  )
36
36
  from kalavai_client.core import (
37
+ deploy_test_job,
37
38
  fetch_resources,
38
39
  fetch_job_names,
39
40
  fetch_job_details,
@@ -209,11 +210,15 @@ def input_gpus():
209
210
  @arguably.command
210
211
  def gui__start(*others, backend_only=False, gui_frontend_port=3000, gui_backend_port=8000, bridge_port=8001, log_level="critical"):
211
212
  """Run GUI (docker) and kalavai core backend (api)"""
212
-
213
+ if len(set([gui_frontend_port, gui_backend_port, bridge_port])) < 3:
214
+ console.log("[red]Error: ports must be unique")
215
+ return
216
+
213
217
  if not backend_only:
214
218
  values = {
215
219
  "gui_frontend_port": gui_frontend_port,
216
220
  "gui_backend_port": gui_backend_port,
221
+ "bridge_port": bridge_port,
217
222
  "path": user_path("")
218
223
  }
219
224
  compose_yaml = load_template(
@@ -225,6 +230,9 @@ def gui__start(*others, backend_only=False, gui_frontend_port=3000, gui_backend_
225
230
  run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} up -d")
226
231
 
227
232
  console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{gui_frontend_port}")
233
+ print(
234
+ "Deploying bridge API"
235
+ )
228
236
  run_api(port=bridge_port, log_level=log_level)
229
237
 
230
238
  if not backend_only:
@@ -1028,7 +1036,7 @@ def job__run(template_name, *others, values: str=None, force_namespace: str=None
1028
1036
  console.log(f"[green]{template_name} job deployed")
1029
1037
 
1030
1038
  @arguably.command
1031
- def job__test(local_template_dir, *others, values, defaults, force_namespace: str=None):
1039
+ def job__test(local_template_dir, *others, values, force_namespace: str=None):
1032
1040
  """
1033
1041
  Helper to test local templates, useful for development
1034
1042
  """
@@ -1038,13 +1046,18 @@ def job__test(local_template_dir, *others, values, defaults, force_namespace: st
1038
1046
  console.log(f"[red]Problems with your pool: {str(e)}")
1039
1047
  return
1040
1048
 
1041
- if not os.path.isdir(local_template_dir):
1042
- console.log(f"[red]--local_template_dir ({local_template_dir}) is not a directory")
1049
+ if not os.path.isfile(os.path.join(local_template_dir, "template.yaml")):
1050
+ console.log(f"[red]template.yaml not found under {local_template_dir}")
1051
+ return
1052
+ if not os.path.isfile(os.path.join(local_template_dir, "values.yaml")):
1053
+ console.log(f"[red]values.yaml not found under {local_template_dir}")
1043
1054
  return
1044
1055
 
1045
1056
  # load template
1046
1057
  with open(os.path.join(local_template_dir, "template.yaml"), "r") as f:
1047
1058
  template_str = f.read()
1059
+ with open(os.path.join(local_template_dir, "values.yaml"), "r") as f:
1060
+ defaults = f.read()
1048
1061
 
1049
1062
  # load values
1050
1063
  if not os.path.isfile(values):
@@ -1054,37 +1067,17 @@ def job__test(local_template_dir, *others, values, defaults, force_namespace: st
1054
1067
  raw_values = yaml.load(f, Loader=yaml.SafeLoader)
1055
1068
  values_dict = {variable["name"]: variable['value'] for variable in raw_values}
1056
1069
 
1057
- # load defaults
1058
- if not os.path.isfile(defaults):
1059
- console.log(f"[red]--defaults ({defaults}) is not a valid local file")
1060
- return
1061
- with open(defaults, "r") as f:
1062
- defaults = f.read()
1070
+ result = deploy_test_job(
1071
+ template_str=template_str,
1072
+ values_dict=values_dict,
1073
+ default_values=defaults,
1074
+ force_namespace=force_namespace)
1063
1075
 
1064
- # submit custom deployment
1065
- data = {
1066
- "template": template_str,
1067
- "template_values": values_dict,
1068
- "default_values": defaults
1069
- }
1070
- if force_namespace is not None:
1071
- data["force_namespace"] = force_namespace
1072
-
1073
- try:
1074
- result = request_to_server(
1075
- method="post",
1076
- endpoint="/v1/deploy_custom_job",
1077
- data=data,
1078
- server_creds=USER_LOCAL_SERVER_FILE,
1079
- user_cookie=USER_COOKIE
1080
- )
1081
- console.log("Deployment result:")
1082
- print(
1083
- json.dumps(result,indent=3)
1084
- )
1085
- except Exception as e:
1086
- console.log(f"[red]Error when connecting to kalavai service: {str(e)}")
1087
-
1076
+ if "error" in result:
1077
+ console.log(f"[red]Error: {result['error']}")
1078
+ else:
1079
+ console.log("[green]Successfully deployed:")
1080
+ console.log(result)
1088
1081
 
1089
1082
  @arguably.command
1090
1083
  def job__defaults(template_name, *others):
@@ -1123,7 +1116,7 @@ def job__delete(name, *others, force_namespace: str=None):
1123
1116
  # deploy template with kube-watcher
1124
1117
  result = delete_job(name=name, force_namespace=force_namespace)
1125
1118
  if "error" in result:
1126
- console.log(f"[red]Error when deleting job: {str(e)}")
1119
+ console.log(f"[red]Error when deleting job: {result['error']}")
1127
1120
  else:
1128
1121
  console.log(f"{result}")
1129
1122
 
@@ -1209,13 +1202,14 @@ def job__list(*others):
1209
1202
  all_deployments = fetch_job_names()
1210
1203
  if "error" in all_deployments:
1211
1204
  console.log(f"[red]Error when connecting to kalavai service: {all_deployments}")
1212
- return
1205
+ return
1213
1206
 
1214
1207
  if len(all_deployments) == 0:
1215
1208
  console.log("[green]No deployments found.")
1216
1209
  return
1217
1210
 
1218
1211
  details = fetch_job_details(jobs=all_deployments)
1212
+
1219
1213
  if "error" in details:
1220
1214
  console.log(f"[red]{details}")
1221
1215
  return
@@ -1255,19 +1249,19 @@ def job__logs(name, *others, pod_name=None, stream=False, tail=100, force_namesp
1255
1249
  while True:
1256
1250
  try:
1257
1251
  if not stream:
1258
- for pod, logs in all_logs.items():
1252
+ for pod, info in all_logs.items():
1259
1253
  if pod_name is not None and pod_name != pod:
1260
1254
  continue
1261
- console.log(f"[yellow]Pod {pod}")
1262
- console.log(f"[green]{logs}")
1255
+ console.log(f"[yellow]Pod {pod} in {info['pod']['spec']['node_name']}")
1256
+ console.log(f"[green]{info['logs']}")
1263
1257
  break
1264
1258
  else:
1265
1259
  os.system("clear")
1266
- for pod, logs in all_logs.items():
1260
+ for pod, info in all_logs.items():
1267
1261
  if pod_name is not None and pod_name != pod:
1268
1262
  continue
1269
- print(f"Pod {pod}")
1270
- print(f"{logs}")
1263
+ print(f"Pod {pod} in {info['pod']['spec']['node_name']}")
1264
+ print(f"{info['logs']}")
1271
1265
  time.sleep(1)
1272
1266
  except KeyboardInterrupt:
1273
1267
  break
kalavai_client/core.py CHANGED
@@ -291,14 +291,18 @@ def fetch_job_details(jobs: list[Job]):
291
291
  user_cookie=USER_COOKIE
292
292
  )
293
293
  workers_status = defaultdict(int)
294
+ restart_counts = 0
294
295
  for ns, ss in result.items():
295
296
  if ns != namespace: # same job name, different namespace
296
297
  continue
297
298
  for _, values in ss.items():
298
- # TODO get 'restart_count' from values['conditions'][-1]["restart_count"]
299
299
  # TODO: get nodes involved in deployment (needs kubewatcher)
300
+ if "conditions" in values and values["conditions"] is not None:
301
+ restart_counts = sum([c["restart_count"] for c in values["conditions"]])
300
302
  workers_status[values["status"]] += 1
301
303
  workers = "\n".join([f"{k}: {v}" for k, v in workers_status.items()])
304
+ if restart_counts > 0:
305
+ workers += f"\n({restart_counts} restart)"
302
306
  # get URL details
303
307
  data = {
304
308
  "label": TEMPLATE_LABEL,
@@ -338,7 +342,7 @@ def fetch_job_details(jobs: list[Job]):
338
342
 
339
343
  return job_details
340
344
 
341
- def deploy_job(template_name, values_dict, force_namespace=None):
345
+ def deploy_job(template_name, values_dict, force_namespace=None, target_labels=None):
342
346
 
343
347
  # deploy template with kube-watcher
344
348
  data = {
@@ -347,6 +351,8 @@ def deploy_job(template_name, values_dict, force_namespace=None):
347
351
  }
348
352
  if force_namespace is not None:
349
353
  data["force_namespace"] = force_namespace
354
+ if target_labels is not None:
355
+ data["target_labels"] = target_labels
350
356
 
351
357
  try:
352
358
  result = request_to_server(
@@ -360,6 +366,29 @@ def deploy_job(template_name, values_dict, force_namespace=None):
360
366
  except Exception as e:
361
367
  return {"error": str(e)}
362
368
 
369
+ def deploy_test_job(template_str, values_dict, default_values, force_namespace=None):
370
+
371
+ # submit custom deployment
372
+ data = {
373
+ "template": template_str,
374
+ "template_values": values_dict,
375
+ "default_values": default_values
376
+ }
377
+ if force_namespace is not None:
378
+ data["force_namespace"] = force_namespace
379
+
380
+ try:
381
+ result = request_to_server(
382
+ method="post",
383
+ endpoint="/v1/deploy_custom_job",
384
+ data=data,
385
+ server_creds=USER_LOCAL_SERVER_FILE,
386
+ user_cookie=USER_COOKIE
387
+ )
388
+ return result
389
+ except Exception as e:
390
+ return {"error": str(e)}
391
+
363
392
  def delete_job(name, force_namespace=None):
364
393
  data = {
365
394
  "label": TEMPLATE_LABEL, # this ensures that both lws template and services are deleted
@@ -423,7 +452,7 @@ def fetch_job_logs(job_name, force_namespace=None, pod_name=None, tail=100):
423
452
  server_creds=USER_LOCAL_SERVER_FILE,
424
453
  user_cookie=USER_COOKIE
425
454
  )
426
- return {pod: logs for pod, logs in all_logs.items() if pod_name is None or pod_name == pod}
455
+ return {pod: info for pod, info in all_logs.items() if pod_name is None or pod_name == pod}
427
456
 
428
457
  except Exception as e:
429
458
  return {"error": str(e)}
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.5.27
3
+ Version: 0.5.29
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -1,9 +1,9 @@
1
- kalavai_client/__init__.py,sha256=Wek6gRlCzMJXNHhlUntIAj24wWPV3k64gGvcy5WL7rU,23
1
+ kalavai_client/__init__.py,sha256=GKwLB1LKAMgkwQjAmbvcykT9kbP8Rg1TdG1GyM2RMJw,23
2
2
  kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
3
3
  kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
- kalavai_client/assets/apps.yaml,sha256=d13TzkWtqdwpWOxuoG7eG0Jp0UhVUsboS28496H8iH4,5982
4
+ kalavai_client/assets/apps.yaml,sha256=NDlPoV0MPpD9WxqJIu-mksb-FPDtIQpxvH4icpmn2a8,6304
5
5
  kalavai_client/assets/apps_values.yaml,sha256=CjKVelPQHd-hm-DTMEuya92feKiphU9mh3HrosLYYPE,1676
6
- kalavai_client/assets/docker-compose-gui.yaml,sha256=oE-neG3HC2PhdH-mIxrZdQlhqnycrSP_p8fRxjaxDFE,192
6
+ kalavai_client/assets/docker-compose-gui.yaml,sha256=6OHZIDDTl_PwXSYo1d05JasWfT0iiUDhrja0nQDjrlw,692
7
7
  kalavai_client/assets/docker-compose-template.yaml,sha256=ii24Nn-dM5cZk9lxFgrzxnmK7yv_6kIIw7KUlWhvYeI,2831
8
8
  kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
9
9
  kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
@@ -11,15 +11,15 @@ kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaY
11
11
  kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
12
12
  kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
13
13
  kalavai_client/auth.py,sha256=QsBh28L2LwjBBK6pTUE4Xu36lLDTyetyU1YfS1Hbb6g,1717
14
- kalavai_client/bridge_api.py,sha256=rXTz6WpzQtsDmBDUUOqPEjWx8vfiIWEfvP1iM4MYDGM,5501
15
- kalavai_client/bridge_models.py,sha256=WwGIaWBIk4s32YemgDB2CcrrCWC-KeZjTT3iBi-kaa0,936
16
- kalavai_client/cli.py,sha256=zQ205vqPW69oEt9EAO5wC8_yEvWbFEtLUnI90Oqt4t4,46642
14
+ kalavai_client/bridge_api.py,sha256=xBd3KGoDcruuo53uNWOwA30CIPYgQLX8IkJWJgNHq9s,5546
15
+ kalavai_client/bridge_models.py,sha256=k4ILxa8jfAcfgIIBJqK1DunDzHh_oNUpuiEpmhAfTP0,977
16
+ kalavai_client/cli.py,sha256=MNmn8DgnEuyaCV8cMqr17JvfjalHcrP5Udpjn2jVXp8,46713
17
17
  kalavai_client/cluster.py,sha256=gwjmdsd--YrffT0BmZDOEpbrdm3lPskUuN5jdgcrOR0,12947
18
- kalavai_client/core.py,sha256=Trv2DDdlBAsBYUaTKnosWrfboYRrZnRcv-jfvmG7-LU,32288
18
+ kalavai_client/core.py,sha256=m5FOPzix8oNBlLqgllgJ0PwLXxo-s438fjJ0FPRVIZs,33266
19
19
  kalavai_client/env.py,sha256=Zg2pP-xGJpQumo56KMBxBLgIsBmcNN0S9R-ZP2-s630,2604
20
20
  kalavai_client/utils.py,sha256=OPmrsycNyrs2ZpTsjAzBuPN8hQNJtsYDLPKU13tnf-U,13862
21
- kalavai_client-0.5.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
- kalavai_client-0.5.27.dist-info/METADATA,sha256=5-vseYyG7Ya57UyFAqdEabXKGoFrzTMRnJgPb6FRZ6Q,14443
23
- kalavai_client-0.5.27.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
24
- kalavai_client-0.5.27.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
- kalavai_client-0.5.27.dist-info/RECORD,,
21
+ kalavai_client-0.5.29.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
+ kalavai_client-0.5.29.dist-info/METADATA,sha256=ZsOolX-q_zOLr2N4eKqnMnkpxHuJquFlWK3KgMxODik,14443
23
+ kalavai_client-0.5.29.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
24
+ kalavai_client-0.5.29.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
+ kalavai_client-0.5.29.dist-info/RECORD,,