kalavai-client 0.5.27__py3-none-any.whl → 0.5.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalavai_client/__init__.py +1 -1
- kalavai_client/assets/apps.yaml +14 -1
- kalavai_client/assets/docker-compose-gui.yaml +20 -1
- kalavai_client/bridge_api.py +2 -1
- kalavai_client/bridge_models.py +1 -0
- kalavai_client/cli.py +36 -42
- kalavai_client/core.py +32 -3
- {kalavai_client-0.5.27.dist-info → kalavai_client-0.5.29.dist-info}/METADATA +1 -1
- {kalavai_client-0.5.27.dist-info → kalavai_client-0.5.29.dist-info}/RECORD +12 -12
- {kalavai_client-0.5.27.dist-info → kalavai_client-0.5.29.dist-info}/LICENSE +0 -0
- {kalavai_client-0.5.27.dist-info → kalavai_client-0.5.29.dist-info}/WHEEL +0 -0
- {kalavai_client-0.5.27.dist-info → kalavai_client-0.5.29.dist-info}/entry_points.txt +0 -0
kalavai_client/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
|
2
|
-
__version__ = "0.5.27"
|
2
|
+
__version__ = "0.5.29"
|
kalavai_client/assets/apps.yaml
CHANGED
@@ -25,6 +25,19 @@ releases:
|
|
25
25
|
chart: kalavai/kalavai-datashim
|
26
26
|
version: "0.4.0"
|
27
27
|
installed: false
|
28
|
+
- name: lago
|
29
|
+
namespace: kalavai
|
30
|
+
chart: kalavai/lago
|
31
|
+
installed: true
|
32
|
+
set:
|
33
|
+
- name: external.api.nodePort
|
34
|
+
value: 32000
|
35
|
+
- name: external.front.nodePort
|
36
|
+
value: 30080
|
37
|
+
- name: apiUrl
|
38
|
+
value: http://{{cluster_ip}}:32000
|
39
|
+
- name: frontUrl
|
40
|
+
value: http://{{cluster_ip}}:30080
|
28
41
|
- name: minio
|
29
42
|
needs:
|
30
43
|
- kalavai/longhorn
|
@@ -139,7 +152,7 @@ releases:
|
|
139
152
|
- name: replicas
|
140
153
|
value: 2
|
141
154
|
- name: image_tag
|
142
|
-
value: "v2025.03.
|
155
|
+
value: "v2025.03.11"
|
143
156
|
- name: deployment.in_cluster
|
144
157
|
value: "True"
|
145
158
|
- name: deployment.use_auth_key
|
kalavai_client/assets/docker-compose-gui.yaml
CHANGED
@@ -2,7 +2,26 @@ services:
|
|
2
2
|
kalavai_gui:
|
3
3
|
container_name: kalavai_gui
|
4
4
|
image: bundenth/kalavai-gui:latest
|
5
|
-
network_mode: host
|
5
|
+
#network_mode: host
|
6
|
+
extra_hosts:
|
7
|
+
- "host.docker.internal:host-gateway"
|
8
|
+
networks:
|
9
|
+
- kalavai-net
|
10
|
+
environment:
|
11
|
+
- KALAVAI_BRIDGE_URL=http://host.docker.internal
|
12
|
+
- KALAVAI_BRIDGE_PORT={{bridge_port}}
|
13
|
+
entrypoint: ["reflex"]
|
14
|
+
command: >
|
15
|
+
run
|
16
|
+
--backend-port {{gui_backend_port}}
|
17
|
+
--frontend-port {{gui_frontend_port}}
|
18
|
+
ports:
|
19
|
+
- "{{gui_backend_port}}:{{gui_backend_port}}"
|
20
|
+
- "{{gui_frontend_port}}:{{gui_frontend_port}}"
|
6
21
|
volumes:
|
7
22
|
- "{{path}}:/root/.cache/kalavai"
|
8
23
|
restart: always
|
24
|
+
|
25
|
+
networks:
|
26
|
+
kalavai-net:
|
27
|
+
driver: bridge
|
kalavai_client/bridge_api.py
CHANGED
@@ -162,7 +162,8 @@ def job_deploy(request: DeployJobRequest):
|
|
162
162
|
result = deploy_job(
|
163
163
|
template_name=request.template_name,
|
164
164
|
values_dict=request.values,
|
165
|
-
force_namespace=request.force_namespace
|
165
|
+
force_namespace=request.force_namespace,
|
166
|
+
target_labels=request.target_labels
|
166
167
|
)
|
167
168
|
return result
|
168
169
|
|
kalavai_client/bridge_models.py
CHANGED
kalavai_client/cli.py
CHANGED
@@ -34,6 +34,7 @@ from kalavai_client.env import (
|
|
34
34
|
resource_path,
|
35
35
|
)
|
36
36
|
from kalavai_client.core import (
|
37
|
+
deploy_test_job,
|
37
38
|
fetch_resources,
|
38
39
|
fetch_job_names,
|
39
40
|
fetch_job_details,
|
@@ -209,11 +210,15 @@ def input_gpus():
|
|
209
210
|
@arguably.command
|
210
211
|
def gui__start(*others, backend_only=False, gui_frontend_port=3000, gui_backend_port=8000, bridge_port=8001, log_level="critical"):
|
211
212
|
"""Run GUI (docker) and kalavai core backend (api)"""
|
212
|
-
|
213
|
+
if len(set([gui_frontend_port, gui_backend_port, bridge_port])) < 3:
|
214
|
+
console.log("[red]Error: ports must be unique")
|
215
|
+
return
|
216
|
+
|
213
217
|
if not backend_only:
|
214
218
|
values = {
|
215
219
|
"gui_frontend_port": gui_frontend_port,
|
216
220
|
"gui_backend_port": gui_backend_port,
|
221
|
+
"bridge_port": bridge_port,
|
217
222
|
"path": user_path("")
|
218
223
|
}
|
219
224
|
compose_yaml = load_template(
|
@@ -225,6 +230,9 @@ def gui__start(*others, backend_only=False, gui_frontend_port=3000, gui_backend_
|
|
225
230
|
run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} up -d")
|
226
231
|
|
227
232
|
console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{gui_frontend_port}")
|
233
|
+
print(
|
234
|
+
"Deploying bridge API"
|
235
|
+
)
|
228
236
|
run_api(port=bridge_port, log_level=log_level)
|
229
237
|
|
230
238
|
if not backend_only:
|
@@ -1028,7 +1036,7 @@ def job__run(template_name, *others, values: str=None, force_namespace: str=None
|
|
1028
1036
|
console.log(f"[green]{template_name} job deployed")
|
1029
1037
|
|
1030
1038
|
@arguably.command
|
1031
|
-
def job__test(local_template_dir, *others, values, defaults, force_namespace: str=None):
|
1039
|
+
def job__test(local_template_dir, *others, values, force_namespace: str=None):
|
1032
1040
|
"""
|
1033
1041
|
Helper to test local templates, useful for development
|
1034
1042
|
"""
|
@@ -1038,13 +1046,18 @@ def job__test(local_template_dir, *others, values, defaults, force_namespace: st
|
|
1038
1046
|
console.log(f"[red]Problems with your pool: {str(e)}")
|
1039
1047
|
return
|
1040
1048
|
|
1041
|
-
if not os.path.
|
1042
|
-
console.log(f"[red]
|
1049
|
+
if not os.path.isfile(os.path.join(local_template_dir, "template.yaml")):
|
1050
|
+
console.log(f"[red]template.yaml not found under {local_template_dir}")
|
1051
|
+
return
|
1052
|
+
if not os.path.isfile(os.path.join(local_template_dir, "values.yaml")):
|
1053
|
+
console.log(f"[red]values.yaml not found under {local_template_dir}")
|
1043
1054
|
return
|
1044
1055
|
|
1045
1056
|
# load template
|
1046
1057
|
with open(os.path.join(local_template_dir, "template.yaml"), "r") as f:
|
1047
1058
|
template_str = f.read()
|
1059
|
+
with open(os.path.join(local_template_dir, "values.yaml"), "r") as f:
|
1060
|
+
defaults = f.read()
|
1048
1061
|
|
1049
1062
|
# load values
|
1050
1063
|
if not os.path.isfile(values):
|
@@ -1054,37 +1067,17 @@ def job__test(local_template_dir, *others, values, defaults, force_namespace: st
|
|
1054
1067
|
raw_values = yaml.load(f, Loader=yaml.SafeLoader)
|
1055
1068
|
values_dict = {variable["name"]: variable['value'] for variable in raw_values}
|
1056
1069
|
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
defaults = f.read()
|
1070
|
+
result = deploy_test_job(
|
1071
|
+
template_str=template_str,
|
1072
|
+
values_dict=values_dict,
|
1073
|
+
default_values=defaults,
|
1074
|
+
force_namespace=force_namespace)
|
1063
1075
|
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
"
|
1068
|
-
|
1069
|
-
}
|
1070
|
-
if force_namespace is not None:
|
1071
|
-
data["force_namespace"] = force_namespace
|
1072
|
-
|
1073
|
-
try:
|
1074
|
-
result = request_to_server(
|
1075
|
-
method="post",
|
1076
|
-
endpoint="/v1/deploy_custom_job",
|
1077
|
-
data=data,
|
1078
|
-
server_creds=USER_LOCAL_SERVER_FILE,
|
1079
|
-
user_cookie=USER_COOKIE
|
1080
|
-
)
|
1081
|
-
console.log("Deployment result:")
|
1082
|
-
print(
|
1083
|
-
json.dumps(result,indent=3)
|
1084
|
-
)
|
1085
|
-
except Exception as e:
|
1086
|
-
console.log(f"[red]Error when connecting to kalavai service: {str(e)}")
|
1087
|
-
|
1076
|
+
if "error" in result:
|
1077
|
+
console.log(f"[red]Error: {result['error']}")
|
1078
|
+
else:
|
1079
|
+
console.log("[green]Successfully deployed:")
|
1080
|
+
console.log(result)
|
1088
1081
|
|
1089
1082
|
@arguably.command
|
1090
1083
|
def job__defaults(template_name, *others):
|
@@ -1123,7 +1116,7 @@ def job__delete(name, *others, force_namespace: str=None):
|
|
1123
1116
|
# deploy template with kube-watcher
|
1124
1117
|
result = delete_job(name=name, force_namespace=force_namespace)
|
1125
1118
|
if "error" in result:
|
1126
|
-
console.log(f"[red]Error when deleting job: {
|
1119
|
+
console.log(f"[red]Error when deleting job: {result['error']}")
|
1127
1120
|
else:
|
1128
1121
|
console.log(f"{result}")
|
1129
1122
|
|
@@ -1209,13 +1202,14 @@ def job__list(*others):
|
|
1209
1202
|
all_deployments = fetch_job_names()
|
1210
1203
|
if "error" in all_deployments:
|
1211
1204
|
console.log(f"[red]Error when connecting to kalavai service: {all_deployments}")
|
1212
|
-
return
|
1205
|
+
return
|
1213
1206
|
|
1214
1207
|
if len(all_deployments) == 0:
|
1215
1208
|
console.log("[green]No deployments found.")
|
1216
1209
|
return
|
1217
1210
|
|
1218
1211
|
details = fetch_job_details(jobs=all_deployments)
|
1212
|
+
|
1219
1213
|
if "error" in details:
|
1220
1214
|
console.log(f"[red]{details}")
|
1221
1215
|
return
|
@@ -1255,19 +1249,19 @@ def job__logs(name, *others, pod_name=None, stream=False, tail=100, force_namesp
|
|
1255
1249
|
while True:
|
1256
1250
|
try:
|
1257
1251
|
if not stream:
|
1258
|
-
for pod, logs in all_logs.items():
|
1252
|
+
for pod, info in all_logs.items():
|
1259
1253
|
if pod_name is not None and pod_name != pod:
|
1260
1254
|
continue
|
1261
|
-
console.log(f"[yellow]Pod {pod}")
|
1262
|
-
console.log(f"[green]{logs}")
|
1255
|
+
console.log(f"[yellow]Pod {pod} in {info['pod']['spec']['node_name']}")
|
1256
|
+
console.log(f"[green]{info['logs']}")
|
1263
1257
|
break
|
1264
1258
|
else:
|
1265
1259
|
os.system("clear")
|
1266
|
-
for pod, logs in all_logs.items():
|
1260
|
+
for pod, info in all_logs.items():
|
1267
1261
|
if pod_name is not None and pod_name != pod:
|
1268
1262
|
continue
|
1269
|
-
print(f"Pod {pod}")
|
1270
|
-
print(f"{logs}")
|
1263
|
+
print(f"Pod {pod} in {info['pod']['spec']['node_name']}")
|
1264
|
+
print(f"{info['logs']}")
|
1271
1265
|
time.sleep(1)
|
1272
1266
|
except KeyboardInterrupt:
|
1273
1267
|
break
|
kalavai_client/core.py
CHANGED
@@ -291,14 +291,18 @@ def fetch_job_details(jobs: list[Job]):
|
|
291
291
|
user_cookie=USER_COOKIE
|
292
292
|
)
|
293
293
|
workers_status = defaultdict(int)
|
294
|
+
restart_counts = 0
|
294
295
|
for ns, ss in result.items():
|
295
296
|
if ns != namespace: # same job name, different namespace
|
296
297
|
continue
|
297
298
|
for _, values in ss.items():
|
298
|
-
# TODO get 'restart_count' from values['conditions'][-1]["restart_count"]
|
299
299
|
# TODO: get nodes involved in deployment (needs kubewatcher)
|
300
|
+
if "conditions" in values and values["conditions"] is not None:
|
301
|
+
restart_counts = sum([c["restart_count"] for c in values["conditions"]])
|
300
302
|
workers_status[values["status"]] += 1
|
301
303
|
workers = "\n".join([f"{k}: {v}" for k, v in workers_status.items()])
|
304
|
+
if restart_counts > 0:
|
305
|
+
workers += f"\n({restart_counts} restart)"
|
302
306
|
# get URL details
|
303
307
|
data = {
|
304
308
|
"label": TEMPLATE_LABEL,
|
@@ -338,7 +342,7 @@ def fetch_job_details(jobs: list[Job]):
|
|
338
342
|
|
339
343
|
return job_details
|
340
344
|
|
341
|
-
def deploy_job(template_name, values_dict, force_namespace=None):
|
345
|
+
def deploy_job(template_name, values_dict, force_namespace=None, target_labels=None):
|
342
346
|
|
343
347
|
# deploy template with kube-watcher
|
344
348
|
data = {
|
@@ -347,6 +351,8 @@ def deploy_job(template_name, values_dict, force_namespace=None):
|
|
347
351
|
}
|
348
352
|
if force_namespace is not None:
|
349
353
|
data["force_namespace"] = force_namespace
|
354
|
+
if target_labels is not None:
|
355
|
+
data["target_labels"] = target_labels
|
350
356
|
|
351
357
|
try:
|
352
358
|
result = request_to_server(
|
@@ -360,6 +366,29 @@ def deploy_job(template_name, values_dict, force_namespace=None):
|
|
360
366
|
except Exception as e:
|
361
367
|
return {"error": str(e)}
|
362
368
|
|
369
|
+
def deploy_test_job(template_str, values_dict, default_values, force_namespace=None):
|
370
|
+
|
371
|
+
# submit custom deployment
|
372
|
+
data = {
|
373
|
+
"template": template_str,
|
374
|
+
"template_values": values_dict,
|
375
|
+
"default_values": default_values
|
376
|
+
}
|
377
|
+
if force_namespace is not None:
|
378
|
+
data["force_namespace"] = force_namespace
|
379
|
+
|
380
|
+
try:
|
381
|
+
result = request_to_server(
|
382
|
+
method="post",
|
383
|
+
endpoint="/v1/deploy_custom_job",
|
384
|
+
data=data,
|
385
|
+
server_creds=USER_LOCAL_SERVER_FILE,
|
386
|
+
user_cookie=USER_COOKIE
|
387
|
+
)
|
388
|
+
return result
|
389
|
+
except Exception as e:
|
390
|
+
return {"error": str(e)}
|
391
|
+
|
363
392
|
def delete_job(name, force_namespace=None):
|
364
393
|
data = {
|
365
394
|
"label": TEMPLATE_LABEL, # this ensures that both lws template and services are deleted
|
@@ -423,7 +452,7 @@ def fetch_job_logs(job_name, force_namespace=None, pod_name=None, tail=100):
|
|
423
452
|
server_creds=USER_LOCAL_SERVER_FILE,
|
424
453
|
user_cookie=USER_COOKIE
|
425
454
|
)
|
426
|
-
return {pod:
|
455
|
+
return {pod: info for pod, info in all_logs.items() if pod_name is None or pod_name == pod}
|
427
456
|
|
428
457
|
except Exception as e:
|
429
458
|
return {"error": str(e)}
|
{kalavai_client-0.5.27.dist-info → kalavai_client-0.5.29.dist-info}/RECORD
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
-
kalavai_client/__init__.py,sha256=
|
1
|
+
kalavai_client/__init__.py,sha256=GKwLB1LKAMgkwQjAmbvcykT9kbP8Rg1TdG1GyM2RMJw,23
|
2
2
|
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
3
|
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
kalavai_client/assets/apps.yaml,sha256=
|
4
|
+
kalavai_client/assets/apps.yaml,sha256=NDlPoV0MPpD9WxqJIu-mksb-FPDtIQpxvH4icpmn2a8,6304
|
5
5
|
kalavai_client/assets/apps_values.yaml,sha256=CjKVelPQHd-hm-DTMEuya92feKiphU9mh3HrosLYYPE,1676
|
6
|
-
kalavai_client/assets/docker-compose-gui.yaml,sha256=
|
6
|
+
kalavai_client/assets/docker-compose-gui.yaml,sha256=6OHZIDDTl_PwXSYo1d05JasWfT0iiUDhrja0nQDjrlw,692
|
7
7
|
kalavai_client/assets/docker-compose-template.yaml,sha256=ii24Nn-dM5cZk9lxFgrzxnmK7yv_6kIIw7KUlWhvYeI,2831
|
8
8
|
kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
|
9
9
|
kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
|
@@ -11,15 +11,15 @@ kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaY
|
|
11
11
|
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
12
12
|
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
13
13
|
kalavai_client/auth.py,sha256=QsBh28L2LwjBBK6pTUE4Xu36lLDTyetyU1YfS1Hbb6g,1717
|
14
|
-
kalavai_client/bridge_api.py,sha256=
|
15
|
-
kalavai_client/bridge_models.py,sha256=
|
16
|
-
kalavai_client/cli.py,sha256=
|
14
|
+
kalavai_client/bridge_api.py,sha256=xBd3KGoDcruuo53uNWOwA30CIPYgQLX8IkJWJgNHq9s,5546
|
15
|
+
kalavai_client/bridge_models.py,sha256=k4ILxa8jfAcfgIIBJqK1DunDzHh_oNUpuiEpmhAfTP0,977
|
16
|
+
kalavai_client/cli.py,sha256=MNmn8DgnEuyaCV8cMqr17JvfjalHcrP5Udpjn2jVXp8,46713
|
17
17
|
kalavai_client/cluster.py,sha256=gwjmdsd--YrffT0BmZDOEpbrdm3lPskUuN5jdgcrOR0,12947
|
18
|
-
kalavai_client/core.py,sha256=
|
18
|
+
kalavai_client/core.py,sha256=m5FOPzix8oNBlLqgllgJ0PwLXxo-s438fjJ0FPRVIZs,33266
|
19
19
|
kalavai_client/env.py,sha256=Zg2pP-xGJpQumo56KMBxBLgIsBmcNN0S9R-ZP2-s630,2604
|
20
20
|
kalavai_client/utils.py,sha256=OPmrsycNyrs2ZpTsjAzBuPN8hQNJtsYDLPKU13tnf-U,13862
|
21
|
-
kalavai_client-0.5.27.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
22
|
-
kalavai_client-0.5.
|
23
|
-
kalavai_client-0.5.27.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
24
|
-
kalavai_client-0.5.27.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
25
|
-
kalavai_client-0.5.27.dist-info/RECORD,,
|
21
|
+
kalavai_client-0.5.29.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
22
|
+
kalavai_client-0.5.29.dist-info/METADATA,sha256=ZsOolX-q_zOLr2N4eKqnMnkpxHuJquFlWK3KgMxODik,14443
|
23
|
+
kalavai_client-0.5.29.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
24
|
+
kalavai_client-0.5.29.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
25
|
+
kalavai_client-0.5.29.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|