kalavai-client 0.5.19__py3-none-any.whl → 0.5.20__py3-none-any.whl
- kalavai_client/__init__.py +1 -1
- kalavai_client/bridge_api.py +216 -0
- kalavai_client/bridge_models.py +37 -0
- kalavai_client/cli.py +41 -67
- kalavai_client/core.py +82 -8
- {kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/METADATA +50 -39
- {kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/RECORD +10 -8
- {kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/LICENSE +0 -0
- {kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/WHEEL +0 -0
- {kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/entry_points.txt +0 -0
kalavai_client/__init__.py
CHANGED
@@ -1,2 +1,2 @@
-__version__ = "0.5.19"
+__version__ = "0.5.20"
kalavai_client/bridge_api.py
ADDED
@@ -0,0 +1,216 @@
+"""
+Core kalavai service.
+Used as a bridge between the kalavai-client app and the reflex frontend
+"""
+from fastapi import FastAPI
+import uvicorn
+
+from kalavai_client.bridge_models import (
+    CreatePoolRequest,
+    JoinPoolRequest,
+    StopPoolRequest,
+    DeployJobRequest,
+    DeleteJobRequest,
+    JobDetailsRequest,
+    DeleteNodesRequest
+)
+from kalavai_client.core import (
+    create_pool,
+    join_pool,
+    attach_to_pool,
+    stop_pool,
+    fetch_devices,
+    fetch_resources,
+    fetch_job_names,
+    fetch_gpus,
+    fetch_job_details,
+    fetch_job_logs,
+    fetch_job_templates,
+    fetch_job_defaults,
+    deploy_job,
+    delete_job,
+    authenticate_user,
+    load_user_session,
+    user_logout,
+    is_connected,
+    list_available_pools,
+    is_agent_running,
+    is_server,
+    pause_agent,
+    resume_agent,
+    get_ip_addresses,
+    get_pool_token,
+    delete_nodes,
+    TokenType
+)
+
+app = FastAPI()
+
+@app.post("/create_pool")
+def pool_create(request: CreatePoolRequest):
+    result = create_pool(
+        cluster_name=request.cluster_name,
+        ip_address=request.ip_address,
+        app_values=request.app_values,
+        num_gpus=request.num_gpus,
+        node_name=request.node_name,
+        only_registered_users=request.only_registered_users,
+        location=request.location
+    )
+    return result
+
+@app.post("/join_pool")
+def pool_join(request: JoinPoolRequest):
+    result = join_pool(
+        token=request.token,
+        num_gpus=request.num_gpus,
+        node_name=request.node_name
+    )
+    return result
+
+@app.post("/attach_to_pool")
+def pool_attach(request: JoinPoolRequest):
+    result = attach_to_pool(
+        token=request.token,
+        node_name=request.node_name
+    )
+    return result
+
+@app.post("/stop_pool")
+def pool_stop(request: StopPoolRequest):
+    result = stop_pool(
+        skip_node_deletion=request.skip_node_deletion
+    )
+    return result
+
+@app.post("/delete_nodes")
+def device_delete(request: DeleteNodesRequest):
+    result = delete_nodes(
+        nodes=request.nodes
+    )
+    return result
+
+@app.get("/get_pool_token")
+def devices(mode: int):
+
+    return get_pool_token(mode=TokenType(mode))
+
+@app.get("/fetch_devices")
+def devices():
+    return fetch_devices()
+
+@app.get("/fetch_resources")
+def resources():
+    return fetch_resources()
+
+@app.get("/fetch_job_names")
+def job_names():
+    return fetch_job_names()
+
+@app.get("/fetch_gpus")
+def gpus(available: bool = False):
+    return fetch_gpus(available=available)
+
+@app.post("/fetch_job_details")
+def job_details(request: JobDetailsRequest):
+    return fetch_job_details(jobs=request.jobs)
+
+@app.get("/fetch_job_logs")
+def job_logs(job_name: str, force_namespace: str=None, pod_name: str=None, tail: int=100):
+    return fetch_job_logs(
+        job_name=job_name,
+        force_namespace=force_namespace,
+        pod_name=pod_name,
+        tail=tail
+    )
+
+@app.get("/fetch_job_templates")
+def job_templates():
+    return fetch_job_templates()
+
+@app.get("/fetch_job_defaults")
+def job_templates(name: str):
+    return fetch_job_defaults(name=name)
+
+@app.post("/deploy_job")
+def job_deploy(request: DeployJobRequest):
+    result = deploy_job(
+        template_name=request.template_name,
+        values_dict=request.values,
+        force_namespace=request.force_namespace
+    )
+    return result
+
+@app.post("/delete_job")
+def job_delete(request: DeleteJobRequest):
+    result = delete_job(
+        name=request.name,
+        force_namespace=request.force_namespace
+    )
+    return result
+
+@app.get("/authenticate_user")
+def user_authenticate(username: str, password: str):
+    result = authenticate_user(
+        username=username,
+        password=password
+    )
+    return result
+
+@app.get("/load_user_session")
+def user_session():
+    result = load_user_session()
+    return result
+
+@app.get("/user_logout")
+def logout_user():
+    result = user_logout()
+    return result
+
+@app.get("/is_connected")
+def pool_connected():
+    result = is_connected()
+    return result
+
+@app.get("/is_agent_running")
+def agent_running():
+    result = is_agent_running()
+    return result
+
+@app.get("/is_server")
+def server():
+    result = is_server()
+    return result
+
+@app.post("/pause_agent")
+def agent_pause():
+    result = pause_agent()
+    return result
+
+@app.post("/resume_agent")
+def agent_resume():
+    result = resume_agent()
+    return result
+
+@app.get("/get_ip_addresses")
+def ip_addresses(subnet: str=None):
+    result = get_ip_addresses(subnet=subnet)
+    return result
+
+@app.get("/list_available_pools")
+def pool_connected(user_only: bool=False):
+    result = list_available_pools(user_only=user_only)
+    return result
+
+
+def run_api(host="0.0.0.0", port=8001, log_level="critical"):
+    uvicorn.run(
+        app,
+        host=host,
+        port=port,
+        log_level=log_level
+    )
+
+if __name__ == "__main__":
+    run_api()
+
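The new `bridge_api.py` exposes the existing `kalavai_client.core` helpers as plain HTTP endpoints, so the reflex frontend (or any other process) can drive the client without importing it. A minimal sketch of a caller, assuming the bridge has been started with `run_api()` on its default port 8001 and using the third-party `requests` library (not a dependency declared in this diff); the pool name and IP address below are placeholders:

```python
import requests  # third-party HTTP client, assumed available in the caller's environment

BASE_URL = "http://localhost:8001"  # run_api() defaults to host 0.0.0.0, port 8001

# GET endpoints take query parameters
gpus = requests.get(f"{BASE_URL}/fetch_gpus", params={"available": True}).json()

# POST endpoints expect a JSON body matching the pydantic models in bridge_models.py
result = requests.post(
    f"{BASE_URL}/create_pool",
    json={
        "cluster_name": "my-pool",    # placeholder pool name
        "ip_address": "192.168.1.10", # placeholder seed node IP
        "num_gpus": 1,
    },
).json()
print(result)
```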
kalavai_client/bridge_models.py
ADDED
@@ -0,0 +1,37 @@
+from pydantic import BaseModel
+
+from kalavai_client.core import Job
+
+
+class CreatePoolRequest(BaseModel):
+    cluster_name: str
+    ip_address: str
+    app_values: dict = None
+    num_gpus: int = None
+    node_name: str = None
+    only_registered_users: bool = False
+    location: str = None
+
+class DeleteNodesRequest(BaseModel):
+    nodes: list[str]
+
+class JoinPoolRequest(BaseModel):
+    token: str
+    node_name: str = None
+    num_gpus: int = None
+
+class JobDetailsRequest(BaseModel):
+    jobs: list[Job]
+
+
+class StopPoolRequest(BaseModel):
+    skip_node_deletion: bool = False
+
+class DeployJobRequest(BaseModel):
+    template_name: str
+    values: dict
+    force_namespace: str = None
+
+class DeleteJobRequest(BaseModel):
+    name: str
+    force_namespace: str = None
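The bridge endpoints above parse their JSON bodies into these models, so the same classes can also be used client-side to build and validate payloads before posting them. A small sketch; the template name and values are hypothetical, not templates shipped with kalavai:

```python
from kalavai_client.bridge_models import DeleteNodesRequest, DeployJobRequest

deploy = DeployJobRequest(
    template_name="vllm",                    # hypothetical template name
    values={"model_id": "my-org/my-model"},  # hypothetical template values
)
payload = deploy.dict()  # JSON-ready body for POST /deploy_job (.model_dump() on pydantic v2)

# Validation errors surface before any HTTP call is made
cleanup = DeleteNodesRequest(nodes=["worker-1", "worker-2"])
```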
kalavai_client/cli.py
CHANGED
@@ -15,6 +15,7 @@ import arguably
 from rich.console import Console
 
 from kalavai_client.cluster import CLUSTER
+from kalavai_client.bridge_api import run_api
 from kalavai_client.env import (
     USER_COOKIE,
     USER_LOCAL_SERVER_FILE,
@@ -50,13 +51,15 @@ from kalavai_client.core import (
     create_pool,
     get_ip_addresses,
     pause_agent,
-    resume_agent
+    resume_agent,
+    get_pool_token,
+    delete_nodes,
+    TokenType
 )
 from kalavai_client.utils import (
     check_gpu_drivers,
     load_template,
     run_cmd,
-    generate_join_token,
     user_confirm,
     generate_table,
     request_to_server,
@@ -71,11 +74,6 @@ from kalavai_client.utils import (
     get_public_seeds,
     load_user_session,
     SERVER_IP_KEY,
-    AUTH_KEY,
-    WATCHER_SERVICE_KEY,
-    READONLY_AUTH_KEY,
-    WRITE_AUTH_KEY,
-    PUBLIC_LOCATION_KEY,
     NODE_NAME_KEY,
     CLUSTER_NAME_KEY
 )
@@ -225,29 +223,30 @@ def input_gpus():
 ##################
 
 @arguably.command
-def gui__start(*others):
-    """Run GUI"""
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-def gui__stop(*others):
-    """Stop GUI"""
-    run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} down")
+def gui__start(*others, backend_only=False, gui_frontend_port=3000, gui_backend_port=8000, bridge_port=8001):
+    """Run GUI (docker) and kalavai core backend (api)"""
+
+    if not backend_only:
+        values = {
+            "gui_frontend_port": gui_frontend_port,
+            "gui_backend_port": gui_backend_port,
+            "path": user_path("")
+        }
+        compose_yaml = load_template(
+            template_path=DOCKER_COMPOSE_GUI,
+            values=values)
+        with open(USER_GUI_COMPOSE_FILE, "w") as f:
+            f.write(compose_yaml)
+
+        run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} up -d")
 
+    console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{gui_frontend_port}")
+    run_api(port=bridge_port)
+
+    if not backend_only:
+        run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} down")
     console.log("[green]Kalavai GUI has been stopped")
 
-
 @arguably.command
 def login(*others, username: str=None):
     """
@@ -451,32 +450,19 @@ def pool__token(*others, admin=False, user=False, worker=False):
         return
 
     if admin:
-
+        mode = TokenType.ADMIN
     elif user:
-
+        mode = TokenType.USER
     else:
-
-
-    watcher_service = load_server_info(data_key=WATCHER_SERVICE_KEY, file=USER_LOCAL_SERVER_FILE)
-    public_location = load_server_info(data_key=PUBLIC_LOCATION_KEY, file=USER_LOCAL_SERVER_FILE)
-
-    cluster_token = CLUSTER.get_cluster_token()
-
-    ip_address = load_server_info(SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)
-    cluster_name = load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE)
+        mode = TokenType.WORKER
 
-    join_token = generate_join_token(
-        cluster_ip=ip_address,
-        cluster_name=cluster_name,
-        cluster_token=cluster_token,
-        auth_key=auth_key,
-        watcher_service=watcher_service,
-        public_location=public_location
-    )
-
-    console.log("[green]Join token:")
-    print(join_token)
+    join_token = get_pool_token(mode=mode)
 
+    if "error" in join_token:
+        console.log(f"[red]{join_token}")
+    else:
+        console.log("[green]Join token:")
+        print(join_token)
     return join_token
 
 @arguably.command
@@ -949,24 +935,12 @@ def node__delete(name, *others):
         console.log(f"[red]Problems with your pool: {str(e)}")
         return
 
-
-
-
-
-
-
-            endpoint="/v1/delete_nodes",
-            data=data,
-            server_creds=USER_LOCAL_SERVER_FILE,
-            user_cookie=USER_COOKIE
-        )
-        if result is None or result is True:
-            console.log(f"Node {name} deleted successfully")
-        else:
-            console.log(f"{result}")
-    except Exception as e:
-        console.log(f"[yellow](ignore if stopping worker from dead server). Error when removing node {name}: {str(e)}")
-
+    result = delete_nodes(nodes=[name])
+
+    if "error" in result:
+        console.log(f"[red]{result}")
+    else:
+        console.log(f"[green]{result}")
 
 @arguably.command
 def node__cordon(node_name, *others):
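The refactored `pool__token` command above now only maps its `--admin`/`--user`/`--worker` flags to a `TokenType` and defers to `core.get_pool_token`, which returns a dict instead of printing or raising. A rough sketch of the equivalent flow outside the CLI (assumes it runs where the seed node credentials are stored, otherwise an `"error"` dict comes back):

```python
from kalavai_client.core import TokenType, get_pool_token

def resolve_mode(admin: bool = False, user: bool = False) -> TokenType:
    # mirrors the if/elif/else branch in pool__token
    if admin:
        return TokenType.ADMIN
    if user:
        return TokenType.USER
    return TokenType.WORKER

join_token = get_pool_token(mode=resolve_mode(user=True))
print(join_token)  # {"token": ...} on success, {"error": ...} otherwise
```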
kalavai_client/core.py
CHANGED
@@ -6,11 +6,14 @@ import uuid
 import socket
 import ipaddress
 import netifaces as ni
-
+from typing import Optional
 from pydantic import BaseModel
+from enum import Enum
 
 from kalavai_client.cluster import CLUSTER
 from kalavai_client.utils import (
+    check_gpu_drivers,
+    generate_join_token,
     request_to_server,
     load_server_info,
     decode_dict,
@@ -68,11 +71,11 @@ from kalavai_client.env import (
 )
 
 class Job(BaseModel):
-    owner: str = None
-    name: str = None
-    workers: str = None
-    endpoint: str = None
-    status: str = None
+    owner: Optional[str] = None
+    name: Optional[str] = None
+    workers: Optional[str] = None
+    endpoint: Optional[str] = None
+    status: Optional[str] = None
 
 class DeviceStatus(BaseModel):
     name: str
@@ -89,6 +92,11 @@ class GPU(BaseModel):
     ready: bool
     model: str
 
+class TokenType(Enum):
+    ADMIN = 0
+    USER = 1
+    WORKER = 2
+
 
 def init_user_workspace(force_namespace=None):
 
@@ -461,6 +469,25 @@ def check_token(token, public=False):
         return {"status": True}
     except Exception as e:
         return {"error": str(e)}
+
+def delete_nodes(nodes):
+    data = {
+        "node_names": nodes
+    }
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/delete_nodes",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        if result is None or result is True:
+            return {"success": nodes}
+        else:
+            return {"error": result}
+    except Exception as e:
+        return {"error": f"Error when removing nodes {nodes}: {str(e)}"}
 
 def attach_to_pool(token, node_name=None):
     if node_name is None:
@@ -530,11 +557,24 @@ def attach_to_pool(token, node_name=None):
 
     return cluster_name
 
-def
+def get_max_gpus():
+    try:
+        has_gpus = check_gpu_drivers()
+        if has_gpus:
+            return int(run_cmd("nvidia-smi -L | wc -l").decode())
+        else:
+            return 0
+    except:
+        return 0
+
+def join_pool(token, num_gpus=None, node_name=None):
     compatibility = check_worker_compatibility()
     if len(compatibility["issues"]) > 0:
         return {"error": compatibility["issues"]}
-
+
+    if num_gpus is None:
+        num_gpus = get_max_gpus()
+
     if node_name is None:
         node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
 
@@ -751,6 +791,40 @@ def create_pool(cluster_name: str, ip_address: str, app_values: str=None, pool_c
 
     return {"success"}
 
+def get_pool_token(mode: TokenType):
+
+    try:
+        match mode:
+            case TokenType.ADMIN:
+                auth_key = load_server_info(data_key=AUTH_KEY, file=USER_LOCAL_SERVER_FILE)
+            case TokenType.USER:
+                auth_key = load_server_info(data_key=WRITE_AUTH_KEY, file=USER_LOCAL_SERVER_FILE)
+            case _:
+                auth_key = load_server_info(data_key=READONLY_AUTH_KEY, file=USER_LOCAL_SERVER_FILE)
+        if auth_key is None:
+            return {"error": "Cannot generate selected token mode. Are you the seed node?"}
+
+        watcher_service = load_server_info(data_key=WATCHER_SERVICE_KEY, file=USER_LOCAL_SERVER_FILE)
+        public_location = load_server_info(data_key=PUBLIC_LOCATION_KEY, file=USER_LOCAL_SERVER_FILE)
+
+        cluster_token = CLUSTER.get_cluster_token()
+
+        ip_address = load_server_info(SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)
+        cluster_name = load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE)
+
+        join_token = generate_join_token(
+            cluster_ip=ip_address,
+            cluster_name=cluster_name,
+            cluster_token=cluster_token,
+            auth_key=auth_key,
+            watcher_service=watcher_service,
+            public_location=public_location
+        )
+
+        return {"token": join_token}
+    except Exception as e:
+        return {"error": f"Error when generating token: {str(e)}"}
+
 def pool_init(pool_config_values_path=None):
     """Deploy configured objects to initialise pool"""
     if pool_config_values_path is None:
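Like `get_pool_token`, the new `delete_nodes` helper reports failures by returning a dict with an `"error"` key rather than raising, which is what both the CLI command and the bridge endpoint branch on. A short sketch of a caller; the node names are placeholders and the call assumes an active pool connection:

```python
from kalavai_client.core import delete_nodes

result = delete_nodes(nodes=["worker-1", "worker-2"])  # placeholder node names
if "error" in result:
    print(f"Could not remove nodes: {result['error']}")
else:
    print(f"Removed nodes: {result['success']}")
```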
{kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: kalavai-client
-Version: 0.5.19
+Version: 0.5.20
 Summary: Client app for kalavai platform
 License: Apache-2.0
 Keywords: LLM,platform
@@ -71,6 +71,19 @@ Description-Content-Type: text/markdown
 
 Kalavai's goal is to make using LLMs in real applications accessible and affordable to all. It's a _magic box_ that **integrates all the components required to make LLM useful in the age of massive computing**, from sourcing computing power, managing distributed infrastructure and storage, using industry-standard model engines and orchestration of LLMs.
 
+### Core features
+
+- Manage **multiple devices resources as one**. One pool of RAM, CPUs and GPUs
+- **Deploy Large Language Models seamlessly across devices**, wherever they are (multiple clouds, on premises, personal devices)
+- Auto-discovery: all **models are automatically exposed** through a single OpenAI-like API and a ChatGPT-like UI playground
+- Compatible with [most popular model engines](#support-for-llm-engines)
+- [Easy to expand](https://github.com/kalavai-net/kube-watcher/tree/main/templates) to custom workloads
+
+
+<details>
+
+**<summary>Video tutorials</summary>**
+
 ### Aggregate multiple devices in an LLM pool
 
 https://github.com/user-attachments/assets/4be59886-1b76-4400-ab5c-c803e3e414ec
@@ -88,12 +101,16 @@ https://github.com/user-attachments/assets/7df73bbc-d129-46aa-8ce5-0735177dedeb
 https://github.com/user-attachments/assets/0d2316f3-79ea-46ac-b41e-8ef720f52672
 
 
-
+</details>
 
-
+### Latest updates
 
+- 20 February 2025: New shiny GUI interface to control LLM pools and deploy models
 - 6 February 2025: 🔥🔥🔥 Access **DeepSeek R1 model for free** when you join our [public LLM pool](https://kalavai-net.github.io/kalavai-client/public_llm_pool/)
 - 31 January 2025: `kalavai-client` is now a [PyPI package](https://pypi.org/project/kalavai-client/), easier to install than ever!
+<details>
+<summary>More news</summary>
+
 - 27 January 2025: Support for accessing pools from remote computers
 - 9 January 2025: Added support for [Aphrodite Engine](https://github.com/aphrodite-engine/aphrodite-engine) models
 - 8 January 2025: Release of [a free, public, shared pool](/docs/docs/public_llm_pool.md) for community LLM deployment
@@ -102,6 +119,7 @@ https://github.com/user-attachments/assets/0d2316f3-79ea-46ac-b41e-8ef720f52672
 - 24 November 2024: Common pools with private user spaces
 - 30 October 2024: Release of our [public pool platform](https://platform.kalavai.net)
 
+</details>
 
 ### Support for LLM engines
 
@@ -139,6 +157,10 @@ The `kalavai-client` is the main tool to interact with the Kalavai platform, to
 From release **v0.5.0, you can now install `kalavai-client` in non-worker computers**. You can run a pool on a set of machines and have the client on a remote computer from which you access the LLM pool. Because the client only requires having python installed, this means more computers are now supported to run it.
 
 
+<details>
+
+<summary>Requirements</summary>
+
 ### Requirements
 
 For workers sharing resources with the pool:
@@ -150,8 +172,11 @@ For workers sharing resources with the pool:
 
 Any system that runs python 3.6+ is able to run the `kalavai-client` and therefore connect and operate an LLM pool, [without sharing with the pool](). Your computer won't be adding its capacity to the pool, but it wil be able to deploy jobs and interact with models.
 
+</details>
+
+<details>
 
-
+<summary> Common issues</summary>
 
 If you see the following error:
 
@@ -175,6 +200,7 @@ Upgrade your setuptools:
 ```bash
 pip install -U setuptools
 ```
+</details>
 
 ### Install the client
 
@@ -184,54 +210,32 @@ The client is a python package and can be installed with one command:
 pip install kalavai-client
 ```
 
-## Public LLM pools: crowdsource community resources
-
-This is the **easiest and most powerful** way to experience Kalavai. It affords users the full resource capabilities of the community and access to all its deployed LLMs, via an [OpenAI-compatible endpoint](https://kalavai-net.github.io/kalavai-client/public_llm_pool/#single-api-endpoint) as well as a [UI-based playground](https://kalavai-net.github.io/kalavai-client/public_llm_pool/#ui-playground).
-
-Check out [our guide](https://kalavai-net.github.io/kalavai-client/public_llm_pool/) on how to join and start deploying LLMs.
-
 
 ## Createa a local, private LLM pool
 
-Kalavai is **free to use, no caps, for both commercial and non-commercial purposes**. All you need to get started is one or more computers that can see each other (i.e. within the same network), and you are good to go. If you
+> Kalavai is **free to use, no caps, for both commercial and non-commercial purposes**. All you need to get started is one or more computers that can see each other (i.e. within the same network), and you are good to go. If you are interested in join computers in different locations / networks, [contact us](mailto:info@kalavai.net) or [book a demo](https://app.onecal.io/b/kalavai/book-a-demo) with the founders.
 
-
-
-Simply use the client to start your seed node:
+You can create and manage your pools with the new kalavai GUI, which can be started with:
 
 ```bash
-kalavai
+kalavai gui start
 ```
 
-
-```bash
-$ kalavai pool token --user
+This will expose the GUI and the backend services in localhost. By default, the GUI is accessible via [http://localhost:3000](http://localhost:3000). In the UI users can create and join LLM pools, monitor devices, deploy LLMs and more.
 
-
-```
-
-### 2. Add worker nodes
-
-Increase the power of your AI pool by inviting others to join.
-
-Copy the joining token. On the worker node, run:
+![kalavai-gui-demo.gif](https://raw.githubusercontent.com/kalavai-net/kalavai-client/main/docs/docs/assets/images/gui_demo.gif)
 
-
-kalavai pool join <token>
-```
+Check out our [getting started guide](https://kalavai-net.github.io/kalavai-client/getting_started/) for next steps.
 
-### 3. Attach more clients
 
+## Public LLM pools: crowdsource community resources
 
-
-kalavai pool attach <token>
-```
+This is the **easiest and most powerful** way to experience Kalavai. It affords users the full resource capabilities of the community and access to all its deployed LLMs, via an [OpenAI-compatible endpoint](https://kalavai-net.github.io/kalavai-client/public_llm_pool/#single-api-endpoint) as well as a [UI-based playground](https://kalavai-net.github.io/kalavai-client/public_llm_pool/#ui-playground).
 
-
+Check out [our guide](https://kalavai-net.github.io/kalavai-client/public_llm_pool/) on how to join and start deploying LLMs.
 
 
-
+## Enough already, let's run stuff!
 
 Check our [examples](examples/) to put your new AI pool to good use!
 - [Single node vLLM GPU LLM](examples/singlenode_gpu_vllm.md) deployment
@@ -244,6 +248,10 @@ Check our [examples](examples/) to put your new AI pool to good use!
 
 If your system is not currently supported, [open an issue](https://github.com/kalavai-net/kalavai-client/issues) and request it. We are expanding this list constantly.
 
+<details>
+
+**<summary>Hardware and OS compatibility </summary>**
+
 ### OS compatibility
 
 Since **worker nodes** run inside docker, any machine that can run docker **should** be compatible with Kalavai. Here are instructions for [linux](https://docs.docker.com/engine/install/), [Windows](https://docs.docker.com/desktop/setup/install/windows-install/) and [MacOS](https://docs.docker.com/desktop/setup/install/mac-install/).
@@ -257,6 +265,7 @@ The kalavai client, which controls and access pools, can be installed on any mac
 - NVIDIA GPU
 - AMD and Intel GPUs are currently not supported ([interested in helping us test it?](https://kalavai-net.github.io/kalavai-client/compatibility/#help-testing-amd-gpus))
 
+</details>
 
 ## Roadmap
 
@@ -268,6 +277,7 @@ The kalavai client, which controls and access pools, can be installed on any mac
 - [x] Collaborative LLM deployment
 - [x] Ray cluster support
 - [x] Kalavai client on Mac
+- [x] Kalavai pools UI
 - [ ] [TEMPLATE] [GPUStack](https://github.com/gpustack/gpustack) support
 - [ ] [TEMPLATE] [exo](https://github.com/exo-explore/exo) support
 - [ ] Support for AMD GPUs
@@ -293,7 +303,9 @@ Anything missing here? Give us a shout in the [discussion board](https://github.
 
 ## Build from source
 
-
+<details>
+
+<summary>Expand</summary>
 
 Python version >= 3.6.
 
@@ -313,6 +325,7 @@ Build python wheels:
 bash publish.sh build
 ```
 
+</details>
 
 ### Unit tests
 
@@ -322,5 +335,3 @@ To run the unit tests, use:
 python -m unittest
 ```
 
-docker run --rm --net=host -v /root/.cache/kalavai/:/root/.cache/kalavai/ ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file /root/.cache/kalavai/apps.yaml --kubeconfig /root/.cache/kalavai/kubeconfig
-
{kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/RECORD
RENAMED
@@ -1,4 +1,4 @@
-kalavai_client/__init__.py,sha256=
+kalavai_client/__init__.py,sha256=YJ16sMq9PK__3-NzHbGxz6Zz5IYwpCEzy4p-Yddy-o8,23
 kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
 kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 kalavai_client/assets/apps.yaml,sha256=V1x1FY-fyYsYrXvcIMv3QrBCgJ7jNunluRyJh67eWB0,5983
@@ -11,13 +11,15 @@ kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaY
 kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
 kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
 kalavai_client/auth.py,sha256=QsBh28L2LwjBBK6pTUE4Xu36lLDTyetyU1YfS1Hbb6g,1717
-kalavai_client/
+kalavai_client/bridge_api.py,sha256=hp5YjMu0HBI9VGMx6hahXfMIGPLwNtSd09UKxmKnGXc,4852
+kalavai_client/bridge_models.py,sha256=rXBnE5r6Oe9GxGkk1ITkvp6YQqahp72Rrzf-QM2quH8,771
+kalavai_client/cli.py,sha256=u9zy2H3Ntn3fOnQyYU8XjVPmouZNqyJxiyXBVA5EtEA,47862
 kalavai_client/cluster.py,sha256=gwjmdsd--YrffT0BmZDOEpbrdm3lPskUuN5jdgcrOR0,12947
-kalavai_client/core.py,sha256=
+kalavai_client/core.py,sha256=JTyGpi4Xez_kAgEpcInErS-Ot3dFLoNY48nEBQJLuOc,29653
 kalavai_client/env.py,sha256=Zg2pP-xGJpQumo56KMBxBLgIsBmcNN0S9R-ZP2-s630,2604
 kalavai_client/utils.py,sha256=rz5W9PRZrTpgdmOs6yeqUi4f_q_L-3BJ5g1o7Asgnyo,13386
-kalavai_client-0.5.
-kalavai_client-0.5.
-kalavai_client-0.5.
-kalavai_client-0.5.
-kalavai_client-0.5.
+kalavai_client-0.5.20.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+kalavai_client-0.5.20.dist-info/METADATA,sha256=aHwXRZpoiBBSDbjthDxR2ncdeLTInAUJ2cci3Li7Dn0,14800
+kalavai_client-0.5.20.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
+kalavai_client-0.5.20.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
+kalavai_client-0.5.20.dist-info/RECORD,,
{kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/LICENSE
RENAMED
File without changes
{kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/WHEEL
RENAMED
File without changes
{kalavai_client-0.5.19.dist-info → kalavai_client-0.5.20.dist-info}/entry_points.txt
RENAMED
File without changes