kalavai-client 0.6.14__py3-none-any.whl → 0.6.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kalavai_client/__init__.py +1 -1
- kalavai_client/assets/apps.yaml +7 -5
- kalavai_client/assets/apps_values.yaml +22 -1
- kalavai_client/assets/docker-compose-gui.yaml +1 -0
- kalavai_client/assets/docker-compose-template.yaml +2 -1
- kalavai_client/assets/pool_config_template.yaml +1 -0
- kalavai_client/assets/pool_config_values.yaml +1 -9
- kalavai_client/bridge_api.py +5 -10
- kalavai_client/bridge_models.py +1 -3
- kalavai_client/cli.py +44 -16
- kalavai_client/core.py +32 -14
- kalavai_client/utils.py +2 -2
- {kalavai_client-0.6.14.dist-info → kalavai_client-0.6.17.dist-info}/METADATA +23 -53
- kalavai_client-0.6.17.dist-info/RECORD +25 -0
- kalavai_client-0.6.14.dist-info/RECORD +0 -25
- {kalavai_client-0.6.14.dist-info → kalavai_client-0.6.17.dist-info}/LICENSE +0 -0
- {kalavai_client-0.6.14.dist-info → kalavai_client-0.6.17.dist-info}/WHEEL +0 -0
- {kalavai_client-0.6.14.dist-info → kalavai_client-0.6.17.dist-info}/entry_points.txt +0 -0
kalavai_client/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
|
2
|
-
__version__ = "0.6.14"
|
2
|
+
__version__ = "0.6.17"
|
kalavai_client/assets/apps.yaml
CHANGED
@@ -18,6 +18,8 @@ repositories:
|
|
18
18
|
url: https://opencost.github.io/opencost-helm-chart
|
19
19
|
- name: minio
|
20
20
|
url: https://charts.min.io/
|
21
|
+
- name: langfuse
|
22
|
+
url: https://langfuse.github.io/langfuse-k8s
|
21
23
|
|
22
24
|
releases:
|
23
25
|
- name: datashim
|
@@ -28,7 +30,7 @@ releases:
|
|
28
30
|
- name: lago
|
29
31
|
namespace: kalavai
|
30
32
|
chart: kalavai/lago
|
31
|
-
installed: {{
|
33
|
+
installed: {{deploy_lago}}
|
32
34
|
set:
|
33
35
|
- name: external.api.nodePort
|
34
36
|
value: 32000
|
@@ -79,7 +81,7 @@ releases:
|
|
79
81
|
namespace: kalavai
|
80
82
|
chart: kalavai/kalavai-helios
|
81
83
|
version: "0.1.11"
|
82
|
-
installed: false
|
84
|
+
installed: false
|
83
85
|
set:
|
84
86
|
- name: deployment.watcher_endpoint
|
85
87
|
value: "http://{{watcher_service}}"
|
@@ -94,11 +96,11 @@ releases:
|
|
94
96
|
- name: opencost
|
95
97
|
namespace: opencost
|
96
98
|
chart: opencost-charts/opencost
|
97
|
-
installed: {{
|
99
|
+
installed: {{deploy_opencost}}
|
98
100
|
- name: prometheus
|
99
101
|
namespace: prometheus-system
|
100
102
|
chart: prometheus/prometheus
|
101
|
-
installed: {{
|
103
|
+
installed: {{deploy_prometheus}}
|
102
104
|
set:
|
103
105
|
- name: prometheus-pushgateway.enabled
|
104
106
|
value: false
|
@@ -152,7 +154,7 @@ releases:
|
|
152
154
|
- name: replicas
|
153
155
|
value: 1
|
154
156
|
- name: image_tag
|
155
|
-
value: "v2025.06.
|
157
|
+
value: "v2025.06.15"
|
156
158
|
- name: deployment.in_cluster
|
157
159
|
value: "True"
|
158
160
|
- name: deployment.kalavai_username_key
|
@@ -1,3 +1,24 @@
|
|
1
|
+
### APS ###
|
2
|
+
- name: deploy_lago
|
3
|
+
default: "False"
|
4
|
+
description: "Deploy Lago payment system"
|
5
|
+
|
6
|
+
- name: deploy_opencost
|
7
|
+
default: "False"
|
8
|
+
description: "Deploy Opencost cost monitoring system"
|
9
|
+
|
10
|
+
- name: deploy_prometheus
|
11
|
+
default: "True"
|
12
|
+
description: "Deploy Prometheus system monitoring system"
|
13
|
+
|
14
|
+
- name: deploy_langfuse
|
15
|
+
default: "False"
|
16
|
+
description: "Deploy Langfuse LLM tracing system"
|
17
|
+
|
18
|
+
######
|
19
|
+
|
20
|
+
### VARIABLES ###
|
21
|
+
|
1
22
|
- name: kalavai_api_endpoint
|
2
23
|
default: https://platform.kalavai.net/_/api
|
3
24
|
description: ""
|
@@ -78,4 +99,4 @@
|
|
78
99
|
|
79
100
|
- name: minio_rootPassword
|
80
101
|
default: "password"
|
81
|
-
description: ""
|
102
|
+
description: ""
|
@@ -3,6 +3,7 @@ services:
|
|
3
3
|
{{vpn_name}}:
|
4
4
|
image: gravitl/netclient:v0.90.0
|
5
5
|
container_name: {{vpn_name}}
|
6
|
+
platform: linux/amd64
|
6
7
|
cap_add:
|
7
8
|
- NET_ADMIN
|
8
9
|
- SYS_MODULE
|
@@ -17,7 +18,7 @@ services:
|
|
17
18
|
# run worker only if command is set
|
18
19
|
{%if command %}
|
19
20
|
{{service_name}}:
|
20
|
-
image: docker.io/bundenth/kalavai-runner:
|
21
|
+
image: docker.io/bundenth/kalavai-runner:{{target_platform}}-latest
|
21
22
|
pull_policy: always
|
22
23
|
container_name: {{service_name}}
|
23
24
|
{% if vpn %}
|
@@ -1,12 +1,4 @@
|
|
1
|
-
# STORAGE #
|
2
|
-
- name: storage_label_selector
|
3
|
-
default: "kalavai.storage.enabled:True"
|
4
|
-
description: ""
|
5
|
-
|
6
1
|
- name: storage_class_name
|
7
|
-
default: "longhorn
|
2
|
+
default: "longhorn"
|
8
3
|
description: ""
|
9
4
|
|
10
|
-
- name: storage_replicas
|
11
|
-
default: 1
|
12
|
-
description: ""
|
kalavai_client/bridge_api.py
CHANGED
@@ -95,7 +95,6 @@ def pool_create(request: CreatePoolRequest, api_key: str = Depends(verify_api_ke
|
|
95
95
|
- **location**: Location of the pool
|
96
96
|
- **description**: Pool description
|
97
97
|
- **token_mode**: Token type for authentication
|
98
|
-
- **frontend**: Whether this is a frontend request
|
99
98
|
"""
|
100
99
|
result = create_pool(
|
101
100
|
cluster_name=request.cluster_name,
|
@@ -103,11 +102,9 @@ def pool_create(request: CreatePoolRequest, api_key: str = Depends(verify_api_ke
|
|
103
102
|
app_values=request.app_values,
|
104
103
|
num_gpus=request.num_gpus,
|
105
104
|
node_name=request.node_name,
|
106
|
-
only_registered_users=request.only_registered_users,
|
107
105
|
location=request.location,
|
108
106
|
description=request.description,
|
109
|
-
token_mode=request.token_mode
|
110
|
-
frontend=request.frontend
|
107
|
+
token_mode=request.token_mode
|
111
108
|
)
|
112
109
|
return result
|
113
110
|
|
@@ -123,14 +120,12 @@ def pool_join(request: JoinPoolRequest, api_key: str = Depends(verify_api_key)):
|
|
123
120
|
- **ip_address**: IP address for the node
|
124
121
|
- **node_name**: Name of the node
|
125
122
|
- **num_gpus**: Number of GPUs to allocate
|
126
|
-
- **frontend**: Whether this is a frontend request
|
127
123
|
"""
|
128
124
|
result = join_pool(
|
129
125
|
token=request.token,
|
130
126
|
num_gpus=request.num_gpus,
|
131
127
|
node_name=request.node_name,
|
132
|
-
ip_address=request.ip_address
|
133
|
-
frontend=request.frontend
|
128
|
+
ip_address=request.ip_address
|
134
129
|
)
|
135
130
|
return result
|
136
131
|
|
@@ -249,9 +244,9 @@ def send_pool_invites(request: InvitesRequest, api_key: str = Depends(verify_api
|
|
249
244
|
summary="Fetch resources",
|
250
245
|
description="Get available resources",
|
251
246
|
response_description="Resource information")
|
252
|
-
def resources(api_key: str = Depends(verify_api_key)):
|
247
|
+
def resources(request: NodesActionRequest, api_key: str = Depends(verify_api_key)):
|
253
248
|
"""Get available resources"""
|
254
|
-
return fetch_resources()
|
249
|
+
return fetch_resources(node_names=request.nodes)
|
255
250
|
|
256
251
|
@app.get("/fetch_job_names",
|
257
252
|
summary="Fetch job names",
|
@@ -322,7 +317,7 @@ def job_templates(api_key: str = Depends(verify_api_key)):
|
|
322
317
|
@app.get("/fetch_job_defaults",
|
323
318
|
summary="Fetch job defaults",
|
324
319
|
description="Get default values for a job template",
|
325
|
-
response_description="Job
|
320
|
+
response_description="Job metadata values")
|
326
321
|
def job_templates(name: str, api_key: str = Depends(verify_api_key)):
|
327
322
|
"""
|
328
323
|
Get job defaults with the following parameters:
|
kalavai_client/bridge_models.py
CHANGED
@@ -13,14 +13,12 @@ class CreatePoolRequest(BaseModel):
|
|
13
13
|
app_values: dict = Field(None, description="Application configuration values")
|
14
14
|
num_gpus: int = Field(None, description="Number of GPUs to allocate")
|
15
15
|
node_name: str = Field(None, description="Name of the node")
|
16
|
-
only_registered_users: bool = Field(False, description="Whether to restrict access to registered users only")
|
17
16
|
location: str = Field(None, description="Geographic location of the pool")
|
18
17
|
token_mode: TokenType = Field(TokenType.USER, description="Token type for authentication")
|
19
18
|
description: str = Field("", description="Description of the pool")
|
20
|
-
frontend: bool = Field(False, description="Whether this is a frontend request")
|
21
19
|
|
22
20
|
class NodesActionRequest(BaseModel):
|
23
|
-
nodes: list[str] = Field(description="List of node names to perform the action on")
|
21
|
+
nodes: list[str] = Field(None, description="List of node names to perform the action on")
|
24
22
|
|
25
23
|
class JoinPoolRequest(BaseModel):
|
26
24
|
token: str = Field(description="Token to join the pool")
|
kalavai_client/cli.py
CHANGED
@@ -207,28 +207,43 @@ def input_gpus(non_interactive=False):
|
|
207
207
|
@arguably.command
|
208
208
|
def gui__start(
|
209
209
|
*others,
|
210
|
-
gui_frontend_port=3000,
|
211
|
-
gui_backend_port=8000,
|
212
|
-
bridge_port=8001,
|
213
210
|
log_level="critical",
|
214
211
|
backend_only=False
|
215
212
|
):
|
216
213
|
"""Run GUI (docker) and kalavai core backend (api)"""
|
217
|
-
|
218
|
-
|
219
|
-
|
214
|
+
ports_needed = 1 if backend_only else 3
|
215
|
+
# find 3 available ports
|
216
|
+
ip = socket.gethostbyname (socket.gethostname())
|
217
|
+
ports = []
|
218
|
+
for port in range(49152,65535):
|
219
|
+
try:
|
220
|
+
serv = socket.socket(socket.AF_INET,socket.SOCK_STREAM) # create a new socket
|
221
|
+
serv.bind((ip, port)) # bind socket with address
|
222
|
+
serv.close()
|
223
|
+
ports.append(port)
|
224
|
+
except:
|
225
|
+
#port closed
|
226
|
+
pass
|
227
|
+
if len(ports) >= ports_needed:
|
228
|
+
break
|
220
229
|
|
230
|
+
if len(ports) < ports_needed:
|
231
|
+
# if not found, error
|
232
|
+
console.log(f"[red]Cannot initialise GUI: Could not find {ports_needed} free ports in your machine")
|
233
|
+
return
|
234
|
+
console.log(f"Using ports: {ports}")
|
235
|
+
|
221
236
|
user_key = load_user_id()
|
222
237
|
if user_key is not None:
|
223
238
|
console.log(f"[green]Using user key: {user_key}")
|
224
239
|
if not backend_only:
|
225
240
|
values = {
|
226
|
-
"gui_frontend_port":
|
227
|
-
"gui_backend_port":
|
228
|
-
"bridge_port":
|
241
|
+
"gui_frontend_port": ports[1],
|
242
|
+
"gui_backend_port": ports[2],
|
243
|
+
"bridge_port": ports[0],
|
229
244
|
"path": user_path("", create_path=True),
|
230
245
|
"protected_access": user_key
|
231
|
-
|
246
|
+
}
|
232
247
|
compose_yaml = load_template(
|
233
248
|
template_path=DOCKER_COMPOSE_GUI,
|
234
249
|
values=values)
|
@@ -237,11 +252,11 @@ def gui__start(
|
|
237
252
|
|
238
253
|
run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} up -d")
|
239
254
|
|
240
|
-
console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{
|
255
|
+
console.log(f"[green]Loading GUI, may take a few minutes. It will be available at http://localhost:{ports[1]}")
|
241
256
|
print(
|
242
257
|
"Deploying bridge API"
|
243
258
|
)
|
244
|
-
run_api(port=
|
259
|
+
run_api(port=ports[0], log_level=log_level)
|
245
260
|
|
246
261
|
if not backend_only:
|
247
262
|
run_cmd(f"docker compose --file {USER_GUI_COMPOSE_FILE} down")
|
@@ -325,7 +340,7 @@ def pool__unpublish(cluster_name=None, *others):
|
|
325
340
|
console.log(f"[green]Your cluster has been removed from {KALAVAI_PLATFORM_URL}")
|
326
341
|
|
327
342
|
@arguably.command
|
328
|
-
def pool__package_worker(output_file, *others, num_gpus=0, ip_address="0.0.0.0", node_name=None, storage_compatible=True):
|
343
|
+
def pool__package_worker(output_file, *others, platform="amd64", num_gpus=0, ip_address="0.0.0.0", node_name=None, storage_compatible=True):
|
329
344
|
"""
|
330
345
|
[AUTH]Package a worker for distribution (docker compose only)
|
331
346
|
"""
|
@@ -335,6 +350,7 @@ def pool__package_worker(output_file, *others, num_gpus=0, ip_address="0.0.0.0",
|
|
335
350
|
return
|
336
351
|
|
337
352
|
compose = generate_worker_package(
|
353
|
+
target_platform=platform,
|
338
354
|
num_gpus=num_gpus,
|
339
355
|
ip_address=ip_address,
|
340
356
|
node_name=node_name,
|
@@ -374,7 +390,7 @@ def pool__list(*others, user_only=False):
|
|
374
390
|
|
375
391
|
|
376
392
|
@arguably.command
|
377
|
-
def pool__start(cluster_name, *others, ip_address: str=None, location: str=None, app_values: str=None, pool_config_values: str=None, non_interactive: bool=False):
|
393
|
+
def pool__start(cluster_name, *others, platform="amd64", ip_address: str=None, location: str=None, app_values: str=None, pool_config_values: str=None, non_interactive: bool=False):
|
378
394
|
"""
|
379
395
|
Start Kalavai pool and start/resume sharing resources.
|
380
396
|
|
@@ -411,6 +427,7 @@ def pool__start(cluster_name, *others, ip_address: str=None, location: str=None
|
|
411
427
|
console.log(f"[green]Creating {cluster_name} pool, this may take a few minutes...")
|
412
428
|
|
413
429
|
result = create_pool(
|
430
|
+
target_platform=platform,
|
414
431
|
cluster_name=cluster_name,
|
415
432
|
ip_address=ip_address,
|
416
433
|
app_values=app_values,
|
@@ -472,7 +489,7 @@ def pool__check_token(token, *others, public=False):
|
|
472
489
|
return True
|
473
490
|
|
474
491
|
@arguably.command
|
475
|
-
def pool__join(token, *others, node_name=None, non_interactive=False):
|
492
|
+
def pool__join(token, *others, platform="amd64", node_name=None, non_interactive=False):
|
476
493
|
"""
|
477
494
|
Join Kalavai pool and start/resume sharing resources.
|
478
495
|
|
@@ -522,6 +539,7 @@ def pool__join(token, *others, node_name=None, non_interactive=False):
|
|
522
539
|
|
523
540
|
console.log("Connecting worker to the pool...")
|
524
541
|
result = join_pool(
|
542
|
+
target_platform=platform,
|
525
543
|
token=token,
|
526
544
|
node_name=node_name,
|
527
545
|
num_gpus=num_gpus,
|
@@ -1093,12 +1111,22 @@ def job__defaults(template_name, *others):
|
|
1093
1111
|
return
|
1094
1112
|
|
1095
1113
|
# deploy template with kube-watcher
|
1096
|
-
|
1114
|
+
data = fetch_job_defaults(name=template_name)
|
1115
|
+
metadata = data["metadata"]
|
1116
|
+
defaults = data["defaults"]
|
1097
1117
|
if "error" in defaults:
|
1098
1118
|
console.log(f"[red]Error when fetching job defaults: {defaults}")
|
1099
1119
|
print(
|
1100
1120
|
json.dumps(defaults, indent=3)
|
1101
1121
|
)
|
1122
|
+
print(
|
1123
|
+
"*****************",
|
1124
|
+
"Metadata",
|
1125
|
+
"*****************"
|
1126
|
+
)
|
1127
|
+
print(
|
1128
|
+
json.dumps(metadata, indent=3)
|
1129
|
+
)
|
1102
1130
|
|
1103
1131
|
|
1104
1132
|
@arguably.command
|
kalavai_client/core.py
CHANGED
@@ -13,7 +13,6 @@ import re
|
|
13
13
|
|
14
14
|
from kalavai_client.cluster import CLUSTER
|
15
15
|
from kalavai_client.utils import (
|
16
|
-
DEPLOY_LLM_SIDECARS_KEY,
|
17
16
|
NODE_ROLE_LABEL,
|
18
17
|
check_gpu_drivers,
|
19
18
|
generate_join_token,
|
@@ -80,6 +79,7 @@ class Job(BaseModel):
|
|
80
79
|
workers: Optional[str] = None
|
81
80
|
endpoint: Optional[str] = None
|
82
81
|
status: Optional[str] = None
|
82
|
+
host_nodes: Optional[str] = None
|
83
83
|
|
84
84
|
class DeviceStatus(BaseModel):
|
85
85
|
name: str
|
@@ -198,19 +198,22 @@ def get_ip_addresses(subnet=None):
|
|
198
198
|
raise ValueError(f"No IPs available on subnet {subnet}")
|
199
199
|
return ips
|
200
200
|
|
201
|
-
def fetch_resources():
|
201
|
+
def fetch_resources(node_names: list[str]=None):
|
202
|
+
data = {}
|
203
|
+
if node_names is not None:
|
204
|
+
data["node_names"] = node_names
|
202
205
|
try:
|
203
206
|
total = request_to_server(
|
204
207
|
method="get",
|
205
208
|
endpoint="/v1/get_cluster_total_resources",
|
206
|
-
data=
|
209
|
+
data=data,
|
207
210
|
server_creds=USER_LOCAL_SERVER_FILE,
|
208
211
|
user_cookie=USER_COOKIE
|
209
212
|
)
|
210
213
|
available = request_to_server(
|
211
214
|
method="get",
|
212
215
|
endpoint="/v1/get_cluster_available_resources",
|
213
|
-
data=
|
216
|
+
data=data,
|
214
217
|
server_creds=USER_LOCAL_SERVER_FILE,
|
215
218
|
user_cookie=USER_COOKIE
|
216
219
|
)
|
@@ -224,14 +227,14 @@ def fetch_job_defaults(name):
|
|
224
227
|
"template": name
|
225
228
|
}
|
226
229
|
try:
|
227
|
-
|
230
|
+
metadata = request_to_server(
|
228
231
|
method="get",
|
229
232
|
endpoint="/v1/job_defaults",
|
230
233
|
data=data,
|
231
234
|
server_creds=USER_LOCAL_SERVER_FILE,
|
232
235
|
user_cookie=USER_COOKIE
|
233
236
|
)
|
234
|
-
return
|
237
|
+
return metadata
|
235
238
|
except Exception as e:
|
236
239
|
return {"error": str(e)}
|
237
240
|
|
@@ -296,14 +299,18 @@ def fetch_job_details(jobs: list[Job]):
|
|
296
299
|
)
|
297
300
|
workers_status = defaultdict(int)
|
298
301
|
restart_counts = 0
|
302
|
+
host_nodes = set()
|
299
303
|
for ns, ss in result.items():
|
300
304
|
if ns != namespace: # same job name, different namespace
|
301
305
|
continue
|
302
306
|
for _, values in ss.items():
|
303
|
-
# TODO: get nodes involved in deployment (needs kubewatcher)
|
304
307
|
if "conditions" in values and values["conditions"] is not None:
|
305
308
|
restart_counts = sum([c["restart_count"] for c in values["conditions"]])
|
306
309
|
workers_status[values["status"]] += 1
|
310
|
+
# get nodes involved in deployment (needs kubewatcher)
|
311
|
+
if "node_name" in values:
|
312
|
+
host_nodes.add(values["node_name"])
|
313
|
+
|
307
314
|
workers = "\n".join([f"{k}: {v}" for k, v in workers_status.items()])
|
308
315
|
if restart_counts > 0:
|
309
316
|
workers += f"\n({restart_counts} restart)"
|
@@ -320,7 +327,8 @@ def fetch_job_details(jobs: list[Job]):
|
|
320
327
|
server_creds=USER_LOCAL_SERVER_FILE,
|
321
328
|
user_cookie=USER_COOKIE
|
322
329
|
)
|
323
|
-
node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
|
330
|
+
#node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
|
331
|
+
node_ports = [f"{p['node_port']}" for s in result.values() for p in s["ports"]]
|
324
332
|
|
325
333
|
urls = [f"http://{load_server_info(data_key=SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)}:{node_port}" for node_port in node_ports]
|
326
334
|
if "Ready" in workers_status and len(workers_status) == 1:
|
@@ -338,7 +346,8 @@ def fetch_job_details(jobs: list[Job]):
|
|
338
346
|
name=deployment,
|
339
347
|
workers=workers,
|
340
348
|
endpoint="\n".join(urls),
|
341
|
-
status=str(status)
|
349
|
+
status=str(status),
|
350
|
+
host_nodes=" ".join(host_nodes))
|
342
351
|
)
|
343
352
|
|
344
353
|
except Exception as e:
|
@@ -595,7 +604,7 @@ def attach_to_pool(token, node_name=None):
|
|
595
604
|
|
596
605
|
return cluster_name
|
597
606
|
|
598
|
-
def generate_worker_package(num_gpus=0, node_name=None, ip_address="0.0.0.0", storage_compatible=True):
|
607
|
+
def generate_worker_package(target_platform="amd64", num_gpus=0, node_name=None, ip_address="0.0.0.0", storage_compatible=True):
|
599
608
|
# get pool data from token
|
600
609
|
token = get_pool_token(mode=TokenType.WORKER)
|
601
610
|
if "error" in token:
|
@@ -620,6 +629,7 @@ def generate_worker_package(num_gpus=0, node_name=None, ip_address="0.0.0.0", st
|
|
620
629
|
}
|
621
630
|
# Generate docker compose recipe
|
622
631
|
compose = generate_compose_config(
|
632
|
+
target_platform=target_platform,
|
623
633
|
write_to_file=False,
|
624
634
|
role="agent",
|
625
635
|
node_ip_address=ip_address,
|
@@ -633,7 +643,13 @@ def generate_worker_package(num_gpus=0, node_name=None, ip_address="0.0.0.0", st
|
|
633
643
|
return compose
|
634
644
|
|
635
645
|
|
636
|
-
def join_pool(token, num_gpus=None, node_name=None, ip_address=None):
|
646
|
+
def join_pool(
|
647
|
+
token,
|
648
|
+
num_gpus=None,
|
649
|
+
node_name=None,
|
650
|
+
ip_address=None,
|
651
|
+
target_platform="amd64"
|
652
|
+
):
|
637
653
|
compatibility = check_worker_compatibility()
|
638
654
|
if len(compatibility["issues"]) > 0:
|
639
655
|
return {"error": compatibility["issues"]}
|
@@ -668,6 +684,7 @@ def join_pool(token, num_gpus=None, node_name=None, ip_address=None):
|
|
668
684
|
# local agent join
|
669
685
|
# Generate docker compose recipe
|
670
686
|
generate_compose_config(
|
687
|
+
target_platform=target_platform,
|
671
688
|
role="agent",
|
672
689
|
node_ip_address=ip_address,
|
673
690
|
pool_ip=f"https://{kalavai_seed_ip}:6443",
|
@@ -722,7 +739,8 @@ def create_pool(
|
|
722
739
|
pool_config_values: str=None,
|
723
740
|
num_gpus: int=0,
|
724
741
|
node_name: str=None,
|
725
|
-
location: str=None
|
742
|
+
location: str=None,
|
743
|
+
target_platform: str="amd64"
|
726
744
|
):
|
727
745
|
|
728
746
|
if not check_seed_compatibility():
|
@@ -756,6 +774,7 @@ def create_pool(
|
|
756
774
|
|
757
775
|
# Generate docker compose recipe
|
758
776
|
generate_compose_config(
|
777
|
+
target_platform=target_platform,
|
759
778
|
role="server",
|
760
779
|
vpn_token=location,
|
761
780
|
node_ip_address=ip_address,
|
@@ -792,8 +811,7 @@ def create_pool(
|
|
792
811
|
WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
|
793
812
|
WATCHER_SERVICE_KEY: watcher_service,
|
794
813
|
USER_NODE_LABEL_KEY: USER_NODE_LABEL,
|
795
|
-
ALLOW_UNREGISTERED_USER_KEY: True, # Change this if only registered users are allowed
|
796
|
-
DEPLOY_LLM_SIDECARS_KEY: location is not None
|
814
|
+
ALLOW_UNREGISTERED_USER_KEY: True, # Change this if only registered users are allowed
|
797
815
|
}
|
798
816
|
|
799
817
|
store_server_info(
|
kalavai_client/utils.py
CHANGED
@@ -38,7 +38,6 @@ CLUSTER_NAME_KEY = "cluster_name"
|
|
38
38
|
AUTH_KEY = "watcher_admin_key"
|
39
39
|
WRITE_AUTH_KEY = "watcher_write_key"
|
40
40
|
ALLOW_UNREGISTERED_USER_KEY = "watcher_allow_unregistered_user"
|
41
|
-
DEPLOY_LLM_SIDECARS_KEY = "deploy_llm_sidecars"
|
42
41
|
NODE_ROLE_LABEL = "kalavai.node_role"
|
43
42
|
USER_API_KEY = "user_api_key"
|
44
43
|
READONLY_AUTH_KEY = "watcher_readonly_key"
|
@@ -106,12 +105,13 @@ def is_storage_compatible():
|
|
106
105
|
return False
|
107
106
|
################
|
108
107
|
|
109
|
-
def generate_compose_config(role, node_name, write_to_file=True, node_ip_address="0.0.0.0", num_gpus=0, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
|
108
|
+
def generate_compose_config(role, node_name, target_platform="amd64", write_to_file=True, node_ip_address="0.0.0.0", num_gpus=0, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
|
110
109
|
|
111
110
|
if node_labels is not None:
|
112
111
|
node_labels = " ".join([f"--node-label {key}={value}" for key, value in node_labels.items()])
|
113
112
|
rand_suffix = uuid.uuid4().hex[:8]
|
114
113
|
compose_values = {
|
114
|
+
"target_platform": target_platform,
|
115
115
|
"user_path": user_path(""),
|
116
116
|
"service_name": DEFAULT_CONTAINER_NAME,
|
117
117
|
"vpn": vpn_token is not None,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: kalavai-client
|
3
|
-
Version: 0.6.14
|
3
|
+
Version: 0.6.17
|
4
4
|
Summary: Client app for kalavai platform
|
5
5
|
License: Apache-2.0
|
6
6
|
Keywords: LLM,platform
|
@@ -52,30 +52,28 @@ Description-Content-Type: text/markdown
|
|
52
52
|
|
53
53
|
</div>
|
54
54
|
|
55
|
-
⭐⭐⭐ **Kalavai
|
55
|
+
⭐⭐⭐ **Kalavai platform is open source, and free to use in both commercial and non-commercial purposes. If you find it useful, consider supporting us by [giving a star to our GitHub project](https://github.com/kalavai-net/kalavai-client), joining our [discord channel](https://discord.gg/YN6ThTJKbM) and follow our [Substack](https://kalavainet.substack.com/).**
|
56
56
|
|
57
57
|
|
58
58
|
# Kalavai: turn your devices into a scalable AI platform
|
59
59
|
|
60
|
-
|
60
|
+
> AI in the cloud is not aligned with you, it's aligned with the company that owns it. Make sure you own your AI
|
61
61
|
|
62
|
-
|
62
|
+
### Taming the adoption of self-hosted GenAI
|
63
63
|
|
64
|
-
|
65
|
-
|
66
|
-
<a href="https://www.producthunt.com/products/kalavai/reviews?utm_source=badge-product_review&utm_medium=badge&utm_souce=badge-kalavai" target="_blank"><img src="https://api.producthunt.com/widgets/embed-image/v1/product_review.svg?product_id=720725&theme=neutral" alt="Kalavai - The first platform to crowdsource AI computation | Product Hunt" style="width: 250px; height: 54px;" width="250" height="54" /></a>
|
67
|
-
|
68
|
-
</div>
|
64
|
+
Kalavai is an **open source** tool that turns **any devices** into a self-hosted AI platform. It aggregates resources from multiple machines, including cloud, on prem and personal computers, and is **compatible with most model engines** to make model deployment and orchestration simple and reliable.
|
69
65
|
|
70
66
|
|
71
67
|
## What can Kalavai do?
|
72
68
|
|
73
|
-
Kalavai's goal is to make using AI (
|
69
|
+
Kalavai's goal is to make using self-hosted AI (GenAI models and agents) in real applications accessible and affordable to all. It's a tool that transforms machines into a _magic box_ that **integrates all the components required to make AI useful in the age of massive computing**, from model deployment and orchestration to Agentic AI.
|
74
70
|
|
75
71
|
### Core features
|
76
72
|
|
77
73
|
- Manage **multiple devices resources as one**. One pool of RAM, CPUs and GPUs
|
78
|
-
- **Deploy
|
74
|
+
- **Deploy open source models seamlessly across devices**, wherever they are (cloud, on premises, personal devices)
|
75
|
+
- Beyond LLMs: not just for large language models, but text-to-speech, speech-to-text, image understanding, coding generation and embedding models.
|
76
|
+
- The hybrid dream: build on your laptop, move to the cloud (any!) with zero changes
|
79
77
|
- Auto-discovery: all **models are automatically exposed** through a single OpenAI-like API and a ChatGPT-like UI playground
|
80
78
|
- Compatible with [most popular model engines](#support-for-llm-engines)
|
81
79
|
- [Easy to expand](https://github.com/kalavai-net/kube-watcher/tree/main/templates) to custom workloads
|
@@ -83,19 +81,19 @@ Kalavai's goal is to make using AI (LLMs, AI agents) in real applications access
|
|
83
81
|
|
84
82
|
<details>
|
85
83
|
|
86
|
-
**<summary>
|
84
|
+
**<summary>Powered by Kalavai</summary>**
|
87
85
|
|
88
|
-
|
89
|
-
|
90
|
-
https://github.com/user-attachments/assets/0d2316f3-79ea-46ac-b41e-8ef720f52672
|
86
|
+
- [CoGen AI](https://cogenai.kalavai.net): A community hosted alternative to OpenAI API for unlimited inference.
|
87
|
+
- [Create your own Free Cursor/Windsurf Clone](https://www.youtube.com/watch?v=6zHSo7oeCDQ&t=21s)
|
91
88
|
|
92
89
|
|
93
90
|
</details>
|
94
91
|
|
92
|
+
|
95
93
|
### Latest updates
|
96
94
|
|
95
|
+
- 11 June 2025: Native support for Mac and Raspberry pi devices (ARM).
|
97
96
|
- 20 February 2025: New shiny GUI interface to control LLM pools and deploy models
|
98
|
-
- 6 February 2025: 🔥🔥🔥 Access **DeepSeek R1 model for free** when you join our [public LLM pool](https://kalavai-net.github.io/kalavai-client/public_llm_pool/)
|
99
97
|
- 31 January 2025: `kalavai-client` is now a [PyPI package](https://pypi.org/project/kalavai-client/), easier to install than ever!
|
100
98
|
<details>
|
101
99
|
<summary>More news</summary>
|
@@ -148,8 +146,6 @@ The `kalavai-client` is the main tool to interact with the Kalavai platform, to
|
|
148
146
|
|
149
147
|
<summary>Requirements</summary>
|
150
148
|
|
151
|
-
### Requirements
|
152
|
-
|
153
149
|
For workers sharing resources with the pool:
|
154
150
|
|
155
151
|
- A laptop, desktop or Virtual Machine
|
@@ -157,37 +153,8 @@ For workers sharing resources with the pool:
|
|
157
153
|
|
158
154
|
> **Support for Windows and MacOS workers is experimental**: kalavai workers run on docker containers that require access to the host network interfaces, thus systems that do not support containers natively (Windows and MacOS) may have difficulties finding each other.
|
159
155
|
|
160
|
-
Any system that runs python 3.6+ is able to run the `kalavai-client` and therefore connect and operate an LLM pool, [without sharing with the pool](). Your computer won't be adding its capacity to the pool, but it wil be able to deploy jobs and interact with models.
|
161
|
-
|
162
156
|
</details>
|
163
157
|
|
164
|
-
<details>
|
165
|
-
|
166
|
-
<summary> Common issues</summary>
|
167
|
-
|
168
|
-
If you see the following error:
|
169
|
-
|
170
|
-
```bash
|
171
|
-
fatal error: Python.h: No such file or directory | #include <Python.h>
|
172
|
-
```
|
173
|
-
|
174
|
-
Make sure you also install python3-dev package. For ubuntu distros:
|
175
|
-
|
176
|
-
```bash
|
177
|
-
sudo apt install python3-dev
|
178
|
-
```
|
179
|
-
|
180
|
-
If you see:
|
181
|
-
```bash
|
182
|
-
AttributeError: install_layout. Did you mean: 'install_platlib'?
|
183
|
-
[end of output]
|
184
|
-
```
|
185
|
-
|
186
|
-
Upgrade your setuptools:
|
187
|
-
```bash
|
188
|
-
pip install -U setuptools
|
189
|
-
```
|
190
|
-
</details>
|
191
158
|
|
192
159
|
### Install the client
|
193
160
|
|
@@ -230,6 +197,8 @@ If your system is not currently supported, [open an issue](https://github.com/ka
|
|
230
197
|
|
231
198
|
### OS compatibility
|
232
199
|
|
200
|
+
Currently **seed nodes** are supported exclusively on linux machines (x86_64 platform). However Kalavai supports mix-pools, i.e. having Windows and MacOS computers as workers.
|
201
|
+
|
233
202
|
Since **worker nodes** run inside docker, any machine that can run docker **should** be compatible with Kalavai. Here are instructions for [linux](https://docs.docker.com/engine/install/), [Windows](https://docs.docker.com/desktop/setup/install/windows-install/) and [MacOS](https://docs.docker.com/desktop/setup/install/mac-install/).
|
234
203
|
|
235
204
|
The kalavai client, which controls and access pools, can be installed on any machine that has python 3.10+.
|
@@ -237,9 +206,10 @@ The kalavai client, which controls and access pools, can be installed on any mac
|
|
237
206
|
|
238
207
|
### Hardware compatibility:
|
239
208
|
|
240
|
-
- `amd64` or `x86_64` CPU architecture
|
209
|
+
- `amd64` or `x86_64` CPU architecture for seed and worker nodes.
|
210
|
+
- `arm64` CPU architecture for worker nodes.
|
241
211
|
- NVIDIA GPU
|
242
|
-
- AMD and Intel GPUs are currently not supported ([interested in helping us test it?](https://kalavai-net.github.io/kalavai-client/compatibility/#help-testing-amd-gpus))
|
212
|
+
- Mac M series, AMD and Intel GPUs are currently not supported ([interested in helping us test it?](https://kalavai-net.github.io/kalavai-client/compatibility/#help-testing-amd-gpus))
|
243
213
|
|
244
214
|
</details>
|
245
215
|
|
@@ -247,15 +217,15 @@ The kalavai client, which controls and access pools, can be installed on any mac
|
|
247
217
|
|
248
218
|
- [x] Kalavai client on Linux
|
249
219
|
- [x] [TEMPLATE] Distributed LLM deployment
|
250
|
-
- [x] Kalavai client on Windows (
|
220
|
+
- [x] Kalavai client on Windows (worker only)
|
221
|
+
- [x] Kalavai client on Windows WSL2 (seed and worker)
|
251
222
|
- [x] Self-hosted LLM pools
|
252
223
|
- [x] Collaborative LLM deployment
|
253
224
|
- [x] Ray cluster support
|
254
|
-
- [x] Kalavai client on Mac
|
225
|
+
- [x] Kalavai client on Mac (worker only)
|
255
226
|
- [x] Kalavai pools UI
|
256
|
-
- [ ] [TEMPLATE] [GPUStack](https://github.com/gpustack/gpustack) support
|
257
|
-
- [ ] [TEMPLATE] [exo](https://github.com/exo-explore/exo) support
|
258
227
|
- [ ] Support for AMD GPUs
|
228
|
+
- [ ] Support for Mac M GPUs
|
259
229
|
- [x] Docker install path
|
260
230
|
|
261
231
|
|
@@ -0,0 +1,25 @@
|
|
1
|
+
kalavai_client/__init__.py,sha256=6D1LEuHzwFI-n2PmIawhX9C0K4e_BwIIJUAUnpkzCWE,23
|
2
|
+
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
|
+
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
kalavai_client/assets/apps.yaml,sha256=zVtfPqesNhoBLpNlhIPAVtxgXLqEQU2pK1GTzKGEqiQ,6395
|
5
|
+
kalavai_client/assets/apps_values.yaml,sha256=dvsAnMC1uk9oDsnITIYJc5CIg9LTwGzXldjPZTwRQyE,2069
|
6
|
+
kalavai_client/assets/docker-compose-gui.yaml,sha256=shqN78YLw0QP7bqTKveI4ppz5E-5b1JowmsSB4OG3nA,778
|
7
|
+
kalavai_client/assets/docker-compose-template.yaml,sha256=KHIwJ2WWX7Y7wQKiXRr82Jqd3IKRyls5zhTyl8mSmrc,1805
|
8
|
+
kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
|
9
|
+
kalavai_client/assets/pool_config_template.yaml,sha256=MhBZQsEMKrBgbUVSKgIGmXWhybeGKG6l5XvJb38y5GI,577
|
10
|
+
kalavai_client/assets/pool_config_values.yaml,sha256=_iAnugramLiwJaaDcPSetThvOdR7yFiCffdMri-SQCU,68
|
11
|
+
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
12
|
+
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
13
|
+
kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
|
14
|
+
kalavai_client/bridge_api.py,sha256=5tYqI8UdG7K1Qskywk97kC0TpvYruUZxqxvbn-2nve4,15405
|
15
|
+
kalavai_client/bridge_models.py,sha256=t1fJGaF6YDMQdOnEU3XT8zTBHU8eUWJ1yhM5s7c6vMo,2546
|
16
|
+
kalavai_client/cli.py,sha256=SzKG7_ZG0ehMQsECQRWSvqj2Fju2Gd5O7uBa60bFBAY,47830
|
17
|
+
kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
|
18
|
+
kalavai_client/core.py,sha256=dJVX5mhFzIshazCfAzb-AqpqWjkp_djgbMyNXzuAF48,34650
|
19
|
+
kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
|
20
|
+
kalavai_client/utils.py,sha256=bhvQzF12q7L2hGVrbcmXRDXXIsAdlzcsTms6RQRxGU4,12733
|
21
|
+
kalavai_client-0.6.17.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
22
|
+
kalavai_client-0.6.17.dist-info/METADATA,sha256=J5TEqnwPm3ZopGEi2MRn_ddy_VV101JcB8sl6MGL4iI,12655
|
23
|
+
kalavai_client-0.6.17.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
24
|
+
kalavai_client-0.6.17.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
25
|
+
kalavai_client-0.6.17.dist-info/RECORD,,
|
@@ -1,25 +0,0 @@
|
|
1
|
-
kalavai_client/__init__.py,sha256=nfdFjB1S39Wxvq7GI6NuNOdisKxExyZQSWX0Q2sAupU,23
|
2
|
-
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
|
-
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
kalavai_client/assets/apps.yaml,sha256=L0hi826JwWW0rDRD83wcFVJVNZoUnafPdsjpharBrHE,6365
|
5
|
-
kalavai_client/assets/apps_values.yaml,sha256=WRew3bS1MztjzcJfphuJcKn0n2T1ICRupPpr_Csjt_s,1644
|
6
|
-
kalavai_client/assets/docker-compose-gui.yaml,sha256=DGCyGYzz1kH6kkMbo62FJHe3F9vcAmA8DOHw-c_o0Kw,752
|
7
|
-
kalavai_client/assets/docker-compose-template.yaml,sha256=w9Eux2-lQgkGFbNhwHwurlRJe13CVZPrAGOiFBfI5I0,1763
|
8
|
-
kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
|
9
|
-
kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
|
10
|
-
kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
|
11
|
-
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
12
|
-
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
13
|
-
kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
|
14
|
-
kalavai_client/bridge_api.py,sha256=-z0NBUSRJkVOfP807Fd-ZX2uEtKc6BCfrDD_umQ6sNg,15592
|
15
|
-
kalavai_client/bridge_models.py,sha256=775aXLTma3dv6KmKTmebAZ55ns6d9EmNno5e4blfoNY,2738
|
16
|
-
kalavai_client/cli.py,sha256=mmwLqqSYfl9k6vqveMcbHTq7g5FFd84YUUQCSH4J0k0,46967
|
17
|
-
kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
|
18
|
-
kalavai_client/core.py,sha256=u8a4uYqGS0mMJh0ArcXG2hwp2uDUSuwM5ROGXRQkHZg,34051
|
19
|
-
kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
|
20
|
-
kalavai_client/utils.py,sha256=S80bLSICvWLhtQP-dmW0OF4coKwjxmhmPIja8UArTTE,12712
|
21
|
-
kalavai_client-0.6.14.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
22
|
-
kalavai_client-0.6.14.dist-info/METADATA,sha256=UkJ77kexOEA-_8c9DwnphWnk6vZl1-1g2EGPNLJ9VHI,13354
|
23
|
-
kalavai_client-0.6.14.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
24
|
-
kalavai_client-0.6.14.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
25
|
-
kalavai_client-0.6.14.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|