kalavai-client 0.5.2__py3-none-any.whl → 0.5.12__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- kalavai_client/__init__.py +1 -1
- kalavai_client/assets/apps.yaml +9 -9
- kalavai_client/assets/docker-compose-template.yaml +101 -49
- kalavai_client/assets/nginx.conf +12 -0
- kalavai_client/cli.py +214 -122
- kalavai_client/cluster.py +26 -5
- kalavai_client/utils.py +9 -45
- {kalavai_client-0.5.2.dist-info → kalavai_client-0.5.12.dist-info}/METADATA +58 -12
- kalavai_client-0.5.12.dist-info/RECORD +20 -0
- kalavai_client/assets/vpn-template.yaml +0 -13
- kalavai_client-0.5.2.dist-info/RECORD +0 -20
- {kalavai_client-0.5.2.dist-info → kalavai_client-0.5.12.dist-info}/LICENSE +0 -0
- {kalavai_client-0.5.2.dist-info → kalavai_client-0.5.12.dist-info}/WHEEL +0 -0
- {kalavai_client-0.5.2.dist-info → kalavai_client-0.5.12.dist-info}/entry_points.txt +0 -0
kalavai_client/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
|
2
|
-
__version__ = "0.5.
|
2
|
+
__version__ = "0.5.12"
|
kalavai_client/assets/apps.yaml
CHANGED
@@ -139,7 +139,7 @@ releases:
|
|
139
139
|
- name: replicas
|
140
140
|
value: 2
|
141
141
|
- name: image_tag
|
142
|
-
value: "v2025.01"
|
142
|
+
value: "v2025.01.1"
|
143
143
|
- name: deployment.in_cluster
|
144
144
|
value: "True"
|
145
145
|
- name: deployment.use_auth_key
|
@@ -189,13 +189,13 @@ releases:
|
|
189
189
|
value: "1"
|
190
190
|
- name: devicePlugin.deviceSplitCount
|
191
191
|
value: "1"
|
192
|
-
- name: scheduler.customWebhook.port
|
193
|
-
|
194
|
-
- name: scheduler.service.schedulerPort
|
195
|
-
|
196
|
-
- name: scheduler.service.monitorPort
|
197
|
-
|
198
|
-
- name: devicePlugin.service.httpPort
|
199
|
-
|
192
|
+
# - name: scheduler.customWebhook.port
|
193
|
+
# value: "30498"
|
194
|
+
# - name: scheduler.service.schedulerPort
|
195
|
+
# value: "30498"
|
196
|
+
# - name: scheduler.service.monitorPort
|
197
|
+
# value: "30493"
|
198
|
+
# - name: devicePlugin.service.httpPort
|
199
|
+
# value: "30492"
|
200
200
|
|
201
201
|
|
@@ -1,55 +1,107 @@
|
|
1
1
|
services:
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
2
|
+
{% if vpn %}
|
3
|
+
{{vpn_name}}:
|
4
|
+
image: gravitl/netclient:v0.24.3
|
5
|
+
container_name: {{vpn_name}}
|
6
|
+
#privileged: true
|
7
|
+
cap_add:
|
8
|
+
- NET_ADMIN
|
9
|
+
- SYS_MODULE
|
10
|
+
network_mode: host
|
11
|
+
# networks:
|
12
|
+
# - custom-network
|
13
|
+
# ports:
|
14
|
+
# # https://docs.k3s.io/installation/requirements#inbound-rules-for-k3s-nodes
|
15
|
+
# - "6443:6443" # kube server
|
16
|
+
# - "10250:10250" # worker balancer
|
17
|
+
# - "8472:8472/udp" # flannel vxlan
|
18
|
+
# - "51820-51830:51820-51830" # flannel wireguard
|
19
|
+
# {% if command == "server" %}
|
20
|
+
# - "30000-30500:30000-30500"
|
21
|
+
# {% endif %}
|
22
|
+
environment:
|
23
|
+
- HOST_NAME={{node_name}}
|
24
|
+
- IFACE_NAME={{flannel_iface}}
|
25
|
+
- TOKEN={{vpn_token}}
|
26
|
+
volumes:
|
27
|
+
- /dev/net/tun:/dev/net/tun
|
28
|
+
restart: unless-stopped
|
29
|
+
# nginx:
|
30
|
+
# image: nginx:latest
|
31
|
+
# ports:
|
32
|
+
# - "{{redirect_source_port}}:{{redirect_source_port}}"
|
33
|
+
# restart: unless-stopped
|
34
|
+
# networks:
|
35
|
+
# - custom-network
|
36
|
+
# volumes:
|
37
|
+
# - {{nginx_path}}/nginx.conf:/etc/nginx/nginx.conf
|
14
38
|
{% endif %}
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
{%
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
{%
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
39
|
+
|
40
|
+
# run worker only if command is set
|
41
|
+
{%if command %}
|
42
|
+
{{service_name}}:
|
43
|
+
image: docker.io/bundenth/kalavai-runner:gpu-latest
|
44
|
+
container_name: {{service_name}}
|
45
|
+
{% if vpn %}
|
46
|
+
depends_on:
|
47
|
+
- {{vpn_name}}
|
48
|
+
network_mode: "service:{{vpn_name}}"
|
49
|
+
{% else %}
|
50
|
+
network_mode: host
|
51
|
+
# hostname: {{node_name}}
|
52
|
+
# networks:
|
53
|
+
# - custom-network
|
54
|
+
# ports:
|
55
|
+
# - "6443:6443" # kube server
|
56
|
+
# - "2379-2380:2379-2380" # etcd server
|
57
|
+
# - "10259:10259" # kube scheduler
|
58
|
+
# - "10257:10257" # kube controller manager
|
59
|
+
# - "10250:10250" # worker balancer
|
60
|
+
# - "8285:8285" # flannel
|
61
|
+
# - "8472:8472" # flannel vxlan
|
62
|
+
# - "51820:51820" # flannel wireguard
|
63
|
+
# {% if command == "server" %}
|
64
|
+
# - "30000-32767:30000-32767"
|
65
|
+
# {% endif %}
|
66
|
+
{% endif %}
|
67
|
+
privileged: true
|
68
|
+
restart: unless-stopped
|
69
|
+
command: >
|
70
|
+
--command={{command}}
|
71
|
+
{% if command == "server" %}
|
72
|
+
--port_range="30000-32767"
|
73
|
+
{% else %}
|
74
|
+
--server_ip={{pool_ip}}
|
75
|
+
--token={{pool_token}}
|
76
|
+
{% endif %}
|
77
|
+
{%if vpn %}
|
78
|
+
--flannel_iface={{flannel_iface}}
|
79
|
+
{% endif %}
|
80
|
+
{% if num_gpus and num_gpus > 0 %}
|
81
|
+
--gpu=on
|
82
|
+
{% else %}
|
83
|
+
--gpu=off
|
84
|
+
{% endif %}
|
85
|
+
{% if node_labels %}
|
86
|
+
--extra="{{node_labels}}"
|
87
|
+
{% endif %}
|
88
|
+
|
89
|
+
volumes:
|
90
|
+
- {{k3s_path}}:/var/lib/rancher/k3s # Persist data
|
91
|
+
- {{etc_path}}:/etc/rancher/k3s # Config files
|
92
|
+
|
93
|
+
{% if num_gpus and num_gpus > 0 %}
|
94
|
+
deploy:
|
95
|
+
resources:
|
96
|
+
reservations:
|
97
|
+
devices:
|
98
|
+
- driver: nvidia
|
99
|
+
count: {{num_gpus}}
|
100
|
+
capabilities: [gpu]
|
101
|
+
{% endif %}
|
51
102
|
{% endif %}
|
52
103
|
|
53
104
|
networks:
|
54
105
|
custom-network:
|
55
|
-
driver: bridge
|
106
|
+
driver: bridge
|
107
|
+
|
kalavai_client/cli.py
CHANGED
@@ -31,6 +31,7 @@ from kalavai_client.utils import (
|
|
31
31
|
safe_remove,
|
32
32
|
leave_vpn,
|
33
33
|
join_vpn,
|
34
|
+
get_vpn_details,
|
34
35
|
load_server_info,
|
35
36
|
user_login,
|
36
37
|
user_logout,
|
@@ -81,15 +82,17 @@ STORAGE_ACCESS_MODE = ["ReadWriteOnce"]
|
|
81
82
|
STORAGE_CLASS_LABEL = "kalavai.storage.enabled"
|
82
83
|
DEFAULT_STORAGE_NAME = "pool-cache"
|
83
84
|
DEFAULT_STORAGE_SIZE = 20
|
85
|
+
DEFAULT_WATCHER_PORT = 30001
|
84
86
|
USER_NODE_LABEL = "kalavai.cluster.user"
|
85
87
|
KUBE_VERSION = os.getenv("KALAVAI_KUBE_VERSION", "v1.31.1+k3s1")
|
86
|
-
DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker")
|
88
|
+
DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker-1")
|
87
89
|
FORBIDEDEN_IPS = ["127.0.0.1"]
|
88
90
|
# kalavai templates
|
89
91
|
HELM_APPS_FILE = resource_path("kalavai_client/assets/apps.yaml")
|
90
92
|
HELM_APPS_VALUES = resource_path("kalavai_client/assets/apps_values.yaml")
|
91
93
|
# user specific config files
|
92
|
-
DEFAULT_CONTAINER_NAME = "kalavai
|
94
|
+
DEFAULT_CONTAINER_NAME = "kalavai"
|
95
|
+
DEFAULT_VPN_CONTAINER_NAME = "kalavai-vpn"
|
93
96
|
CONTAINER_HOST_PATH = user_path("pool/", create_path=True)
|
94
97
|
USER_COMPOSE_FILE = user_path("docker-compose-worker.yaml")
|
95
98
|
USER_VPN_COMPOSE_FILE = user_path("docker-compose-vpn.yaml")
|
@@ -115,27 +118,6 @@ CLUSTER = dockerCluster(
|
|
115
118
|
######################
|
116
119
|
## HELPER FUNCTIONS ##
|
117
120
|
######################
|
118
|
-
|
119
|
-
def check_vpn_compatibility():
|
120
|
-
"""Check required packages to join VPN"""
|
121
|
-
logs = []
|
122
|
-
console.log("[white]Checking system requirements...")
|
123
|
-
# netclient
|
124
|
-
try:
|
125
|
-
run_cmd("sudo netclient version >/dev/null 2>&1")
|
126
|
-
except:
|
127
|
-
logs.append("[red]Netmaker not installed. Install instructions:\n")
|
128
|
-
logs.append(" Linux: https://docs.netmaker.io/docs/netclient#linux\n")
|
129
|
-
logs.append(" Windows: https://docs.netmaker.io/docs/netclient#windows\n")
|
130
|
-
logs.append(" MacOS: https://docs.netmaker.io/docs/netclient#mac\n")
|
131
|
-
|
132
|
-
if len(logs) == 0:
|
133
|
-
console.log("[green]System is ready to join a pool")
|
134
|
-
return True
|
135
|
-
else:
|
136
|
-
for log in logs:
|
137
|
-
console.log(log)
|
138
|
-
return False
|
139
121
|
|
140
122
|
def check_seed_compatibility():
|
141
123
|
"""Check required packages to start pools"""
|
@@ -179,16 +161,6 @@ def check_worker_compatibility():
|
|
179
161
|
|
180
162
|
|
181
163
|
def cleanup_local():
|
182
|
-
# disconnect from private network
|
183
|
-
console.log("Disconnecting from VPN...")
|
184
|
-
try:
|
185
|
-
vpns = leave_vpn()
|
186
|
-
if vpns is not None:
|
187
|
-
for vpn in vpns:
|
188
|
-
console.log(f"You have left {vpn} VPN")
|
189
|
-
except:
|
190
|
-
# no vpn
|
191
|
-
pass
|
192
164
|
console.log("Removing local cache files...")
|
193
165
|
safe_remove(CONTAINER_HOST_PATH)
|
194
166
|
safe_remove(USER_COMPOSE_FILE)
|
@@ -365,32 +337,37 @@ def select_token_type():
|
|
365
337
|
break
|
366
338
|
return {"admin": choice == 0, "user": choice == 1, "worker": choice == 2}
|
367
339
|
|
368
|
-
def generate_compose_config(role, node_name,
|
340
|
+
def generate_compose_config(role, node_name, is_public, use_gpus=True, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
|
369
341
|
num_gpus = 0
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
342
|
+
if use_gpus:
|
343
|
+
try:
|
344
|
+
has_gpus = check_gpu_drivers()
|
345
|
+
if has_gpus:
|
346
|
+
max_gpus = int(run_cmd("nvidia-smi -L | wc -l").decode())
|
347
|
+
num_gpus = user_confirm(
|
348
|
+
question=f"{max_gpus} NVIDIA GPU(s) detected. How many GPUs would you like to include?",
|
349
|
+
options=range(max_gpus+1)
|
350
|
+
)
|
351
|
+
except:
|
352
|
+
console.log(f"[red]WARNING: error when fetching NVIDIA GPU info. GPUs will not be used on this local machine")
|
353
|
+
if node_labels is not None:
|
354
|
+
node_labels = " ".join([f"--node-label {key}={value}" for key, value in node_labels.items()])
|
380
355
|
compose_values = {
|
381
356
|
"user_path": user_path(""),
|
382
357
|
"service_name": DEFAULT_CONTAINER_NAME,
|
383
|
-
"
|
384
|
-
"
|
385
|
-
"
|
358
|
+
"vpn": is_public,
|
359
|
+
"vpn_name": DEFAULT_VPN_CONTAINER_NAME,
|
360
|
+
"pool_ip": pool_ip,
|
361
|
+
"pool_token": pool_token,
|
362
|
+
"vpn_token": vpn_token,
|
363
|
+
"node_name": node_name,
|
386
364
|
"command": role,
|
387
365
|
"storage_enabled": "True",
|
388
|
-
"ip_address": ip_address,
|
389
366
|
"num_gpus": num_gpus,
|
390
367
|
"k3s_path": f"{CONTAINER_HOST_PATH}/k3s",
|
391
368
|
"etc_path": f"{CONTAINER_HOST_PATH}/etc",
|
392
|
-
"node_labels":
|
393
|
-
"flannel_iface": DEFAULT_FLANNEL_IFACE if is_public else
|
369
|
+
"node_labels": node_labels,
|
370
|
+
"flannel_iface": DEFAULT_FLANNEL_IFACE if is_public else ""
|
394
371
|
}
|
395
372
|
# generate local config files
|
396
373
|
compose_yaml = load_template(
|
@@ -585,35 +562,56 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
|
|
585
562
|
STORAGE_CLASS_LABEL: is_storage_compatible()
|
586
563
|
}
|
587
564
|
if location is not None:
|
588
|
-
console.log("
|
565
|
+
console.log("Fetching VPN credentials")
|
589
566
|
try:
|
590
|
-
|
591
|
-
return
|
592
|
-
vpn = join_vpn(
|
567
|
+
vpn = get_vpn_details(
|
593
568
|
location=location,
|
594
569
|
user_cookie=USER_COOKIE)
|
595
570
|
node_labels[USER_NODE_LABEL] = user["username"]
|
596
571
|
except Exception as e:
|
597
572
|
console.log(f"[red]Error when joining network: {str(e)}")
|
598
573
|
return
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
574
|
+
|
575
|
+
# Generate docker compose recipe
|
576
|
+
generate_compose_config(
|
577
|
+
role="server",
|
578
|
+
vpn_token=vpn["key"],
|
579
|
+
node_name=socket.gethostname(),
|
580
|
+
node_labels=node_labels,
|
581
|
+
is_public=location is not None
|
582
|
+
)
|
583
|
+
|
584
|
+
# start server
|
585
|
+
console.log("Deploying seed...")
|
586
|
+
CLUSTER.start_seed_node()
|
587
|
+
|
588
|
+
while not CLUSTER.is_agent_running():
|
589
|
+
console.log("Waiting for seed to start...")
|
590
|
+
time.sleep(10)
|
591
|
+
|
592
|
+
# select IP address (for external discovery)
|
593
|
+
if ip_address is None and location is None:
|
594
|
+
# local IP
|
595
|
+
console.log(f"Scanning for valid IPs")
|
596
|
+
ip_address = select_ip_address()
|
597
|
+
else:
|
598
|
+
# load VPN ip
|
599
|
+
ip_address = CLUSTER.get_vpn_ip()
|
603
600
|
console.log(f"Using {ip_address} address for server")
|
604
601
|
|
602
|
+
# populate local cred files
|
605
603
|
auth_key = str(uuid.uuid4())
|
606
604
|
write_auth_key = str(uuid.uuid4())
|
607
605
|
readonly_auth_key = str(uuid.uuid4())
|
608
|
-
|
609
|
-
watcher_service = f"{ip_address}:{
|
606
|
+
|
607
|
+
watcher_service = f"{ip_address}:{DEFAULT_WATCHER_PORT}"
|
610
608
|
values = {
|
611
609
|
CLUSTER_NAME_KEY: cluster_name,
|
612
610
|
CLUSTER_IP_KEY: ip_address,
|
613
611
|
AUTH_KEY: auth_key,
|
614
612
|
READONLY_AUTH_KEY: readonly_auth_key,
|
615
613
|
WRITE_AUTH_KEY: write_auth_key,
|
616
|
-
WATCHER_PORT_KEY:
|
614
|
+
WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
|
617
615
|
WATCHER_SERVICE_KEY: watcher_service,
|
618
616
|
USER_NODE_LABEL_KEY: USER_NODE_LABEL,
|
619
617
|
ALLOW_UNREGISTERED_USER_KEY: not only_registered_users
|
@@ -630,15 +628,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
|
|
630
628
|
cluster_name=cluster_name,
|
631
629
|
public_location=location,
|
632
630
|
user_api_key=user["api_key"])
|
633
|
-
|
634
|
-
# 1. Generate docker compose recipe
|
635
|
-
compose_yaml = generate_compose_config(
|
636
|
-
role="server",
|
637
|
-
node_name=socket.gethostname(),
|
638
|
-
ip_address=ip_address,
|
639
|
-
node_labels=node_labels,
|
640
|
-
is_public=location is not None
|
641
|
-
)
|
642
631
|
|
643
632
|
# Generate helmfile recipe
|
644
633
|
helm_yaml = load_template(
|
@@ -650,14 +639,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
|
|
650
639
|
f.write(helm_yaml)
|
651
640
|
|
652
641
|
console.log("[green]Config files have been generated in your local machine\n")
|
653
|
-
|
654
|
-
# # 1. start server
|
655
|
-
console.log("Deploying seed...")
|
656
|
-
CLUSTER.start_seed_node()
|
657
|
-
|
658
|
-
while not CLUSTER.is_agent_running():
|
659
|
-
console.log("Waiting for seed to start...")
|
660
|
-
time.sleep(10)
|
661
642
|
|
662
643
|
console.log("Setting pool dependencies...")
|
663
644
|
# set template values in helmfile
|
@@ -691,7 +672,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
|
|
691
672
|
|
692
673
|
return None
|
693
674
|
|
694
|
-
|
695
675
|
@arguably.command
|
696
676
|
def pool__token(*others, admin=False, user=False, worker=False):
|
697
677
|
"""
|
@@ -773,6 +753,7 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
773
753
|
if CLUSTER.is_agent_running():
|
774
754
|
console.log(f"[white] You are already connected to {load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE)}. Enter [yellow]kalavai pool stop[white] to exit and join another one.")
|
775
755
|
return
|
756
|
+
|
776
757
|
# check that is not attached to another instance
|
777
758
|
if os.path.exists(USER_LOCAL_SERVER_FILE):
|
778
759
|
option = user_confirm(
|
@@ -810,20 +791,20 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
810
791
|
}
|
811
792
|
user = defaultdict(lambda: None)
|
812
793
|
if public_location is not None:
|
813
|
-
|
794
|
+
user = user_login(user_cookie=USER_COOKIE)
|
795
|
+
if user is None:
|
796
|
+
console.log("[red]Must be logged in to join public pools. Run [yellow]kalavai login[red] to authenticate")
|
797
|
+
exit()
|
798
|
+
console.log("Fetching VPN credentials")
|
814
799
|
try:
|
815
|
-
|
816
|
-
return
|
817
|
-
vpn = join_vpn(
|
800
|
+
vpn = get_vpn_details(
|
818
801
|
location=public_location,
|
819
802
|
user_cookie=USER_COOKIE)
|
820
|
-
user = user_login(user_cookie=USER_COOKIE)
|
821
803
|
node_labels[USER_NODE_LABEL] = user["username"]
|
822
804
|
except Exception as e:
|
823
805
|
console.log(f"[red]Error when joining network: {str(e)}")
|
824
806
|
console.log("Are you authenticated? Try [yellow]kalavai login")
|
825
807
|
return
|
826
|
-
# validate public seed
|
827
808
|
try:
|
828
809
|
validate_join_public_seed(
|
829
810
|
cluster_name=cluster_name,
|
@@ -832,31 +813,29 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
832
813
|
)
|
833
814
|
except Exception as e:
|
834
815
|
console.log(f"[red]Error when joining network: {str(e)}")
|
835
|
-
leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
|
836
816
|
return
|
837
817
|
|
838
818
|
# send note to server to let them know the node is coming online
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
if ip_address is None:
|
845
|
-
console.log(f"Scanning for valid IPs (subnet {vpn['subnet']})...")
|
846
|
-
ip_address = select_ip_address(subnet=vpn["subnet"])
|
847
|
-
console.log(f"Using {ip_address} address for worker")
|
819
|
+
# TODO: won't be able to check for VPN pools...
|
820
|
+
# if not pre_join_check(node_name=node_name, server_url=watcher_service, server_key=auth_key):
|
821
|
+
# console.log(f"[red] Failed pre join checks. Server offline or node '{node_name}' may already exist. Please specify a different one with '--node-name'")
|
822
|
+
# leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
|
823
|
+
# return
|
848
824
|
|
849
825
|
# local agent join
|
850
826
|
# 1. Generate local cache files
|
851
827
|
console.log("Generating config files...")
|
852
|
-
|
828
|
+
|
829
|
+
# Generate docker compose recipe
|
830
|
+
generate_compose_config(
|
853
831
|
role="agent",
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
832
|
+
pool_ip=f"https://{kalavai_seed_ip}:6443",
|
833
|
+
pool_token=kalavai_token,
|
834
|
+
vpn_token=vpn["key"],
|
835
|
+
node_name=node_name,
|
858
836
|
node_labels=node_labels,
|
859
837
|
is_public=public_location is not None)
|
838
|
+
|
860
839
|
store_server_info(
|
861
840
|
server_ip=kalavai_seed_ip,
|
862
841
|
auth_key=auth_key,
|
@@ -866,8 +845,6 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
866
845
|
cluster_name=cluster_name,
|
867
846
|
public_location=public_location,
|
868
847
|
user_api_key=user["api_key"])
|
869
|
-
|
870
|
-
init_user_workspace()
|
871
848
|
|
872
849
|
option = user_confirm(
|
873
850
|
question="Docker compose ready. Would you like Kalavai to deploy it?",
|
@@ -883,18 +860,23 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
883
860
|
CLUSTER.start_worker_node()
|
884
861
|
except Exception as e:
|
885
862
|
console.log(f"[red] Error connecting to {cluster_name} @ {kalavai_seed_ip}. Check with the admin if the token is still valid.")
|
886
|
-
leave_vpn(
|
863
|
+
leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
|
887
864
|
exit()
|
888
865
|
|
889
|
-
|
890
|
-
|
891
|
-
|
866
|
+
# ensure we are connected
|
867
|
+
while True:
|
868
|
+
console.log("Waiting for core services to be ready, may take a few minutes...")
|
869
|
+
time.sleep(30)
|
870
|
+
if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
|
871
|
+
break
|
872
|
+
|
873
|
+
init_user_workspace()
|
892
874
|
|
893
875
|
# set status to schedulable
|
894
876
|
console.log(f"[green] You are connected to {cluster_name}")
|
895
877
|
|
896
878
|
@arguably.command
|
897
|
-
def pool__stop(*others):
|
879
|
+
def pool__stop(*others, skip_node_deletion=False):
|
898
880
|
"""
|
899
881
|
Stop sharing your device and clean up. DO THIS ONLY IF YOU WANT TO REMOVE KALAVAI-CLIENT from your device.
|
900
882
|
|
@@ -903,7 +885,8 @@ def pool__stop(*others):
|
|
903
885
|
"""
|
904
886
|
console.log("[white] Stopping kalavai app...")
|
905
887
|
# delete local node from server
|
906
|
-
|
888
|
+
if not skip_node_deletion:
|
889
|
+
node__delete(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
|
907
890
|
# unpublish event (only if seed node)
|
908
891
|
# TODO: no, this should be done via the platform!!!
|
909
892
|
# try:
|
@@ -916,7 +899,20 @@ def pool__stop(*others):
|
|
916
899
|
# console.log(f"[red][WARNING]: (ignore if not a public pool) Error when unpublishing cluster. {str(e)}")
|
917
900
|
# remove local node agent
|
918
901
|
console.log("Removing agent and local cache")
|
902
|
+
|
903
|
+
# disconnect from VPN first, then remove agent, then remove local files
|
904
|
+
console.log("Disconnecting from VPN...")
|
905
|
+
try:
|
906
|
+
vpns = leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
|
907
|
+
if vpns is not None:
|
908
|
+
for vpn in vpns:
|
909
|
+
console.log(f"You have left {vpn} VPN")
|
910
|
+
except:
|
911
|
+
# no vpn
|
912
|
+
pass
|
913
|
+
|
919
914
|
CLUSTER.remove_agent()
|
915
|
+
|
920
916
|
# clean local files
|
921
917
|
cleanup_local()
|
922
918
|
console.log("[white] Kalavai has stopped sharing your resources. Use [yellow]kalavai pool start[white] or [yellow]kalavai pool join[white] to start again!")
|
@@ -1107,6 +1103,11 @@ def pool__attach(token, *others, node_name=None):
|
|
1107
1103
|
"""
|
1108
1104
|
Set creds in token on the local instance
|
1109
1105
|
"""
|
1106
|
+
|
1107
|
+
if node_name is None:
|
1108
|
+
node_name = socket.gethostname()
|
1109
|
+
|
1110
|
+
# check that is not attached to another instance
|
1110
1111
|
if os.path.exists(USER_LOCAL_SERVER_FILE):
|
1111
1112
|
option = user_confirm(
|
1112
1113
|
question="You seem to be connected to an instance already. Are you sure you want to join a new one?",
|
@@ -1115,34 +1116,39 @@ def pool__attach(token, *others, node_name=None):
|
|
1115
1116
|
if option == 0:
|
1116
1117
|
console.log("[green]Nothing happened.")
|
1117
1118
|
return
|
1119
|
+
|
1120
|
+
# check token
|
1121
|
+
if not pool__check_token(token):
|
1122
|
+
return
|
1123
|
+
|
1118
1124
|
try:
|
1119
1125
|
data = decode_dict(token)
|
1120
1126
|
kalavai_seed_ip = data[CLUSTER_IP_KEY]
|
1121
|
-
kalavai_token = data[CLUSTER_TOKEN_KEY]
|
1122
1127
|
cluster_name = data[CLUSTER_NAME_KEY]
|
1123
1128
|
auth_key = data[AUTH_KEY]
|
1124
1129
|
watcher_service = data[WATCHER_SERVICE_KEY]
|
1125
1130
|
public_location = data[PUBLIC_LOCATION_KEY]
|
1126
|
-
|
1127
|
-
|
1131
|
+
vpn = defaultdict(lambda: None)
|
1132
|
+
except Exception as e:
|
1133
|
+
console.log(str(e))
|
1134
|
+
console.log("[red] Invalid token")
|
1128
1135
|
return
|
1129
|
-
|
1136
|
+
|
1130
1137
|
user = defaultdict(lambda: None)
|
1131
1138
|
if public_location is not None:
|
1132
|
-
|
1139
|
+
user = user_login(user_cookie=USER_COOKIE)
|
1140
|
+
if user is None:
|
1141
|
+
console.log("[red]Must be logged in to join public pools. Run [yellow]kalavai login[red] to authenticate")
|
1142
|
+
exit()
|
1143
|
+
console.log("Fetching VPN credentials")
|
1133
1144
|
try:
|
1134
|
-
|
1135
|
-
return
|
1136
|
-
vpn = join_vpn(
|
1145
|
+
vpn = get_vpn_details(
|
1137
1146
|
location=public_location,
|
1138
1147
|
user_cookie=USER_COOKIE)
|
1139
|
-
user = user_login(user_cookie=USER_COOKIE)
|
1140
|
-
time.sleep(5)
|
1141
1148
|
except Exception as e:
|
1142
1149
|
console.log(f"[red]Error when joining network: {str(e)}")
|
1143
1150
|
console.log("Are you authenticated? Try [yellow]kalavai login")
|
1144
1151
|
return
|
1145
|
-
# validate public seed
|
1146
1152
|
try:
|
1147
1153
|
validate_join_public_seed(
|
1148
1154
|
cluster_name=cluster_name,
|
@@ -1151,9 +1157,20 @@ def pool__attach(token, *others, node_name=None):
|
|
1151
1157
|
)
|
1152
1158
|
except Exception as e:
|
1153
1159
|
console.log(f"[red]Error when joining network: {str(e)}")
|
1154
|
-
leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
|
1155
1160
|
return
|
1156
|
-
|
1161
|
+
|
1162
|
+
# local agent join
|
1163
|
+
# 1. Generate local cache files
|
1164
|
+
console.log("Generating config files...")
|
1165
|
+
|
1166
|
+
# Generate docker compose recipe
|
1167
|
+
generate_compose_config(
|
1168
|
+
use_gpus=False,
|
1169
|
+
role="",
|
1170
|
+
vpn_token=vpn["key"],
|
1171
|
+
node_name=node_name,
|
1172
|
+
is_public=public_location is not None)
|
1173
|
+
|
1157
1174
|
store_server_info(
|
1158
1175
|
server_ip=kalavai_seed_ip,
|
1159
1176
|
auth_key=auth_key,
|
@@ -1164,7 +1181,26 @@ def pool__attach(token, *others, node_name=None):
|
|
1164
1181
|
public_location=public_location,
|
1165
1182
|
user_api_key=user["api_key"])
|
1166
1183
|
|
1167
|
-
|
1184
|
+
option = user_confirm(
|
1185
|
+
question="Docker compose ready. Would you like Kalavai to deploy it?",
|
1186
|
+
options=["no", "yes"]
|
1187
|
+
)
|
1188
|
+
if option == 0:
|
1189
|
+
console.log("Manually deploy the worker with the following command:\n")
|
1190
|
+
print(f"docker compose -f {USER_COMPOSE_FILE} up -d")
|
1191
|
+
return
|
1192
|
+
|
1193
|
+
console.log(f"[white] Connecting to {cluster_name} @ {kalavai_seed_ip} (this may take a few minutes)...")
|
1194
|
+
run_cmd(f"docker compose -f {USER_COMPOSE_FILE} up -d")
|
1195
|
+
# ensure we are connected
|
1196
|
+
while True:
|
1197
|
+
console.log("Waiting for core services to be ready, may take a few minutes...")
|
1198
|
+
time.sleep(30)
|
1199
|
+
if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
|
1200
|
+
break
|
1201
|
+
|
1202
|
+
# set status to schedulable
|
1203
|
+
console.log(f"[green] You are connected to {cluster_name}")
|
1168
1204
|
|
1169
1205
|
|
1170
1206
|
@arguably.command
|
@@ -1480,6 +1516,62 @@ def job__run(template_name, *others, values: str=None, force_namespace: str=None
|
|
1480
1516
|
console.log(f"[red]Error when connecting to kalavai service: {str(e)}")
|
1481
1517
|
return
|
1482
1518
|
|
1519
|
+
@arguably.command
|
1520
|
+
def job__test(local_template_dir, *others, values, defaults, force_namespace: str=None):
|
1521
|
+
"""
|
1522
|
+
Helper to test local templates, useful for development
|
1523
|
+
"""
|
1524
|
+
try:
|
1525
|
+
CLUSTER.validate_cluster()
|
1526
|
+
except Exception as e:
|
1527
|
+
console.log(f"[red]Problems with your pool: {str(e)}")
|
1528
|
+
return
|
1529
|
+
|
1530
|
+
if not os.path.isdir(local_template_dir):
|
1531
|
+
console.log(f"[red]--local_template_dir ({local_template_dir}) is not a directory")
|
1532
|
+
return
|
1533
|
+
|
1534
|
+
# load template
|
1535
|
+
with open(os.path.join(local_template_dir, "template.yaml"), "r") as f:
|
1536
|
+
template_str = f.read()
|
1537
|
+
|
1538
|
+
# load values
|
1539
|
+
if not os.path.isfile(values):
|
1540
|
+
console.log(f"[red]--values ({values}) is not a valid local file")
|
1541
|
+
return
|
1542
|
+
with open(values, "r") as f:
|
1543
|
+
values_dict = yaml.safe_load(f)
|
1544
|
+
# load defaults
|
1545
|
+
if not os.path.isfile(defaults):
|
1546
|
+
console.log(f"[red]--defaults ({defaults}) is not a valid local file")
|
1547
|
+
return
|
1548
|
+
with open(defaults, "r") as f:
|
1549
|
+
defaults = f.read()
|
1550
|
+
|
1551
|
+
# submit custom deployment
|
1552
|
+
data = {
|
1553
|
+
"template": template_str,
|
1554
|
+
"template_values": values_dict,
|
1555
|
+
"default_values": defaults
|
1556
|
+
}
|
1557
|
+
if force_namespace is not None:
|
1558
|
+
data["force_namespace"] = force_namespace
|
1559
|
+
|
1560
|
+
try:
|
1561
|
+
result = request_to_server(
|
1562
|
+
method="post",
|
1563
|
+
endpoint="/v1/deploy_custom_job",
|
1564
|
+
data=data,
|
1565
|
+
server_creds=USER_LOCAL_SERVER_FILE,
|
1566
|
+
user_cookie=USER_COOKIE
|
1567
|
+
)
|
1568
|
+
console.log("Deployment result:")
|
1569
|
+
print(
|
1570
|
+
json.dumps(result,indent=3)
|
1571
|
+
)
|
1572
|
+
except Exception as e:
|
1573
|
+
console.log(f"[red]Error when connecting to kalavai service: {str(e)}")
|
1574
|
+
|
1483
1575
|
|
1484
1576
|
@arguably.command
|
1485
1577
|
def job__defaults(template_name, *others):
|
kalavai_client/cluster.py
CHANGED
@@ -7,7 +7,8 @@ from kalavai_client.utils import (
|
|
7
7
|
run_cmd,
|
8
8
|
check_gpu_drivers,
|
9
9
|
validate_poolconfig,
|
10
|
-
user_path
|
10
|
+
user_path,
|
11
|
+
populate_template
|
11
12
|
)
|
12
13
|
|
13
14
|
|
@@ -20,6 +21,9 @@ class Cluster(ABC):
|
|
20
21
|
def start_worker_node(self, url, token, node_name, auth_key, watcher_service, ip_address, labels, flannel_iface):
|
21
22
|
raise NotImplementedError()
|
22
23
|
|
24
|
+
@abstractmethod
|
25
|
+
def get_vpn_ip(self):
|
26
|
+
raise NotImplementedError()
|
23
27
|
|
24
28
|
@abstractmethod
|
25
29
|
def update_dependencies(self, dependencies_files):
|
@@ -79,11 +83,23 @@ class dockerCluster(Cluster):
|
|
79
83
|
def start_seed_node(self):
|
80
84
|
|
81
85
|
run_cmd(f"docker compose -f {self.compose_file} up -d")
|
82
|
-
|
83
|
-
|
86
|
+
# wait for container to be setup
|
87
|
+
while True:
|
88
|
+
try:
|
89
|
+
run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file} >/dev/null 2>&1")
|
90
|
+
break
|
91
|
+
except:
|
92
|
+
pass
|
93
|
+
time.sleep(5)
|
84
94
|
|
85
95
|
def start_worker_node(self):
|
86
96
|
run_cmd(f"docker compose -f {self.compose_file} up -d")
|
97
|
+
|
98
|
+
def get_vpn_ip(self):
|
99
|
+
command = populate_template(
|
100
|
+
template_str="docker exec -it {{container_name}} ifconfig {{iface_name}} | grep 'inet ' | awk '{gsub(/^addr:/, \"\", $2); print $2}'",
|
101
|
+
values_dict={"container_name": self.container_name, "iface_name": self.default_flannel_iface})
|
102
|
+
return run_cmd(command).decode().strip()
|
87
103
|
|
88
104
|
|
89
105
|
def update_dependencies(self, dependencies_file=None, debug=False, retries=3):
|
@@ -122,8 +138,13 @@ class dockerCluster(Cluster):
|
|
122
138
|
def is_seed_node(self):
|
123
139
|
if not os.path.isfile(self.compose_file):
|
124
140
|
return False
|
125
|
-
|
126
|
-
|
141
|
+
if not self.is_agent_running():
|
142
|
+
return False
|
143
|
+
try:
|
144
|
+
run_cmd(f"docker container exec {self.container_name} cat /var/lib/rancher/k3s/server/node-token >/dev/null 2>&1")
|
145
|
+
return True
|
146
|
+
except:
|
147
|
+
return False
|
127
148
|
|
128
149
|
def is_cluster_init(self):
|
129
150
|
if not os.path.isfile(self.compose_file):
|
kalavai_client/utils.py
CHANGED
@@ -2,7 +2,6 @@ import json, base64
|
|
2
2
|
import os
|
3
3
|
import requests
|
4
4
|
from pathlib import Path
|
5
|
-
from urllib.parse import urljoin
|
6
5
|
import shutil
|
7
6
|
import subprocess
|
8
7
|
import re
|
@@ -12,10 +11,7 @@ from jinja2 import Template
|
|
12
11
|
|
13
12
|
from rich.table import Table
|
14
13
|
import yaml
|
15
|
-
import platform
|
16
|
-
import psutil
|
17
14
|
|
18
|
-
import GPUtil
|
19
15
|
|
20
16
|
from kalavai_client.auth import KalavaiAuthClient
|
21
17
|
|
@@ -245,12 +241,12 @@ def join_vpn(location, user_cookie):
|
|
245
241
|
run_cmd(f"sudo netclient join -t {token} >/dev/null 2>&1")
|
246
242
|
return vpn
|
247
243
|
|
248
|
-
def leave_vpn():
|
244
|
+
def leave_vpn(container_name):
|
249
245
|
try:
|
250
|
-
vpns = json.loads(run_cmd("
|
246
|
+
vpns = json.loads(run_cmd(f"docker exec {container_name} netclient list").decode())
|
251
247
|
left_vpns = [vpn['network'] for vpn in vpns]
|
252
248
|
for vpn in left_vpns:
|
253
|
-
run_cmd(f"
|
249
|
+
run_cmd(f"docker exec {container_name} netclient leave {vpn}")
|
254
250
|
return left_vpns
|
255
251
|
except:
|
256
252
|
return None
|
@@ -288,8 +284,11 @@ def request_to_server(
|
|
288
284
|
json=data,
|
289
285
|
headers=headers
|
290
286
|
)
|
291
|
-
|
292
|
-
|
287
|
+
try:
|
288
|
+
result = response.json()
|
289
|
+
return result
|
290
|
+
except Exception as e:
|
291
|
+
raise ValueError(f"Error with HTTP request: {response.text}\n{str(e)}")
|
293
292
|
|
294
293
|
|
295
294
|
def generate_table(columns, rows, end_sections=None):
|
@@ -387,48 +386,13 @@ def encode_dict(data: dict):
|
|
387
386
|
def decode_dict(str_data: str):
|
388
387
|
return json.loads(base64.b64decode(str_data.encode()))
|
389
388
|
|
390
|
-
def get_gpus():
|
391
|
-
GPUs = GPUtil.getGPUs()
|
392
|
-
gpus = []
|
393
|
-
for gpu in GPUs:
|
394
|
-
name = "nvidia" if "nvidia" in gpu.name.lower() else None
|
395
|
-
if name is None:
|
396
|
-
continue
|
397
|
-
mem = int(gpu.memoryTotal / 1000) # in GBs
|
398
|
-
gpus.append(f"{name}-{mem}GB")
|
399
|
-
return ",".join(gpus)
|
400
|
-
|
401
|
-
def system_uptick_request(username, node_name, backend_endpoint, backend_api_key, local_version=0):
|
402
|
-
gpus = get_gpus()
|
403
|
-
data = {
|
404
|
-
"username": username,
|
405
|
-
"system_info": {
|
406
|
-
"os": platform.system(),
|
407
|
-
"cpu_count": os.cpu_count(),
|
408
|
-
"cpu": platform.processor(),
|
409
|
-
"platform": platform.platform(),
|
410
|
-
"ram": round(psutil.virtual_memory().total / (1024.0 **3)),
|
411
|
-
"hostname": node_name,
|
412
|
-
"gpus": gpus
|
413
|
-
},
|
414
|
-
"version": local_version
|
415
|
-
}
|
416
|
-
|
417
|
-
response = requests.post(
|
418
|
-
url=urljoin(backend_endpoint, "/uptick"),
|
419
|
-
json=data,
|
420
|
-
headers={'X-API-KEY': backend_api_key}
|
421
|
-
)
|
422
|
-
response.raise_for_status()
|
423
|
-
return response.json()
|
424
|
-
|
425
389
|
def resource_path(relative_path: str):
|
426
390
|
""" Get absolute path to resource """
|
427
391
|
try:
|
428
392
|
last_slash = relative_path.rfind("/")
|
429
393
|
path = relative_path[:last_slash].replace("/", ".")
|
430
394
|
filename = relative_path[last_slash+1:]
|
431
|
-
resource = importlib.resources.path(
|
395
|
+
resource = str(importlib.resources.files(path).joinpath(filename))
|
432
396
|
except Exception as e:
|
433
397
|
return None
|
434
398
|
return resource
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: kalavai-client
|
3
|
-
Version: 0.5.
|
3
|
+
Version: 0.5.12
|
4
4
|
Summary: Client app for kalavai platform
|
5
5
|
License: Apache-2.0
|
6
6
|
Keywords: LLM,platform
|
@@ -8,9 +8,13 @@ Author: Carlos Fernandez Musoles
|
|
8
8
|
Author-email: carlos@kalavai.net
|
9
9
|
Maintainer: Carlos Fernandez Musoles
|
10
10
|
Maintainer-email: carlos@kalavai.net
|
11
|
-
Requires-Python: >=3.
|
11
|
+
Requires-Python: >=3.4
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
13
13
|
Classifier: Programming Language :: Python :: 3
|
14
|
+
Classifier: Programming Language :: Python :: 3.4
|
15
|
+
Classifier: Programming Language :: Python :: 3.5
|
16
|
+
Classifier: Programming Language :: Python :: 3.6
|
17
|
+
Classifier: Programming Language :: Python :: 3.7
|
14
18
|
Classifier: Programming Language :: Python :: 3.8
|
15
19
|
Classifier: Programming Language :: Python :: 3.9
|
16
20
|
Classifier: Programming Language :: Python :: 3.10
|
@@ -22,7 +26,6 @@ Requires-Dist: Pillow (==10.3.0)
|
|
22
26
|
Requires-Dist: anvil-uplink (==0.5.1)
|
23
27
|
Requires-Dist: arguably (>=1.2.5)
|
24
28
|
Requires-Dist: build ; extra == "dev"
|
25
|
-
Requires-Dist: gputil (==1.4.0)
|
26
29
|
Requires-Dist: importlib_resources (==6.5.2)
|
27
30
|
Requires-Dist: jinja2 (==3.1.4)
|
28
31
|
Requires-Dist: netifaces (==0.11.0)
|
@@ -32,7 +35,7 @@ Requires-Dist: pyinstaller (==6.5.0) ; extra == "dev"
|
|
32
35
|
Requires-Dist: pyyaml (==6.0.2)
|
33
36
|
Requires-Dist: requests (>=2.25)
|
34
37
|
Requires-Dist: rich (==13.7.1)
|
35
|
-
Requires-Dist: setuptools (
|
38
|
+
Requires-Dist: setuptools (>75.0.0)
|
36
39
|
Requires-Dist: twine ; extra == "dev"
|
37
40
|
Project-URL: Homepage, https://platform.kalavai.net
|
38
41
|
Project-URL: Website, https://kalavai.net
|
@@ -47,7 +50,7 @@ Description-Content-Type: text/markdown
|
|
47
50
|
|
48
51
|
</div>
|
49
52
|
|
50
|
-
⭐⭐⭐ **Kalavai and our LLM pools are open source, and free to use in both commercial and non-commercial purposes. If you find it useful, consider supporting us by [
|
53
|
+
⭐⭐⭐ **Kalavai and our LLM pools are open source, and free to use for both commercial and non-commercial purposes. If you find it useful, consider supporting us by [giving a star to our GitHub project](https://github.com/kalavai-net/kalavai-client), joining our [Discord channel](https://discord.gg/HJ8FNapQ), following our [Substack](https://kalavainet.substack.com/) and giving us a [review on Product Hunt](https://www.producthunt.com/products/kalavai/reviews/new).**
|
51
54
|
|
52
55
|
|
53
56
|
# Kalavai: turn your devices into a scalable LLM platform
|
@@ -86,6 +89,7 @@ https://github.com/user-attachments/assets/0d2316f3-79ea-46ac-b41e-8ef720f52672
|
|
86
89
|
|
87
90
|
### News updates
|
88
91
|
|
92
|
+
- 31 January 2025: `kalavai-client` is now a [PyPI package](https://pypi.org/project/kalavai-client/), easier to install than ever!
|
89
93
|
- 27 January 2025: Support for accessing pools from remote computers
|
90
94
|
- 9 January 2025: Added support for [Aphrodite Engine](https://github.com/aphrodite-engine/aphrodite-engine) models
|
91
95
|
- 8 January 2025: Release of [a free, public, shared pool](/docs/docs/public_llm_pool.md) for community LLM deployment
|
@@ -126,21 +130,47 @@ Not what you were looking for? [Tell us](https://github.com/kalavai-net/kalavai-
|
|
126
130
|
|
127
131
|
## Getting started
|
128
132
|
|
129
|
-
The `kalavai`
|
133
|
+
The `kalavai-client` is the main tool to interact with the Kalavai platform, to create and manage both local and public pools and also to interact with them (e.g. deploy models). Let's go over its installation.
|
130
134
|
|
131
|
-
From release **v0.5.0, you can now install `kalavai`
|
135
|
+
From release **v0.5.0, you can now install `kalavai-client` in non-worker computers**. You can run a pool on a set of machines and have the client on a remote computer from which you access the LLM pool. Because the client only requires having python installed, this means more computers are now supported to run it.
|
132
136
|
|
133
137
|
|
134
|
-
### Requirements
|
138
|
+
### Requirements
|
139
|
+
|
140
|
+
For workers sharing resources with the pool:
|
135
141
|
|
136
142
|
- A laptop, desktop or Virtual Machine
|
137
143
|
- Docker engine installed (for [linux](https://docs.docker.com/engine/install/), [Windows and MacOS](https://docs.docker.com/desktop/)) with [privilege access](https://docs.docker.com/engine/containers/run/#runtime-privilege-and-linux-capabilities).
|
138
144
|
|
145
|
+
> **Support for Windows and MacOS workers is experimental**: kalavai workers run in Docker containers that require access to the host network interfaces; as a result, systems that do not support containers natively (Windows and MacOS) may have difficulties finding each other.
|
146
|
+
|
147
|
+
Any system that runs Python 3.6+ is able to run the `kalavai-client` and therefore connect to and operate an LLM pool, without sharing with the pool. Your computer won't be adding its capacity to the pool, but it will be able to deploy jobs and interact with models.
|
148
|
+
|
149
|
+
|
150
|
+
#### Common issues
|
151
|
+
|
152
|
+
If you see the following error:
|
139
153
|
|
140
|
-
|
154
|
+
```bash
|
155
|
+
fatal error: Python.h: No such file or directory | #include <Python.h>
|
156
|
+
```
|
141
157
|
|
142
|
-
-
|
158
|
+
Make sure you also install the `python3-dev` package. For Ubuntu distros:
|
143
159
|
|
160
|
+
```bash
|
161
|
+
sudo apt install python3-dev
|
162
|
+
```
|
163
|
+
|
164
|
+
If you see:
|
165
|
+
```bash
|
166
|
+
AttributeError: install_layout. Did you mean: 'install_platlib'?
|
167
|
+
[end of output]
|
168
|
+
```
|
169
|
+
|
170
|
+
Upgrade your setuptools:
|
171
|
+
```bash
|
172
|
+
pip install -U setuptools
|
173
|
+
```
|
144
174
|
|
145
175
|
### Install the client
|
146
176
|
|
@@ -186,6 +216,17 @@ Copy the joining token. On the worker node, run:
|
|
186
216
|
kalavai pool join <token>
|
187
217
|
```
|
188
218
|
|
219
|
+
### 3. Attach more clients
|
220
|
+
|
221
|
+
You can now connect to an existing pool from any computer, not just from worker nodes. To connect to a pool, run:
|
222
|
+
|
223
|
+
```bash
|
224
|
+
kalavai pool attach <token>
|
225
|
+
```
|
226
|
+
|
227
|
+
This won't add the machine as a worker, but you will be able to operate in the pool as if you were. This is ideal for remote access to the pool, and to use the pool from machines that cannot run workers (docker container limitations).
|
228
|
+
|
229
|
+
|
189
230
|
### Enough already, let's run stuff!
|
190
231
|
|
191
232
|
Check our [examples](examples/) to put your new AI pool to good use!
|
@@ -250,12 +291,16 @@ Anything missing here? Give us a shout in the [discussion board](https://github.
|
|
250
291
|
|
251
292
|
### Requirements
|
252
293
|
|
253
|
-
Python version
|
294
|
+
Python version >= 3.6.
|
254
295
|
|
255
296
|
```bash
|
256
|
-
|
297
|
+
sudo add-apt-repository ppa:deadsnakes/ppa
|
298
|
+
sudo apt update
|
299
|
+
sudo apt install python3.10 python3.10-dev python3-virtualenv
|
300
|
+
virtualenv -p python3.10 env
|
257
301
|
source env/bin/activate
|
258
302
|
sudo apt install python3.10-venv python3.10-dev -y
|
303
|
+
pip install -U setuptools
|
259
304
|
pip install -e .[dev]
|
260
305
|
```
|
261
306
|
|
@@ -273,3 +318,4 @@ To run the unit tests, use:
|
|
273
318
|
python -m unittest
|
274
319
|
```
|
275
320
|
|
321
|
+
docker run --rm --net=host -v /root/.cache/kalavai/:/root/.cache/kalavai/ ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file /root/.cache/kalavai/apps.yaml --kubeconfig /root/.cache/kalavai/kubeconfig
|
@@ -0,0 +1,20 @@
|
|
1
|
+
kalavai_client/__init__.py,sha256=mqDZCSDdo4jyvGQ0x29B1roX6bl4-uTP0d3CclCa2oo,23
|
2
|
+
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
|
+
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
kalavai_client/assets/apps.yaml,sha256=LKLF315cBJG0Ts8oShl6WdKaJlM6Ewk_P_XyPVcApP8,5981
|
5
|
+
kalavai_client/assets/apps_values.yaml,sha256=CjKVelPQHd-hm-DTMEuya92feKiphU9mh3HrosLYYPE,1676
|
6
|
+
kalavai_client/assets/docker-compose-template.yaml,sha256=mo8LUam9-AzB_0w72wTyMyreKr4Ns-pxZGc4GVWcUHA,2747
|
7
|
+
kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
|
8
|
+
kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
|
9
|
+
kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
|
10
|
+
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
11
|
+
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
12
|
+
kalavai_client/auth.py,sha256=QsBh28L2LwjBBK6pTUE4Xu36lLDTyetyU1YfS1Hbb6g,1717
|
13
|
+
kalavai_client/cli.py,sha256=KVWQI2FOAU7km6hXNTs4PXw9Q24G4QqxujQL2pySEY0,70817
|
14
|
+
kalavai_client/cluster.py,sha256=fULTAad4KXEGeWZmp4_VBoBwT5eED_HOBUsXIKmf0CU,12119
|
15
|
+
kalavai_client/utils.py,sha256=7Cr2wXm2mXGJoseKbFdIVMuMBkqmzSgbwaCLHYoGOn0,12544
|
16
|
+
kalavai_client-0.5.12.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
17
|
+
kalavai_client-0.5.12.dist-info/METADATA,sha256=631MQR4FQ5pcMwbLKDucI8gy7qVylzhqmkKhohEBN8c,14101
|
18
|
+
kalavai_client-0.5.12.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
19
|
+
kalavai_client-0.5.12.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
20
|
+
kalavai_client-0.5.12.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
# https://docs.netmaker.io/docs/netclient#docker
|
2
|
-
services:
|
3
|
-
{{service_name}}:
|
4
|
-
network_mode: host
|
5
|
-
privileged: true
|
6
|
-
restart: always
|
7
|
-
environment:
|
8
|
-
- TOKEN={{vpn_token}}
|
9
|
-
- IFACE_NAME={{flannel_iface}}
|
10
|
-
volumes:
|
11
|
-
- '{{etc_path}}/netclient:/etc/netclient'
|
12
|
-
container_name: {{service_name}}
|
13
|
-
image: 'gravitl/netclient:latest'
|
@@ -1,20 +0,0 @@
|
|
1
|
-
kalavai_client/__init__.py,sha256=Ihhobsu68mjIUrgtsfOab7vjkNbLMm1uv7WvMKrKu8c,22
|
2
|
-
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
|
-
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
kalavai_client/assets/apps.yaml,sha256=aW9wKyvZhZFMHzBkZOsgVq-kpntED6U8B9XoHkm5F9Y,5963
|
5
|
-
kalavai_client/assets/apps_values.yaml,sha256=CjKVelPQHd-hm-DTMEuya92feKiphU9mh3HrosLYYPE,1676
|
6
|
-
kalavai_client/assets/docker-compose-template.yaml,sha256=qDv0og338clLobDDPEJ-HiGlMcCMMx2NOi5R_hdhKvw,1442
|
7
|
-
kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
|
8
|
-
kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
|
9
|
-
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
10
|
-
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
11
|
-
kalavai_client/assets/vpn-template.yaml,sha256=Hm7sevtrsakSxSKMJwl68hzOEWCaxwYytwkTgKhe_MM,397
|
12
|
-
kalavai_client/auth.py,sha256=QsBh28L2LwjBBK6pTUE4Xu36lLDTyetyU1YfS1Hbb6g,1717
|
13
|
-
kalavai_client/cli.py,sha256=qBOe4IULIX7pzKUhHoR1JppWrbUeV1aslxQLMWrRGpU,67913
|
14
|
-
kalavai_client/cluster.py,sha256=C7uofrpSEj4PUhF_VLfwH4k9BhJMTkuKRWHxDYu1OD0,11345
|
15
|
-
kalavai_client/utils.py,sha256=XO-fTrl8vudAOYK9QwuR-C5enzXORIs5A4sFIk3eA9k,13476
|
16
|
-
kalavai_client-0.5.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
17
|
-
kalavai_client-0.5.2.dist-info/METADATA,sha256=K-7NcUKhmWlkGyYmQ9hrnbKqN7UuGHUV6lQPnqxdSLg,12077
|
18
|
-
kalavai_client-0.5.2.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
19
|
-
kalavai_client-0.5.2.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
20
|
-
kalavai_client-0.5.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|