kalavai-client 0.5.7__py2.py3-none-any.whl → 0.5.9__py2.py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- kalavai_client/__init__.py +1 -1
- kalavai_client/assets/apps.yaml +8 -8
- kalavai_client/assets/docker-compose-template.yaml +100 -49
- kalavai_client/assets/nginx.conf +12 -0
- kalavai_client/cli.py +142 -112
- kalavai_client/cluster.py +26 -5
- kalavai_client/utils.py +3 -3
- {kalavai_client-0.5.7.dist-info → kalavai_client-0.5.9.dist-info}/METADATA +14 -3
- kalavai_client-0.5.9.dist-info/RECORD +20 -0
- kalavai_client/assets/vpn-template.yaml +0 -13
- kalavai_client-0.5.7.dist-info/RECORD +0 -20
- {kalavai_client-0.5.7.dist-info → kalavai_client-0.5.9.dist-info}/LICENSE +0 -0
- {kalavai_client-0.5.7.dist-info → kalavai_client-0.5.9.dist-info}/WHEEL +0 -0
- {kalavai_client-0.5.7.dist-info → kalavai_client-0.5.9.dist-info}/entry_points.txt +0 -0
kalavai_client/__init__.py
CHANGED
@@ -1,2 +1,2 @@
|
|
1
1
|
|
2
|
-
__version__ = "0.5.
|
2
|
+
__version__ = "0.5.9"
|
kalavai_client/assets/apps.yaml
CHANGED
@@ -189,13 +189,13 @@ releases:
|
|
189
189
|
value: "1"
|
190
190
|
- name: devicePlugin.deviceSplitCount
|
191
191
|
value: "1"
|
192
|
-
- name: scheduler.customWebhook.port
|
193
|
-
|
194
|
-
- name: scheduler.service.schedulerPort
|
195
|
-
|
196
|
-
- name: scheduler.service.monitorPort
|
197
|
-
|
198
|
-
- name: devicePlugin.service.httpPort
|
199
|
-
|
192
|
+
# - name: scheduler.customWebhook.port
|
193
|
+
# value: "30498"
|
194
|
+
# - name: scheduler.service.schedulerPort
|
195
|
+
# value: "30498"
|
196
|
+
# - name: scheduler.service.monitorPort
|
197
|
+
# value: "30493"
|
198
|
+
# - name: devicePlugin.service.httpPort
|
199
|
+
# value: "30492"
|
200
200
|
|
201
201
|
|
@@ -1,55 +1,106 @@
|
|
1
1
|
services:
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
2
|
+
{% if vpn %}
|
3
|
+
{{vpn_name}}:
|
4
|
+
image: gravitl/netclient:v0.30.0
|
5
|
+
container_name: {{vpn_name}}
|
6
|
+
cap_add:
|
7
|
+
- NET_ADMIN
|
8
|
+
- SYS_MODULE
|
9
|
+
network_mode: host
|
10
|
+
# networks:
|
11
|
+
# - custom-network
|
12
|
+
# ports:
|
13
|
+
# # https://docs.k3s.io/installation/requirements#inbound-rules-for-k3s-nodes
|
14
|
+
# - "6443:6443" # kube server
|
15
|
+
# - "10250:10250" # worker balancer
|
16
|
+
# - "8472:8472/udp" # flannel vxlan
|
17
|
+
# - "51820-51830:51820-51830" # flannel wireguard
|
18
|
+
# {% if command == "server" %}
|
19
|
+
# - "30000-30500:30000-30500"
|
20
|
+
# {% endif %}
|
21
|
+
environment:
|
22
|
+
- HOST_NAME={{node_name}}
|
23
|
+
- IFACE_NAME={{flannel_iface}}
|
24
|
+
- TOKEN={{vpn_token}}
|
25
|
+
volumes:
|
26
|
+
- /dev/net/tun:/dev/net/tun
|
27
|
+
restart: unless-stopped
|
28
|
+
# nginx:
|
29
|
+
# image: nginx:latest
|
30
|
+
# ports:
|
31
|
+
# - "{{redirect_source_port}}:{{redirect_source_port}}"
|
32
|
+
# restart: unless-stopped
|
33
|
+
# networks:
|
34
|
+
# - custom-network
|
35
|
+
# volumes:
|
36
|
+
# - {{nginx_path}}/nginx.conf:/etc/nginx/nginx.conf
|
14
37
|
{% endif %}
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
{%
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
{%
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
38
|
+
|
39
|
+
# run worker only if command is set
|
40
|
+
{%if command %}
|
41
|
+
{{service_name}}:
|
42
|
+
image: docker.io/bundenth/kalavai-runner:gpu-latest
|
43
|
+
container_name: {{service_name}}
|
44
|
+
{% if vpn %}
|
45
|
+
depends_on:
|
46
|
+
- {{vpn_name}}
|
47
|
+
network_mode: "service:{{vpn_name}}"
|
48
|
+
{% else %}
|
49
|
+
network_mode: host
|
50
|
+
# hostname: {{node_name}}
|
51
|
+
# networks:
|
52
|
+
# - custom-network
|
53
|
+
# ports:
|
54
|
+
# - "6443:6443" # kube server
|
55
|
+
# - "2379-2380:2379-2380" # etcd server
|
56
|
+
# - "10259:10259" # kube scheduler
|
57
|
+
# - "10257:10257" # kube controller manager
|
58
|
+
# - "10250:10250" # worker balancer
|
59
|
+
# - "8285:8285" # flannel
|
60
|
+
# - "8472:8472" # flannel vxlan
|
61
|
+
# - "51820:51820" # flannel wireguard
|
62
|
+
# {% if command == "server" %}
|
63
|
+
# - "30000-32767:30000-32767"
|
64
|
+
# {% endif %}
|
65
|
+
{% endif %}
|
66
|
+
privileged: true
|
67
|
+
restart: unless-stopped
|
68
|
+
command: >
|
69
|
+
--command={{command}}
|
70
|
+
{% if command == "server" %}
|
71
|
+
--port_range="30000-32767"
|
72
|
+
{% else %}
|
73
|
+
--server_ip={{pool_ip}}
|
74
|
+
--token={{pool_token}}
|
75
|
+
{% endif %}
|
76
|
+
{%if vpn %}
|
77
|
+
--flannel_iface={{flannel_iface}}
|
78
|
+
{% endif %}
|
79
|
+
{% if num_gpus and num_gpus > 0 %}
|
80
|
+
--gpu=on
|
81
|
+
{% else %}
|
82
|
+
--gpu=off
|
83
|
+
{% endif %}
|
84
|
+
{% if node_labels %}
|
85
|
+
--extra="{{node_labels}}"
|
86
|
+
{% endif %}
|
87
|
+
|
88
|
+
volumes:
|
89
|
+
- {{k3s_path}}:/var/lib/rancher/k3s # Persist data
|
90
|
+
- {{etc_path}}:/etc/rancher/k3s # Config files
|
91
|
+
|
92
|
+
{% if num_gpus and num_gpus > 0 %}
|
93
|
+
deploy:
|
94
|
+
resources:
|
95
|
+
reservations:
|
96
|
+
devices:
|
97
|
+
- driver: nvidia
|
98
|
+
count: {{num_gpus}}
|
99
|
+
capabilities: [gpu]
|
100
|
+
{% endif %}
|
51
101
|
{% endif %}
|
52
102
|
|
53
103
|
networks:
|
54
104
|
custom-network:
|
55
|
-
driver: bridge
|
105
|
+
driver: bridge
|
106
|
+
|
kalavai_client/cli.py
CHANGED
@@ -31,6 +31,7 @@ from kalavai_client.utils import (
|
|
31
31
|
safe_remove,
|
32
32
|
leave_vpn,
|
33
33
|
join_vpn,
|
34
|
+
get_vpn_details,
|
34
35
|
load_server_info,
|
35
36
|
user_login,
|
36
37
|
user_logout,
|
@@ -81,15 +82,17 @@ STORAGE_ACCESS_MODE = ["ReadWriteOnce"]
|
|
81
82
|
STORAGE_CLASS_LABEL = "kalavai.storage.enabled"
|
82
83
|
DEFAULT_STORAGE_NAME = "pool-cache"
|
83
84
|
DEFAULT_STORAGE_SIZE = 20
|
85
|
+
DEFAULT_WATCHER_PORT = 30001
|
84
86
|
USER_NODE_LABEL = "kalavai.cluster.user"
|
85
87
|
KUBE_VERSION = os.getenv("KALAVAI_KUBE_VERSION", "v1.31.1+k3s1")
|
86
|
-
DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker")
|
88
|
+
DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker-1")
|
87
89
|
FORBIDEDEN_IPS = ["127.0.0.1"]
|
88
90
|
# kalavai templates
|
89
91
|
HELM_APPS_FILE = resource_path("kalavai_client/assets/apps.yaml")
|
90
92
|
HELM_APPS_VALUES = resource_path("kalavai_client/assets/apps_values.yaml")
|
91
93
|
# user specific config files
|
92
|
-
DEFAULT_CONTAINER_NAME = "kalavai
|
94
|
+
DEFAULT_CONTAINER_NAME = "kalavai"
|
95
|
+
DEFAULT_VPN_CONTAINER_NAME = "kalavai-vpn"
|
93
96
|
CONTAINER_HOST_PATH = user_path("pool/", create_path=True)
|
94
97
|
USER_COMPOSE_FILE = user_path("docker-compose-worker.yaml")
|
95
98
|
USER_VPN_COMPOSE_FILE = user_path("docker-compose-vpn.yaml")
|
@@ -115,27 +118,6 @@ CLUSTER = dockerCluster(
|
|
115
118
|
######################
|
116
119
|
## HELPER FUNCTIONS ##
|
117
120
|
######################
|
118
|
-
|
119
|
-
def check_vpn_compatibility():
|
120
|
-
"""Check required packages to join VPN"""
|
121
|
-
logs = []
|
122
|
-
console.log("[white]Checking system requirements...")
|
123
|
-
# netclient
|
124
|
-
try:
|
125
|
-
run_cmd("sudo netclient version >/dev/null 2>&1")
|
126
|
-
except:
|
127
|
-
logs.append("[red]Netmaker not installed. Install instructions:\n")
|
128
|
-
logs.append(" Linux: https://docs.netmaker.io/docs/netclient#linux\n")
|
129
|
-
logs.append(" Windows: https://docs.netmaker.io/docs/netclient#windows\n")
|
130
|
-
logs.append(" MacOS: https://docs.netmaker.io/docs/netclient#mac\n")
|
131
|
-
|
132
|
-
if len(logs) == 0:
|
133
|
-
console.log("[green]System is ready to join a pool")
|
134
|
-
return True
|
135
|
-
else:
|
136
|
-
for log in logs:
|
137
|
-
console.log(log)
|
138
|
-
return False
|
139
121
|
|
140
122
|
def check_seed_compatibility():
|
141
123
|
"""Check required packages to start pools"""
|
@@ -179,16 +161,6 @@ def check_worker_compatibility():
|
|
179
161
|
|
180
162
|
|
181
163
|
def cleanup_local():
|
182
|
-
# disconnect from private network
|
183
|
-
console.log("Disconnecting from VPN...")
|
184
|
-
try:
|
185
|
-
vpns = leave_vpn()
|
186
|
-
if vpns is not None:
|
187
|
-
for vpn in vpns:
|
188
|
-
console.log(f"You have left {vpn} VPN")
|
189
|
-
except:
|
190
|
-
# no vpn
|
191
|
-
pass
|
192
164
|
console.log("Removing local cache files...")
|
193
165
|
safe_remove(CONTAINER_HOST_PATH)
|
194
166
|
safe_remove(USER_COMPOSE_FILE)
|
@@ -365,7 +337,7 @@ def select_token_type():
|
|
365
337
|
break
|
366
338
|
return {"admin": choice == 0, "user": choice == 1, "worker": choice == 2}
|
367
339
|
|
368
|
-
def generate_compose_config(role, node_name,
|
340
|
+
def generate_compose_config(role, node_name, is_public, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
|
369
341
|
num_gpus = 0
|
370
342
|
try:
|
371
343
|
has_gpus = check_gpu_drivers()
|
@@ -377,20 +349,24 @@ def generate_compose_config(role, node_name, ip_address, node_labels, is_public,
|
|
377
349
|
)
|
378
350
|
except:
|
379
351
|
console.log(f"[red]WARNING: error when fetching NVIDIA GPU info. GPUs will not be used on this local machine")
|
352
|
+
if node_labels is not None:
|
353
|
+
node_labels = " ".join([f"--node-label {key}={value}" for key, value in node_labels.items()])
|
380
354
|
compose_values = {
|
381
355
|
"user_path": user_path(""),
|
382
356
|
"service_name": DEFAULT_CONTAINER_NAME,
|
383
|
-
"
|
384
|
-
"
|
385
|
-
"
|
357
|
+
"vpn": is_public,
|
358
|
+
"vpn_name": DEFAULT_VPN_CONTAINER_NAME,
|
359
|
+
"pool_ip": pool_ip,
|
360
|
+
"pool_token": pool_token,
|
361
|
+
"vpn_token": vpn_token,
|
362
|
+
"node_name": node_name,
|
386
363
|
"command": role,
|
387
364
|
"storage_enabled": "True",
|
388
|
-
"ip_address": ip_address,
|
389
365
|
"num_gpus": num_gpus,
|
390
366
|
"k3s_path": f"{CONTAINER_HOST_PATH}/k3s",
|
391
367
|
"etc_path": f"{CONTAINER_HOST_PATH}/etc",
|
392
|
-
"node_labels":
|
393
|
-
"flannel_iface": DEFAULT_FLANNEL_IFACE if is_public else
|
368
|
+
"node_labels": node_labels,
|
369
|
+
"flannel_iface": DEFAULT_FLANNEL_IFACE if is_public else ""
|
394
370
|
}
|
395
371
|
# generate local config files
|
396
372
|
compose_yaml = load_template(
|
@@ -585,35 +561,56 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
|
|
585
561
|
STORAGE_CLASS_LABEL: is_storage_compatible()
|
586
562
|
}
|
587
563
|
if location is not None:
|
588
|
-
console.log("
|
564
|
+
console.log("Fetching VPN credentials")
|
589
565
|
try:
|
590
|
-
|
591
|
-
return
|
592
|
-
vpn = join_vpn(
|
566
|
+
vpn = get_vpn_details(
|
593
567
|
location=location,
|
594
568
|
user_cookie=USER_COOKIE)
|
595
569
|
node_labels[USER_NODE_LABEL] = user["username"]
|
596
570
|
except Exception as e:
|
597
571
|
console.log(f"[red]Error when joining network: {str(e)}")
|
598
572
|
return
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
573
|
+
|
574
|
+
# Generate docker compose recipe
|
575
|
+
generate_compose_config(
|
576
|
+
role="server",
|
577
|
+
vpn_token=vpn["key"],
|
578
|
+
node_name=socket.gethostname(),
|
579
|
+
node_labels=node_labels,
|
580
|
+
is_public=location is not None
|
581
|
+
)
|
582
|
+
|
583
|
+
# start server
|
584
|
+
console.log("Deploying seed...")
|
585
|
+
CLUSTER.start_seed_node()
|
586
|
+
|
587
|
+
while not CLUSTER.is_agent_running():
|
588
|
+
console.log("Waiting for seed to start...")
|
589
|
+
time.sleep(10)
|
590
|
+
|
591
|
+
# select IP address (for external discovery)
|
592
|
+
if ip_address is None and location is None:
|
593
|
+
# local IP
|
594
|
+
console.log(f"Scanning for valid IPs")
|
595
|
+
ip_address = select_ip_address()
|
596
|
+
else:
|
597
|
+
# load VPN ip
|
598
|
+
ip_address = CLUSTER.get_vpn_ip()
|
603
599
|
console.log(f"Using {ip_address} address for server")
|
604
600
|
|
601
|
+
# populate local cred files
|
605
602
|
auth_key = str(uuid.uuid4())
|
606
603
|
write_auth_key = str(uuid.uuid4())
|
607
604
|
readonly_auth_key = str(uuid.uuid4())
|
608
|
-
|
609
|
-
watcher_service = f"{ip_address}:{
|
605
|
+
|
606
|
+
watcher_service = f"{ip_address}:{DEFAULT_WATCHER_PORT}"
|
610
607
|
values = {
|
611
608
|
CLUSTER_NAME_KEY: cluster_name,
|
612
609
|
CLUSTER_IP_KEY: ip_address,
|
613
610
|
AUTH_KEY: auth_key,
|
614
611
|
READONLY_AUTH_KEY: readonly_auth_key,
|
615
612
|
WRITE_AUTH_KEY: write_auth_key,
|
616
|
-
WATCHER_PORT_KEY:
|
613
|
+
WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
|
617
614
|
WATCHER_SERVICE_KEY: watcher_service,
|
618
615
|
USER_NODE_LABEL_KEY: USER_NODE_LABEL,
|
619
616
|
ALLOW_UNREGISTERED_USER_KEY: not only_registered_users
|
@@ -630,15 +627,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
|
|
630
627
|
cluster_name=cluster_name,
|
631
628
|
public_location=location,
|
632
629
|
user_api_key=user["api_key"])
|
633
|
-
|
634
|
-
# 1. Generate docker compose recipe
|
635
|
-
compose_yaml = generate_compose_config(
|
636
|
-
role="server",
|
637
|
-
node_name=socket.gethostname(),
|
638
|
-
ip_address=ip_address,
|
639
|
-
node_labels=node_labels,
|
640
|
-
is_public=location is not None
|
641
|
-
)
|
642
630
|
|
643
631
|
# Generate helmfile recipe
|
644
632
|
helm_yaml = load_template(
|
@@ -650,14 +638,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
|
|
650
638
|
f.write(helm_yaml)
|
651
639
|
|
652
640
|
console.log("[green]Config files have been generated in your local machine\n")
|
653
|
-
|
654
|
-
# # 1. start server
|
655
|
-
console.log("Deploying seed...")
|
656
|
-
CLUSTER.start_seed_node()
|
657
|
-
|
658
|
-
while not CLUSTER.is_agent_running():
|
659
|
-
console.log("Waiting for seed to start...")
|
660
|
-
time.sleep(10)
|
661
641
|
|
662
642
|
console.log("Setting pool dependencies...")
|
663
643
|
# set template values in helmfile
|
@@ -691,7 +671,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
|
|
691
671
|
|
692
672
|
return None
|
693
673
|
|
694
|
-
|
695
674
|
@arguably.command
|
696
675
|
def pool__token(*others, admin=False, user=False, worker=False):
|
697
676
|
"""
|
@@ -773,6 +752,7 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
773
752
|
if CLUSTER.is_agent_running():
|
774
753
|
console.log(f"[white] You are already connected to {load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE)}. Enter [yellow]kalavai pool stop[white] to exit and join another one.")
|
775
754
|
return
|
755
|
+
|
776
756
|
# check that is not attached to another instance
|
777
757
|
if os.path.exists(USER_LOCAL_SERVER_FILE):
|
778
758
|
option = user_confirm(
|
@@ -810,20 +790,20 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
810
790
|
}
|
811
791
|
user = defaultdict(lambda: None)
|
812
792
|
if public_location is not None:
|
813
|
-
|
793
|
+
user = user_login(user_cookie=USER_COOKIE)
|
794
|
+
if user is None:
|
795
|
+
console.log("[red]Must be logged in to join public pools. Run [yellow]kalavai login[red] to authenticate")
|
796
|
+
exit()
|
797
|
+
console.log("Fetching VPN credentials")
|
814
798
|
try:
|
815
|
-
|
816
|
-
return
|
817
|
-
vpn = join_vpn(
|
799
|
+
vpn = get_vpn_details(
|
818
800
|
location=public_location,
|
819
801
|
user_cookie=USER_COOKIE)
|
820
|
-
user = user_login(user_cookie=USER_COOKIE)
|
821
802
|
node_labels[USER_NODE_LABEL] = user["username"]
|
822
803
|
except Exception as e:
|
823
804
|
console.log(f"[red]Error when joining network: {str(e)}")
|
824
805
|
console.log("Are you authenticated? Try [yellow]kalavai login")
|
825
806
|
return
|
826
|
-
# validate public seed
|
827
807
|
try:
|
828
808
|
validate_join_public_seed(
|
829
809
|
cluster_name=cluster_name,
|
@@ -832,31 +812,29 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
832
812
|
)
|
833
813
|
except Exception as e:
|
834
814
|
console.log(f"[red]Error when joining network: {str(e)}")
|
835
|
-
leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
|
836
815
|
return
|
837
816
|
|
838
817
|
# send note to server to let them know the node is coming online
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
if ip_address is None:
|
845
|
-
console.log(f"Scanning for valid IPs (subnet {vpn['subnet']})...")
|
846
|
-
ip_address = select_ip_address(subnet=vpn["subnet"])
|
847
|
-
console.log(f"Using {ip_address} address for worker")
|
818
|
+
# TODO: won't be able to check for VPN pools...
|
819
|
+
# if not pre_join_check(node_name=node_name, server_url=watcher_service, server_key=auth_key):
|
820
|
+
# console.log(f"[red] Failed pre join checks. Server offline or node '{node_name}' may already exist. Please specify a different one with '--node-name'")
|
821
|
+
# leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
|
822
|
+
# return
|
848
823
|
|
849
824
|
# local agent join
|
850
825
|
# 1. Generate local cache files
|
851
826
|
console.log("Generating config files...")
|
852
|
-
|
827
|
+
|
828
|
+
# Generate docker compose recipe
|
829
|
+
generate_compose_config(
|
853
830
|
role="agent",
|
854
|
-
|
855
|
-
|
856
|
-
|
857
|
-
|
831
|
+
pool_ip=f"https://{kalavai_seed_ip}:6443",
|
832
|
+
pool_token=kalavai_token,
|
833
|
+
vpn_token=vpn["key"],
|
834
|
+
node_name=node_name,
|
858
835
|
node_labels=node_labels,
|
859
836
|
is_public=public_location is not None)
|
837
|
+
|
860
838
|
store_server_info(
|
861
839
|
server_ip=kalavai_seed_ip,
|
862
840
|
auth_key=auth_key,
|
@@ -866,8 +844,6 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
866
844
|
cluster_name=cluster_name,
|
867
845
|
public_location=public_location,
|
868
846
|
user_api_key=user["api_key"])
|
869
|
-
|
870
|
-
init_user_workspace()
|
871
847
|
|
872
848
|
option = user_confirm(
|
873
849
|
question="Docker compose ready. Would you like Kalavai to deploy it?",
|
@@ -883,18 +859,23 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
|
|
883
859
|
CLUSTER.start_worker_node()
|
884
860
|
except Exception as e:
|
885
861
|
console.log(f"[red] Error connecting to {cluster_name} @ {kalavai_seed_ip}. Check with the admin if the token is still valid.")
|
886
|
-
leave_vpn(
|
862
|
+
leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
|
887
863
|
exit()
|
888
864
|
|
889
|
-
|
890
|
-
|
891
|
-
|
865
|
+
# ensure we are connected
|
866
|
+
while True:
|
867
|
+
console.log("Waiting for core services to be ready, may take a few minutes...")
|
868
|
+
time.sleep(30)
|
869
|
+
if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
|
870
|
+
break
|
871
|
+
|
872
|
+
init_user_workspace()
|
892
873
|
|
893
874
|
# set status to schedulable
|
894
875
|
console.log(f"[green] You are connected to {cluster_name}")
|
895
876
|
|
896
877
|
@arguably.command
|
897
|
-
def pool__stop(*others):
|
878
|
+
def pool__stop(*others, skip_node_deletion=False):
|
898
879
|
"""
|
899
880
|
Stop sharing your device and clean up. DO THIS ONLY IF YOU WANT TO REMOVE KALAVAI-CLIENT from your device.
|
900
881
|
|
@@ -903,7 +884,8 @@ def pool__stop(*others):
|
|
903
884
|
"""
|
904
885
|
console.log("[white] Stopping kalavai app...")
|
905
886
|
# delete local node from server
|
906
|
-
|
887
|
+
if not skip_node_deletion:
|
888
|
+
node__delete(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
|
907
889
|
# unpublish event (only if seed node)
|
908
890
|
# TODO: no, this should be done via the platform!!!
|
909
891
|
# try:
|
@@ -916,7 +898,20 @@ def pool__stop(*others):
|
|
916
898
|
# console.log(f"[red][WARNING]: (ignore if not a public pool) Error when unpublishing cluster. {str(e)}")
|
917
899
|
# remove local node agent
|
918
900
|
console.log("Removing agent and local cache")
|
901
|
+
|
902
|
+
# disconnect from VPN first, then remove agent, then remove local files
|
903
|
+
console.log("Disconnecting from VPN...")
|
904
|
+
try:
|
905
|
+
vpns = leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
|
906
|
+
if vpns is not None:
|
907
|
+
for vpn in vpns:
|
908
|
+
console.log(f"You have left {vpn} VPN")
|
909
|
+
except:
|
910
|
+
# no vpn
|
911
|
+
pass
|
912
|
+
|
919
913
|
CLUSTER.remove_agent()
|
914
|
+
|
920
915
|
# clean local files
|
921
916
|
cleanup_local()
|
922
917
|
console.log("[white] Kalavai has stopped sharing your resources. Use [yellow]kalavai pool start[white] or [yellow]kalavai pool join[white] to start again!")
|
@@ -1107,6 +1102,7 @@ def pool__attach(token, *others, node_name=None):
|
|
1107
1102
|
"""
|
1108
1103
|
Set creds in token on the local instance
|
1109
1104
|
"""
|
1105
|
+
# check that is not attached to another instance
|
1110
1106
|
if os.path.exists(USER_LOCAL_SERVER_FILE):
|
1111
1107
|
option = user_confirm(
|
1112
1108
|
question="You seem to be connected to an instance already. Are you sure you want to join a new one?",
|
@@ -1115,34 +1111,39 @@ def pool__attach(token, *others, node_name=None):
|
|
1115
1111
|
if option == 0:
|
1116
1112
|
console.log("[green]Nothing happened.")
|
1117
1113
|
return
|
1114
|
+
|
1115
|
+
# check token
|
1116
|
+
if not pool__check_token(token):
|
1117
|
+
return
|
1118
|
+
|
1118
1119
|
try:
|
1119
1120
|
data = decode_dict(token)
|
1120
1121
|
kalavai_seed_ip = data[CLUSTER_IP_KEY]
|
1121
|
-
kalavai_token = data[CLUSTER_TOKEN_KEY]
|
1122
1122
|
cluster_name = data[CLUSTER_NAME_KEY]
|
1123
1123
|
auth_key = data[AUTH_KEY]
|
1124
1124
|
watcher_service = data[WATCHER_SERVICE_KEY]
|
1125
1125
|
public_location = data[PUBLIC_LOCATION_KEY]
|
1126
|
-
|
1127
|
-
|
1126
|
+
vpn = defaultdict(lambda: None)
|
1127
|
+
except Exception as e:
|
1128
|
+
console.log(str(e))
|
1129
|
+
console.log("[red] Invalid token")
|
1128
1130
|
return
|
1129
|
-
|
1131
|
+
|
1130
1132
|
user = defaultdict(lambda: None)
|
1131
1133
|
if public_location is not None:
|
1132
|
-
|
1134
|
+
user = user_login(user_cookie=USER_COOKIE)
|
1135
|
+
if user is None:
|
1136
|
+
console.log("[red]Must be logged in to join public pools. Run [yellow]kalavai login[red] to authenticate")
|
1137
|
+
exit()
|
1138
|
+
console.log("Fetching VPN credentials")
|
1133
1139
|
try:
|
1134
|
-
|
1135
|
-
return
|
1136
|
-
vpn = join_vpn(
|
1140
|
+
vpn = get_vpn_details(
|
1137
1141
|
location=public_location,
|
1138
1142
|
user_cookie=USER_COOKIE)
|
1139
|
-
user = user_login(user_cookie=USER_COOKIE)
|
1140
|
-
time.sleep(5)
|
1141
1143
|
except Exception as e:
|
1142
1144
|
console.log(f"[red]Error when joining network: {str(e)}")
|
1143
1145
|
console.log("Are you authenticated? Try [yellow]kalavai login")
|
1144
1146
|
return
|
1145
|
-
# validate public seed
|
1146
1147
|
try:
|
1147
1148
|
validate_join_public_seed(
|
1148
1149
|
cluster_name=cluster_name,
|
@@ -1151,9 +1152,19 @@ def pool__attach(token, *others, node_name=None):
|
|
1151
1152
|
)
|
1152
1153
|
except Exception as e:
|
1153
1154
|
console.log(f"[red]Error when joining network: {str(e)}")
|
1154
|
-
leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
|
1155
1155
|
return
|
1156
|
-
|
1156
|
+
|
1157
|
+
# local agent join
|
1158
|
+
# 1. Generate local cache files
|
1159
|
+
console.log("Generating config files...")
|
1160
|
+
|
1161
|
+
# Generate docker compose recipe
|
1162
|
+
generate_compose_config(
|
1163
|
+
role="",
|
1164
|
+
vpn_token=vpn["key"],
|
1165
|
+
node_name=node_name,
|
1166
|
+
is_public=public_location is not None)
|
1167
|
+
|
1157
1168
|
store_server_info(
|
1158
1169
|
server_ip=kalavai_seed_ip,
|
1159
1170
|
auth_key=auth_key,
|
@@ -1164,7 +1175,26 @@ def pool__attach(token, *others, node_name=None):
|
|
1164
1175
|
public_location=public_location,
|
1165
1176
|
user_api_key=user["api_key"])
|
1166
1177
|
|
1167
|
-
|
1178
|
+
option = user_confirm(
|
1179
|
+
question="Docker compose ready. Would you like Kalavai to deploy it?",
|
1180
|
+
options=["no", "yes"]
|
1181
|
+
)
|
1182
|
+
if option == 0:
|
1183
|
+
console.log("Manually deploy the worker with the following command:\n")
|
1184
|
+
print(f"docker compose -f {USER_COMPOSE_FILE} up -d")
|
1185
|
+
return
|
1186
|
+
|
1187
|
+
console.log(f"[white] Connecting to {cluster_name} @ {kalavai_seed_ip} (this may take a few minutes)...")
|
1188
|
+
run_cmd(f"docker compose -f {USER_COMPOSE_FILE} up -d")
|
1189
|
+
# ensure we are connected
|
1190
|
+
while True:
|
1191
|
+
console.log("Waiting for core services to be ready, may take a few minutes...")
|
1192
|
+
time.sleep(30)
|
1193
|
+
if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
|
1194
|
+
break
|
1195
|
+
|
1196
|
+
# set status to schedulable
|
1197
|
+
console.log(f"[green] You are connected to {cluster_name}")
|
1168
1198
|
|
1169
1199
|
|
1170
1200
|
@arguably.command
|
kalavai_client/cluster.py
CHANGED
@@ -7,7 +7,8 @@ from kalavai_client.utils import (
|
|
7
7
|
run_cmd,
|
8
8
|
check_gpu_drivers,
|
9
9
|
validate_poolconfig,
|
10
|
-
user_path
|
10
|
+
user_path,
|
11
|
+
populate_template
|
11
12
|
)
|
12
13
|
|
13
14
|
|
@@ -20,6 +21,9 @@ class Cluster(ABC):
|
|
20
21
|
def start_worker_node(self, url, token, node_name, auth_key, watcher_service, ip_address, labels, flannel_iface):
|
21
22
|
raise NotImplementedError()
|
22
23
|
|
24
|
+
@abstractmethod
|
25
|
+
def get_vpn_ip(self):
|
26
|
+
raise NotImplementedError()
|
23
27
|
|
24
28
|
@abstractmethod
|
25
29
|
def update_dependencies(self, dependencies_files):
|
@@ -79,11 +83,23 @@ class dockerCluster(Cluster):
|
|
79
83
|
def start_seed_node(self):
|
80
84
|
|
81
85
|
run_cmd(f"docker compose -f {self.compose_file} up -d")
|
82
|
-
|
83
|
-
|
86
|
+
# wait for container to be setup
|
87
|
+
while True:
|
88
|
+
try:
|
89
|
+
run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file} >/dev/null 2>&1")
|
90
|
+
break
|
91
|
+
except:
|
92
|
+
pass
|
93
|
+
time.sleep(5)
|
84
94
|
|
85
95
|
def start_worker_node(self):
|
86
96
|
run_cmd(f"docker compose -f {self.compose_file} up -d")
|
97
|
+
|
98
|
+
def get_vpn_ip(self):
|
99
|
+
command = populate_template(
|
100
|
+
template_str="docker exec -it {{container_name}} ifconfig {{iface_name}} | grep 'inet ' | awk '{gsub(/^addr:/, \"\", $2); print $2}'",
|
101
|
+
values_dict={"container_name": self.container_name, "iface_name": self.default_flannel_iface})
|
102
|
+
return run_cmd(command).decode().strip()
|
87
103
|
|
88
104
|
|
89
105
|
def update_dependencies(self, dependencies_file=None, debug=False, retries=3):
|
@@ -122,8 +138,13 @@ class dockerCluster(Cluster):
|
|
122
138
|
def is_seed_node(self):
|
123
139
|
if not os.path.isfile(self.compose_file):
|
124
140
|
return False
|
125
|
-
|
126
|
-
|
141
|
+
if not self.is_agent_running():
|
142
|
+
return False
|
143
|
+
try:
|
144
|
+
run_cmd(f"docker container exec {self.container_name} cat /var/lib/rancher/k3s/server/node-token >/dev/null 2>&1")
|
145
|
+
return True
|
146
|
+
except:
|
147
|
+
return False
|
127
148
|
|
128
149
|
def is_cluster_init(self):
|
129
150
|
if not os.path.isfile(self.compose_file):
|
kalavai_client/utils.py
CHANGED
@@ -241,12 +241,12 @@ def join_vpn(location, user_cookie):
|
|
241
241
|
run_cmd(f"sudo netclient join -t {token} >/dev/null 2>&1")
|
242
242
|
return vpn
|
243
243
|
|
244
|
-
def leave_vpn():
|
244
|
+
def leave_vpn(container_name):
|
245
245
|
try:
|
246
|
-
vpns = json.loads(run_cmd("
|
246
|
+
vpns = json.loads(run_cmd(f"docker exec {container_name} netclient list").decode())
|
247
247
|
left_vpns = [vpn['network'] for vpn in vpns]
|
248
248
|
for vpn in left_vpns:
|
249
|
-
run_cmd(f"
|
249
|
+
run_cmd(f"docker exec {container_name} netclient leave {vpn}")
|
250
250
|
return left_vpns
|
251
251
|
except:
|
252
252
|
return None
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: kalavai-client
|
3
|
-
Version: 0.5.
|
3
|
+
Version: 0.5.9
|
4
4
|
Summary: Client app for kalavai platform
|
5
5
|
License: Apache-2.0
|
6
6
|
Keywords: LLM,platform
|
@@ -156,6 +156,16 @@ Make sure you also install python3-dev package. For ubuntu distros:
|
|
156
156
|
sudo apt install python3-dev
|
157
157
|
```
|
158
158
|
|
159
|
+
If you see:
|
160
|
+
```bash
|
161
|
+
AttributeError: install_layout. Did you mean: 'install_platlib'?
|
162
|
+
[end of output]
|
163
|
+
```
|
164
|
+
|
165
|
+
Upgrade your setuptools:
|
166
|
+
```bash
|
167
|
+
pip install -U setuptools
|
168
|
+
```
|
159
169
|
|
160
170
|
### Install the client
|
161
171
|
|
@@ -270,10 +280,11 @@ Python version <= 3.12.
|
|
270
280
|
```bash
|
271
281
|
sudo add-apt-repository ppa:deadsnakes/ppa
|
272
282
|
sudo apt update
|
273
|
-
sudo apt install python3.10 python3.10-dev
|
283
|
+
sudo apt install python3.10 python3.10-dev python3-virtualenv
|
274
284
|
virtualenv -p python3.10 env
|
275
285
|
source env/bin/activate
|
276
286
|
sudo apt install python3.10-venv python3.10-dev -y
|
287
|
+
pip install -U setuptools
|
277
288
|
pip install -e .[dev]
|
278
289
|
```
|
279
290
|
|
@@ -291,4 +302,4 @@ To run the unit tests, use:
|
|
291
302
|
python -m unittest
|
292
303
|
```
|
293
304
|
|
294
|
-
|
305
|
+
docker run --rm --net=host -v /root/.cache/kalavai/:/root/.cache/kalavai/ ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file /root/.cache/kalavai/apps.yaml --kubeconfig /root/.cache/kalavai/kubeconfig
|
@@ -0,0 +1,20 @@
|
|
1
|
+
kalavai_client/__init__.py,sha256=vItrdD645pc4NrXPAz6cc0ExWrMEfwvLkksqG8FHQaE,22
|
2
|
+
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
|
+
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
+
kalavai_client/assets/apps.yaml,sha256=_8BgT9F611c8uZJvhTE_0CLbQqnLaUQosqxZjzOslXQ,5979
|
5
|
+
kalavai_client/assets/apps_values.yaml,sha256=CjKVelPQHd-hm-DTMEuya92feKiphU9mh3HrosLYYPE,1676
|
6
|
+
kalavai_client/assets/docker-compose-template.yaml,sha256=gJ0NkhcG2c-gZPmSd385dadrXkZrWruTJkiaxcaKkQ0,2725
|
7
|
+
kalavai_client/assets/nginx.conf,sha256=drVVCg8GHucz7hmt_BI6giAhK92OV71257NTs3LthwM,225
|
8
|
+
kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
|
9
|
+
kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
|
10
|
+
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
11
|
+
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
12
|
+
kalavai_client/auth.py,sha256=QsBh28L2LwjBBK6pTUE4Xu36lLDTyetyU1YfS1Hbb6g,1717
|
13
|
+
kalavai_client/cli.py,sha256=4qTZuYNFhsdJbnER-MSBJHPNgJc_lWzbWR0Bj2YeQe0,68889
|
14
|
+
kalavai_client/cluster.py,sha256=fULTAad4KXEGeWZmp4_VBoBwT5eED_HOBUsXIKmf0CU,12119
|
15
|
+
kalavai_client/utils.py,sha256=NOORAsRs9A84w8yyvhGwXcvshyZyf5wj86HT4HPzOcI,12405
|
16
|
+
kalavai_client-0.5.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
17
|
+
kalavai_client-0.5.9.dist-info/METADATA,sha256=LEBFwVRSWQ6GRLPBWhygefs4S_EAgpgoqaSXSmF3CwI,13013
|
18
|
+
kalavai_client-0.5.9.dist-info/WHEEL,sha256=_GR1VF9XYsw6EE9ATkyto4e6vRiOUBshR1STiZ5m1gE,92
|
19
|
+
kalavai_client-0.5.9.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
20
|
+
kalavai_client-0.5.9.dist-info/RECORD,,
|
@@ -1,13 +0,0 @@
|
|
1
|
-
# https://docs.netmaker.io/docs/netclient#docker
|
2
|
-
services:
|
3
|
-
{{service_name}}:
|
4
|
-
network_mode: host
|
5
|
-
privileged: true
|
6
|
-
restart: always
|
7
|
-
environment:
|
8
|
-
- TOKEN={{vpn_token}}
|
9
|
-
- IFACE_NAME={{flannel_iface}}
|
10
|
-
volumes:
|
11
|
-
- '{{etc_path}}/netclient:/etc/netclient'
|
12
|
-
container_name: {{service_name}}
|
13
|
-
image: 'gravitl/netclient:latest'
|
@@ -1,20 +0,0 @@
|
|
1
|
-
kalavai_client/__init__.py,sha256=VsVCdw5n_XkxVQiDP5x4If6sIoowwW2gG3FlCDuzkaY,22
|
2
|
-
kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
|
3
|
-
kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
4
|
-
kalavai_client/assets/apps.yaml,sha256=aW9wKyvZhZFMHzBkZOsgVq-kpntED6U8B9XoHkm5F9Y,5963
|
5
|
-
kalavai_client/assets/apps_values.yaml,sha256=CjKVelPQHd-hm-DTMEuya92feKiphU9mh3HrosLYYPE,1676
|
6
|
-
kalavai_client/assets/docker-compose-template.yaml,sha256=qDv0og338clLobDDPEJ-HiGlMcCMMx2NOi5R_hdhKvw,1442
|
7
|
-
kalavai_client/assets/pool_config_template.yaml,sha256=fFz4w2-fMKD5KvyzFdfcWD_jSneRlmnjLc8hCctweX0,576
|
8
|
-
kalavai_client/assets/pool_config_values.yaml,sha256=VrM3XHQfQo6QLZ68qvagooUptaYgl1pszniY_JUtemk,233
|
9
|
-
kalavai_client/assets/user_workspace.yaml,sha256=wDvlMYknOPABAEo0dsQwU7bac8iubjAG9tdkFbJZ5Go,476
|
10
|
-
kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKMzDDPVwDwzBHCL2Xi2ZM,542
|
11
|
-
kalavai_client/assets/vpn-template.yaml,sha256=Hm7sevtrsakSxSKMJwl68hzOEWCaxwYytwkTgKhe_MM,397
|
12
|
-
kalavai_client/auth.py,sha256=QsBh28L2LwjBBK6pTUE4Xu36lLDTyetyU1YfS1Hbb6g,1717
|
13
|
-
kalavai_client/cli.py,sha256=qBOe4IULIX7pzKUhHoR1JppWrbUeV1aslxQLMWrRGpU,67913
|
14
|
-
kalavai_client/cluster.py,sha256=C7uofrpSEj4PUhF_VLfwH4k9BhJMTkuKRWHxDYu1OD0,11345
|
15
|
-
kalavai_client/utils.py,sha256=aHgNj5iVOhgTrI-UFoqgAvnesn4t6Qos3-WnNSxZ-fc,12342
|
16
|
-
kalavai_client-0.5.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
17
|
-
kalavai_client-0.5.7.dist-info/METADATA,sha256=q7m04ZO0VMss4HEuJxBEY-587JwjHRmwZ8Cj8a7jyxw,12584
|
18
|
-
kalavai_client-0.5.7.dist-info/WHEEL,sha256=_GR1VF9XYsw6EE9ATkyto4e6vRiOUBshR1STiZ5m1gE,92
|
19
|
-
kalavai_client-0.5.7.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
|
20
|
-
kalavai_client-0.5.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|