kalavai-client 0.5.6__tar.gz → 0.5.8__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (22) hide show
  1. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/PKG-INFO +15 -4
  2. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/README.md +13 -1
  3. kalavai_client-0.5.8/kalavai_client/__init__.py +2 -0
  4. kalavai_client-0.5.8/kalavai_client/assets/docker-compose-template.yaml +97 -0
  5. kalavai_client-0.5.8/kalavai_client/assets/nginx.conf +12 -0
  6. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/cli.py +92 -78
  7. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/cluster.py +26 -5
  8. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/utils.py +3 -3
  9. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/pyproject.toml +3 -4
  10. kalavai_client-0.5.6/kalavai_client/__init__.py +0 -2
  11. kalavai_client-0.5.6/kalavai_client/assets/docker-compose-template.yaml +0 -55
  12. kalavai_client-0.5.6/kalavai_client/assets/vpn-template.yaml +0 -13
  13. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/LICENSE +0 -0
  14. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/__main__.py +0 -0
  15. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/assets/__init__.py +0 -0
  16. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/assets/apps.yaml +0 -0
  17. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/assets/apps_values.yaml +0 -0
  18. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/assets/pool_config_template.yaml +0 -0
  19. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/assets/pool_config_values.yaml +0 -0
  20. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/assets/user_workspace.yaml +0 -0
  21. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/assets/user_workspace_values.yaml +0 -0
  22. {kalavai_client-0.5.6 → kalavai_client-0.5.8}/kalavai_client/auth.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.5.6
3
+ Version: 0.5.8
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -35,7 +35,7 @@ Requires-Dist: pyinstaller (==6.5.0) ; extra == "dev"
35
35
  Requires-Dist: pyyaml (==6.0.2)
36
36
  Requires-Dist: requests (>=2.25)
37
37
  Requires-Dist: rich (==13.7.1)
38
- Requires-Dist: setuptools (>70.0.0)
38
+ Requires-Dist: setuptools (>75.0.0)
39
39
  Requires-Dist: twine ; extra == "dev"
40
40
  Project-URL: Homepage, https://platform.kalavai.net
41
41
  Project-URL: Website, https://kalavai.net
@@ -156,6 +156,16 @@ Make sure you also install python3-dev package. For ubuntu distros:
156
156
  sudo apt install python3-dev
157
157
  ```
158
158
 
159
+ If you see:
160
+ ```bash
161
+ AttributeError: install_layout. Did you mean: 'install_platlib'?
162
+ [end of output]
163
+ ```
164
+
165
+ Upgrade your setuptools:
166
+ ```bash
167
+ pip install -U setuptools
168
+ ```
159
169
 
160
170
  ### Install the client
161
171
 
@@ -270,10 +280,11 @@ Python version <= 3.12.
270
280
  ```bash
271
281
  sudo add-apt-repository ppa:deadsnakes/ppa
272
282
  sudo apt update
273
- sudo apt install python3.10 python3.10-dev
283
+ sudo apt install python3.10 python3.10-dev python3-virtualenv
274
284
  virtualenv -p python3.10 env
275
285
  source env/bin/activate
276
286
  sudo apt install python3.10-venv python3.10-dev -y
287
+ pip install -U setuptools
277
288
  pip install -e .[dev]
278
289
  ```
279
290
 
@@ -291,4 +302,4 @@ To run the unit tests, use:
291
302
  python -m unittest
292
303
  ```
293
304
 
294
-
305
+ docker run --rm --net=host -v /root/.cache/kalavai/:/root/.cache/kalavai/ ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file /root/.cache/kalavai/apps.yaml --kubeconfig /root/.cache/kalavai/kubeconfig
@@ -113,6 +113,16 @@ Make sure you also install python3-dev package. For ubuntu distros:
113
113
  sudo apt install python3-dev
114
114
  ```
115
115
 
116
+ If you see:
117
+ ```bash
118
+ AttributeError: install_layout. Did you mean: 'install_platlib'?
119
+ [end of output]
120
+ ```
121
+
122
+ Upgrade your setuptools:
123
+ ```bash
124
+ pip install -U setuptools
125
+ ```
116
126
 
117
127
  ### Install the client
118
128
 
@@ -227,10 +237,11 @@ Python version <= 3.12.
227
237
  ```bash
228
238
  sudo add-apt-repository ppa:deadsnakes/ppa
229
239
  sudo apt update
230
- sudo apt install python3.10 python3.10-dev
240
+ sudo apt install python3.10 python3.10-dev python3-virtualenv
231
241
  virtualenv -p python3.10 env
232
242
  source env/bin/activate
233
243
  sudo apt install python3.10-venv python3.10-dev -y
244
+ pip install -U setuptools
234
245
  pip install -e .[dev]
235
246
  ```
236
247
 
@@ -248,3 +259,4 @@ To run the unit tests, use:
248
259
  python -m unittest
249
260
  ```
250
261
 
262
+ docker run --rm --net=host -v /root/.cache/kalavai/:/root/.cache/kalavai/ ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file /root/.cache/kalavai/apps.yaml --kubeconfig /root/.cache/kalavai/kubeconfig
@@ -0,0 +1,2 @@
1
+
2
+ __version__ = "0.5.8"
@@ -0,0 +1,97 @@
1
+ services:
2
+ {% if vpn %}
3
+ {{vpn_name}}:
4
+ image: gravitl/netclient:v0.30.0
5
+ container_name: {{vpn_name}}
6
+ cap_add:
7
+ - NET_ADMIN
8
+ - SYS_MODULE
9
+ network_mode: host
10
+ # networks:
11
+ # - custom-network
12
+ # ports:
13
+ # # https://docs.k3s.io/installation/requirements#inbound-rules-for-k3s-nodes
14
+ # - "6443:6443" # kube server
15
+ # - "10250:10250" # worker balancer
16
+ # - "8472:8472/udp" # flannel vxlan
17
+ # - "51820:51820/udp" # flannel wireguard
18
+ # {% if command == "server" %}
19
+ # - "30000-30500:30000-30500"
20
+ # {% endif %}
21
+ environment:
22
+ - HOST_NAME={{node_name}}
23
+ - IFACE_NAME={{flannel_iface}}
24
+ - PORT=51820
25
+ - TOKEN={{vpn_token}}
26
+ volumes:
27
+ - /dev/net/tun:/dev/net/tun
28
+ restart: unless-stopped
29
+ # nginx:
30
+ # image: nginx:latest
31
+ # ports:
32
+ # - "{{redirect_source_port}}:{{redirect_source_port}}"
33
+ # restart: unless-stopped
34
+ # networks:
35
+ # - custom-network
36
+ # volumes:
37
+ # - {{nginx_path}}/nginx.conf:/etc/nginx/nginx.conf
38
+ {% endif %}
39
+ {{service_name}}:
40
+ image: docker.io/bundenth/kalavai-runner:gpu-latest
41
+ container_name: {{service_name}}
42
+ {% if vpn %}
43
+ depends_on:
44
+ - {{vpn_name}}
45
+ network_mode: "service:{{vpn_name}}"
46
+ {% else %}
47
+ hostname: {{node_name}}
48
+ networks:
49
+ - custom-network
50
+ ports:
51
+ - "6443:6443" # kube server
52
+ - "10250:10250" # worker balancer
53
+ - "8472:8472" # flannel vxlan
54
+ - "51820:51820" # flannel wireguard
55
+ {% if command == "server" %}
56
+ - "30000-30500:30000-30500"
57
+ {% endif %}
58
+ {% endif %}
59
+ privileged: true
60
+ restart: unless-stopped
61
+ command: >
62
+ --command={{command}}
63
+ {% if command == "server" %}
64
+ --port_range="30000-30500"
65
+ {% else %}
66
+ --server_ip={{pool_ip}}
67
+ --token={{pool_token}}
68
+ {% endif %}
69
+ {%if vpn %}
70
+ --flannel_iface={{flannel_iface}}
71
+ {% endif %}
72
+ {% if num_gpus and num_gpus > 0 %}
73
+ --gpu=on
74
+ {% else %}
75
+ --gpu=off
76
+ {% endif %}
77
+ {% if node_labels %}
78
+ --extra="{{node_labels}}"
79
+ {% endif %}
80
+
81
+ volumes:
82
+ - {{k3s_path}}:/var/lib/rancher/k3s # Persist data
83
+ - {{etc_path}}:/etc/rancher/k3s # Config files
84
+
85
+ {% if num_gpus and num_gpus > 0 %}
86
+ deploy:
87
+ resources:
88
+ reservations:
89
+ devices:
90
+ - driver: nvidia
91
+ count: {{num_gpus}}
92
+ capabilities: [gpu]
93
+ {% endif %}
94
+
95
+ networks:
96
+ custom-network:
97
+ driver: bridge
@@ -0,0 +1,12 @@
1
+ events {}
2
+
3
+ http {
4
+ server {
5
+ listen {{redirect_source_port}};
6
+ server_name localhost;
7
+
8
+ location / {
9
+ proxy_pass http://{{redirect_container}}:{{redirect_target_port}};
10
+ }
11
+ }
12
+ }
@@ -31,6 +31,7 @@ from kalavai_client.utils import (
31
31
  safe_remove,
32
32
  leave_vpn,
33
33
  join_vpn,
34
+ get_vpn_details,
34
35
  load_server_info,
35
36
  user_login,
36
37
  user_logout,
@@ -81,15 +82,17 @@ STORAGE_ACCESS_MODE = ["ReadWriteOnce"]
81
82
  STORAGE_CLASS_LABEL = "kalavai.storage.enabled"
82
83
  DEFAULT_STORAGE_NAME = "pool-cache"
83
84
  DEFAULT_STORAGE_SIZE = 20
85
+ DEFAULT_WATCHER_PORT = 30001
84
86
  USER_NODE_LABEL = "kalavai.cluster.user"
85
87
  KUBE_VERSION = os.getenv("KALAVAI_KUBE_VERSION", "v1.31.1+k3s1")
86
- DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker")
88
+ DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker-1")
87
89
  FORBIDEDEN_IPS = ["127.0.0.1"]
88
90
  # kalavai templates
89
91
  HELM_APPS_FILE = resource_path("kalavai_client/assets/apps.yaml")
90
92
  HELM_APPS_VALUES = resource_path("kalavai_client/assets/apps_values.yaml")
91
93
  # user specific config files
92
- DEFAULT_CONTAINER_NAME = "kalavai-seed"
94
+ DEFAULT_CONTAINER_NAME = "kalavai"
95
+ DEFAULT_VPN_CONTAINER_NAME = "kalavai-vpn"
93
96
  CONTAINER_HOST_PATH = user_path("pool/", create_path=True)
94
97
  USER_COMPOSE_FILE = user_path("docker-compose-worker.yaml")
95
98
  USER_VPN_COMPOSE_FILE = user_path("docker-compose-vpn.yaml")
@@ -130,7 +133,7 @@ def check_vpn_compatibility():
130
133
  logs.append(" MacOS: https://docs.netmaker.io/docs/netclient#mac\n")
131
134
 
132
135
  if len(logs) == 0:
133
- console.log("[green]System is ready to join a pool")
136
+ console.log("[green]System is ready to join the vpn")
134
137
  return True
135
138
  else:
136
139
  for log in logs:
@@ -179,16 +182,6 @@ def check_worker_compatibility():
179
182
 
180
183
 
181
184
  def cleanup_local():
182
- # disconnect from private network
183
- console.log("Disconnecting from VPN...")
184
- try:
185
- vpns = leave_vpn()
186
- if vpns is not None:
187
- for vpn in vpns:
188
- console.log(f"You have left {vpn} VPN")
189
- except:
190
- # no vpn
191
- pass
192
185
  console.log("Removing local cache files...")
193
186
  safe_remove(CONTAINER_HOST_PATH)
194
187
  safe_remove(USER_COMPOSE_FILE)
@@ -365,7 +358,7 @@ def select_token_type():
365
358
  break
366
359
  return {"admin": choice == 0, "user": choice == 1, "worker": choice == 2}
367
360
 
368
- def generate_compose_config(role, node_name, ip_address, node_labels, is_public, server=None, token=None):
361
+ def generate_compose_config(role, node_name, node_labels, is_public, pool_ip=None, vpn_token=None, pool_token=None):
369
362
  num_gpus = 0
370
363
  try:
371
364
  has_gpus = check_gpu_drivers()
@@ -380,17 +373,19 @@ def generate_compose_config(role, node_name, ip_address, node_labels, is_public,
380
373
  compose_values = {
381
374
  "user_path": user_path(""),
382
375
  "service_name": DEFAULT_CONTAINER_NAME,
383
- "pool_ip": server,
384
- "token": token,
385
- "hostname": node_name,
376
+ "vpn": is_public,
377
+ "vpn_name": DEFAULT_VPN_CONTAINER_NAME,
378
+ "pool_ip": pool_ip,
379
+ "pool_token": pool_token,
380
+ "vpn_token": vpn_token,
381
+ "node_name": node_name,
386
382
  "command": role,
387
383
  "storage_enabled": "True",
388
- "ip_address": ip_address,
389
384
  "num_gpus": num_gpus,
390
385
  "k3s_path": f"{CONTAINER_HOST_PATH}/k3s",
391
386
  "etc_path": f"{CONTAINER_HOST_PATH}/etc",
392
387
  "node_labels": " ".join([f"--node-label {key}={value}" for key, value in node_labels.items()]),
393
- "flannel_iface": DEFAULT_FLANNEL_IFACE if is_public else None
388
+ "flannel_iface": DEFAULT_FLANNEL_IFACE if is_public else ""
394
389
  }
395
390
  # generate local config files
396
391
  compose_yaml = load_template(
@@ -585,35 +580,56 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
585
580
  STORAGE_CLASS_LABEL: is_storage_compatible()
586
581
  }
587
582
  if location is not None:
588
- console.log("Joining private network")
583
+ console.log("Fetching VPN credentials")
589
584
  try:
590
- if not check_vpn_compatibility():
591
- return
592
- vpn = join_vpn(
585
+ vpn = get_vpn_details(
593
586
  location=location,
594
587
  user_cookie=USER_COOKIE)
595
588
  node_labels[USER_NODE_LABEL] = user["username"]
596
589
  except Exception as e:
597
590
  console.log(f"[red]Error when joining network: {str(e)}")
598
591
  return
599
-
600
- if ip_address is None:
601
- console.log(f"Scanning for valid IPs (subnet {vpn['subnet']})...")
602
- ip_address = select_ip_address(subnet=vpn["subnet"])
592
+
593
+ # Generate docker compose recipe
594
+ generate_compose_config(
595
+ role="server",
596
+ vpn_token=vpn["key"],
597
+ node_name=socket.gethostname(),
598
+ node_labels=node_labels,
599
+ is_public=location is not None
600
+ )
601
+
602
+ # start server
603
+ console.log("Deploying seed...")
604
+ CLUSTER.start_seed_node()
605
+
606
+ while not CLUSTER.is_agent_running():
607
+ console.log("Waiting for seed to start...")
608
+ time.sleep(10)
609
+
610
+ # select IP address (for external discovery)
611
+ if ip_address is None and location is None:
612
+ # local IP
613
+ console.log(f"Scanning for valid IPs")
614
+ ip_address = select_ip_address()
615
+ else:
616
+ # load VPN ip
617
+ ip_address = CLUSTER.get_vpn_ip()
603
618
  console.log(f"Using {ip_address} address for server")
604
619
 
620
+ # populate local cred files
605
621
  auth_key = str(uuid.uuid4())
606
622
  write_auth_key = str(uuid.uuid4())
607
623
  readonly_auth_key = str(uuid.uuid4())
608
- watcher_port = 30001
609
- watcher_service = f"{ip_address}:{watcher_port}"
624
+
625
+ watcher_service = f"{ip_address}:{DEFAULT_WATCHER_PORT}"
610
626
  values = {
611
627
  CLUSTER_NAME_KEY: cluster_name,
612
628
  CLUSTER_IP_KEY: ip_address,
613
629
  AUTH_KEY: auth_key,
614
630
  READONLY_AUTH_KEY: readonly_auth_key,
615
631
  WRITE_AUTH_KEY: write_auth_key,
616
- WATCHER_PORT_KEY: watcher_port,
632
+ WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
617
633
  WATCHER_SERVICE_KEY: watcher_service,
618
634
  USER_NODE_LABEL_KEY: USER_NODE_LABEL,
619
635
  ALLOW_UNREGISTERED_USER_KEY: not only_registered_users
@@ -630,15 +646,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
630
646
  cluster_name=cluster_name,
631
647
  public_location=location,
632
648
  user_api_key=user["api_key"])
633
-
634
- # 1. Generate docker compose recipe
635
- compose_yaml = generate_compose_config(
636
- role="server",
637
- node_name=socket.gethostname(),
638
- ip_address=ip_address,
639
- node_labels=node_labels,
640
- is_public=location is not None
641
- )
642
649
 
643
650
  # Generate helmfile recipe
644
651
  helm_yaml = load_template(
@@ -650,14 +657,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
650
657
  f.write(helm_yaml)
651
658
 
652
659
  console.log("[green]Config files have been generated in your local machine\n")
653
-
654
- # # 1. start server
655
- console.log("Deploying seed...")
656
- CLUSTER.start_seed_node()
657
-
658
- while not CLUSTER.is_agent_running():
659
- console.log("Waiting for seed to start...")
660
- time.sleep(10)
661
660
 
662
661
  console.log("Setting pool dependencies...")
663
662
  # set template values in helmfile
@@ -691,7 +690,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
691
690
 
692
691
  return None
693
692
 
694
-
695
693
  @arguably.command
696
694
  def pool__token(*others, admin=False, user=False, worker=False):
697
695
  """
@@ -773,6 +771,7 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
773
771
  if CLUSTER.is_agent_running():
774
772
  console.log(f"[white] You are already connected to {load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE)}. Enter [yellow]kalavai pool stop[white] to exit and join another one.")
775
773
  return
774
+
776
775
  # check that is not attached to another instance
777
776
  if os.path.exists(USER_LOCAL_SERVER_FILE):
778
777
  option = user_confirm(
@@ -810,20 +809,20 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
810
809
  }
811
810
  user = defaultdict(lambda: None)
812
811
  if public_location is not None:
813
- console.log("Joining private network")
812
+ user = user_login(user_cookie=USER_COOKIE)
813
+ if user is None:
814
+ console.log("[red]Must be logged in to join public pools. Run [yellow]kalavai login[red] to authenticate")
815
+ exit()
816
+ console.log("Fetching VPN credentials")
814
817
  try:
815
- if not check_vpn_compatibility():
816
- return
817
- vpn = join_vpn(
818
+ vpn = get_vpn_details(
818
819
  location=public_location,
819
820
  user_cookie=USER_COOKIE)
820
- user = user_login(user_cookie=USER_COOKIE)
821
821
  node_labels[USER_NODE_LABEL] = user["username"]
822
822
  except Exception as e:
823
823
  console.log(f"[red]Error when joining network: {str(e)}")
824
824
  console.log("Are you authenticated? Try [yellow]kalavai login")
825
825
  return
826
- # validate public seed
827
826
  try:
828
827
  validate_join_public_seed(
829
828
  cluster_name=cluster_name,
@@ -832,31 +831,29 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
832
831
  )
833
832
  except Exception as e:
834
833
  console.log(f"[red]Error when joining network: {str(e)}")
835
- leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
836
834
  return
837
835
 
838
836
  # send note to server to let them know the node is coming online
839
- if not pre_join_check(node_name=node_name, server_url=watcher_service, server_key=auth_key):
840
- console.log(f"[red] Failed pre join checks. Server offline or node '{node_name}' may already exist. Please specify a different one with '--node-name'")
841
- leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
842
- return
843
-
844
- if ip_address is None:
845
- console.log(f"Scanning for valid IPs (subnet {vpn['subnet']})...")
846
- ip_address = select_ip_address(subnet=vpn["subnet"])
847
- console.log(f"Using {ip_address} address for worker")
837
+ # TODO: won't be able to check for VPN pools...
838
+ # if not pre_join_check(node_name=node_name, server_url=watcher_service, server_key=auth_key):
839
+ # console.log(f"[red] Failed pre join checks. Server offline or node '{node_name}' may already exist. Please specify a different one with '--node-name'")
840
+ # leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
841
+ # return
848
842
 
849
843
  # local agent join
850
844
  # 1. Generate local cache files
851
845
  console.log("Generating config files...")
852
- compose_yaml = generate_compose_config(
846
+
847
+ # Generate docker compose recipe
848
+ generate_compose_config(
853
849
  role="agent",
854
- server=f"https://{kalavai_seed_ip}:6443",
855
- token=kalavai_token,
856
- node_name=socket.gethostname(),
857
- ip_address=ip_address,
850
+ pool_ip=f"https://{kalavai_seed_ip}:6443",
851
+ pool_token=kalavai_token,
852
+ vpn_token=vpn["key"],
853
+ node_name=node_name,
858
854
  node_labels=node_labels,
859
855
  is_public=public_location is not None)
856
+
860
857
  store_server_info(
861
858
  server_ip=kalavai_seed_ip,
862
859
  auth_key=auth_key,
@@ -866,8 +863,6 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
866
863
  cluster_name=cluster_name,
867
864
  public_location=public_location,
868
865
  user_api_key=user["api_key"])
869
-
870
- init_user_workspace()
871
866
 
872
867
  option = user_confirm(
873
868
  question="Docker compose ready. Would you like Kalavai to deploy it?",
@@ -883,18 +878,23 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
883
878
  CLUSTER.start_worker_node()
884
879
  except Exception as e:
885
880
  console.log(f"[red] Error connecting to {cluster_name} @ {kalavai_seed_ip}. Check with the admin if the token is still valid.")
886
- leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
881
+ leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
887
882
  exit()
888
883
 
889
- while not CLUSTER.is_agent_running():
890
- console.log("Waiting for worker to start...")
891
- time.sleep(10)
884
+ # ensure we are connected
885
+ while True:
886
+ console.log("Waiting for core services to be ready, may take a few minutes...")
887
+ time.sleep(30)
888
+ if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
889
+ break
890
+
891
+ init_user_workspace()
892
892
 
893
893
  # set status to schedulable
894
894
  console.log(f"[green] You are connected to {cluster_name}")
895
895
 
896
896
  @arguably.command
897
- def pool__stop(*others):
897
+ def pool__stop(*others, skip_node_deletion=False):
898
898
  """
899
899
  Stop sharing your device and clean up. DO THIS ONLY IF YOU WANT TO REMOVE KALAVAI-CLIENT from your device.
900
900
 
@@ -903,7 +903,8 @@ def pool__stop(*others):
903
903
  """
904
904
  console.log("[white] Stopping kalavai app...")
905
905
  # delete local node from server
906
- node__delete(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
906
+ if not skip_node_deletion:
907
+ node__delete(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
907
908
  # unpublish event (only if seed node)
908
909
  # TODO: no, this should be done via the platform!!!
909
910
  # try:
@@ -916,7 +917,20 @@ def pool__stop(*others):
916
917
  # console.log(f"[red][WARNING]: (ignore if not a public pool) Error when unpublishing cluster. {str(e)}")
917
918
  # remove local node agent
918
919
  console.log("Removing agent and local cache")
920
+
921
+ # disconnect from VPN first, then remove agent, then remove local files
922
+ console.log("Disconnecting from VPN...")
923
+ try:
924
+ vpns = leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
925
+ if vpns is not None:
926
+ for vpn in vpns:
927
+ console.log(f"You have left {vpn} VPN")
928
+ except:
929
+ # no vpn
930
+ pass
931
+
919
932
  CLUSTER.remove_agent()
933
+
920
934
  # clean local files
921
935
  cleanup_local()
922
936
  console.log("[white] Kalavai has stopped sharing your resources. Use [yellow]kalavai pool start[white] or [yellow]kalavai pool join[white] to start again!")
@@ -1151,7 +1165,7 @@ def pool__attach(token, *others, node_name=None):
1151
1165
  )
1152
1166
  except Exception as e:
1153
1167
  console.log(f"[red]Error when joining network: {str(e)}")
1154
- leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
1168
+ leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
1155
1169
  return
1156
1170
 
1157
1171
  store_server_info(
@@ -7,7 +7,8 @@ from kalavai_client.utils import (
7
7
  run_cmd,
8
8
  check_gpu_drivers,
9
9
  validate_poolconfig,
10
- user_path
10
+ user_path,
11
+ populate_template
11
12
  )
12
13
 
13
14
 
@@ -20,6 +21,9 @@ class Cluster(ABC):
20
21
  def start_worker_node(self, url, token, node_name, auth_key, watcher_service, ip_address, labels, flannel_iface):
21
22
  raise NotImplementedError()
22
23
 
24
+ @abstractmethod
25
+ def get_vpn_ip(self):
26
+ raise NotImplementedError()
23
27
 
24
28
  @abstractmethod
25
29
  def update_dependencies(self, dependencies_files):
@@ -79,11 +83,23 @@ class dockerCluster(Cluster):
79
83
  def start_seed_node(self):
80
84
 
81
85
  run_cmd(f"docker compose -f {self.compose_file} up -d")
82
- time.sleep(5)
83
- run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file}")
86
+ # wait for container to be setup
87
+ while True:
88
+ try:
89
+ run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file} >/dev/null 2>&1")
90
+ break
91
+ except:
92
+ pass
93
+ time.sleep(5)
84
94
 
85
95
  def start_worker_node(self):
86
96
  run_cmd(f"docker compose -f {self.compose_file} up -d")
97
+
98
+ def get_vpn_ip(self):
99
+ command = populate_template(
100
+ template_str="docker exec -it {{container_name}} ifconfig {{iface_name}} | grep 'inet ' | awk '{gsub(/^addr:/, \"\", $2); print $2}'",
101
+ values_dict={"container_name": self.container_name, "iface_name": self.default_flannel_iface})
102
+ return run_cmd(command).decode().strip()
87
103
 
88
104
 
89
105
  def update_dependencies(self, dependencies_file=None, debug=False, retries=3):
@@ -122,8 +138,13 @@ class dockerCluster(Cluster):
122
138
  def is_seed_node(self):
123
139
  if not os.path.isfile(self.compose_file):
124
140
  return False
125
- status = "server" in run_cmd(f"docker compose -f {self.compose_file} ps --services --status=running").decode()
126
- return status
141
+ if not self.is_agent_running():
142
+ return False
143
+ try:
144
+ run_cmd(f"docker container exec {self.container_name} cat /var/lib/rancher/k3s/server/node-token >/dev/null 2>&1")
145
+ return True
146
+ except:
147
+ return False
127
148
 
128
149
  def is_cluster_init(self):
129
150
  if not os.path.isfile(self.compose_file):
@@ -241,12 +241,12 @@ def join_vpn(location, user_cookie):
241
241
  run_cmd(f"sudo netclient join -t {token} >/dev/null 2>&1")
242
242
  return vpn
243
243
 
244
- def leave_vpn():
244
+ def leave_vpn(container_name):
245
245
  try:
246
- vpns = json.loads(run_cmd("sudo netclient list").decode())
246
+ vpns = json.loads(run_cmd(f"docker exec {container_name} netclient list").decode())
247
247
  left_vpns = [vpn['network'] for vpn in vpns]
248
248
  for vpn in left_vpns:
249
- run_cmd(f"sudo netclient leave {vpn}")
249
+ run_cmd(f"docker exec {container_name} netclient leave {vpn}")
250
250
  return left_vpns
251
251
  except:
252
252
  return None
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "kalavai-client"
3
- version = "0.5.6"
3
+ version = "0.5.8"
4
4
  authors = [
5
5
  {name = "Carlos Fernandez Musoles", email = "carlos@kalavai.net"}
6
6
  ]
@@ -21,7 +21,7 @@ dependencies = [
21
21
  "rich==13.7.1",
22
22
  "arguably>=1.2.5",
23
23
  "Pillow==10.3.0",
24
- "setuptools>70.0.0",
24
+ "setuptools>75.0.0",
25
25
  "netifaces==0.11.0",
26
26
  "anvil-uplink==0.5.1",
27
27
  "importlib_resources==6.5.2"
@@ -53,9 +53,8 @@ build-backend = "poetry.core.masonry.api"
53
53
  kalavai = "kalavai_client.cli:app"
54
54
 
55
55
 
56
-
57
56
  [tool.setuptools.packages.find]
58
57
  where = ["kalavai_client"]
59
58
 
60
59
  [tool.setuptools.package-data]
61
- assets = ["*.yaml"]
60
+ assets = ["*.yaml", "*.conf"]
@@ -1,2 +0,0 @@
1
-
2
- __version__ = "0.5.6"
@@ -1,55 +0,0 @@
1
- services:
2
- {{service_name}}-{{command}}:
3
- image: bundenth/kalavai-runner:gpu-latest
4
- container_name: {{service_name}}
5
- hostname: {{hostname}}
6
- privileged: true
7
- restart: unless-stopped
8
- ports:
9
- - "6443:6443" # kube server
10
- - "8472:8472" # flannel vxlan
11
- - "51820:51820" # flannel wireguard
12
- {% if command == "server" %}
13
- - "30000-30500:30000-30500"
14
- {% endif %}
15
- networks:
16
- - custom-network
17
- command: >
18
- {{command}}
19
- {% if command == "server" %}
20
- --flannel-backend wireguard-native
21
- --service-node-port-range "30000-30500"
22
- {% else %}
23
- --server {{pool_ip}}
24
- --token {{token}}
25
- {% endif %}
26
- --node-label role={{command}}
27
- {% if node_labels %}
28
- {{node_labels}}
29
- {% endif %}
30
- {% if num_gpus and num_gpus > 0 %}
31
- --node-label gpu=on
32
- {% else %}
33
- --node-label gpu=off
34
- {% endif %}
35
- --node-ip {{ip_address}}
36
- --node-external-ip {{ip_address}}
37
- {% if flannel_iface %}
38
- --flannel-iface {{flannel_iface}}
39
- {% endif %}
40
- volumes:
41
- - {{k3s_path}}:/var/lib/rancher/k3s # Persist data
42
- - {{etc_path}}:/etc/rancher/k3s # Config files
43
- {% if num_gpus and num_gpus > 0 %}
44
- deploy:
45
- resources:
46
- reservations:
47
- devices:
48
- - driver: nvidia
49
- count: {{num_gpus}}
50
- capabilities: [gpu]
51
- {% endif %}
52
-
53
- networks:
54
- custom-network:
55
- driver: bridge
@@ -1,13 +0,0 @@
1
- # https://docs.netmaker.io/docs/netclient#docker
2
- services:
3
- {{service_name}}:
4
- network_mode: host
5
- privileged: true
6
- restart: always
7
- environment:
8
- - TOKEN={{vpn_token}}
9
- - IFACE_NAME={{flannel_iface}}
10
- volumes:
11
- - '{{etc_path}}/netclient:/etc/netclient'
12
- container_name: {{service_name}}
13
- image: 'gravitl/netclient:latest'
File without changes