kalavai-client 0.5.7__tar.gz → 0.5.9__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (22) hide show
  1. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/PKG-INFO +14 -3
  2. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/README.md +13 -1
  3. kalavai_client-0.5.9/kalavai_client/__init__.py +2 -0
  4. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/assets/apps.yaml +8 -8
  5. kalavai_client-0.5.9/kalavai_client/assets/docker-compose-template.yaml +106 -0
  6. kalavai_client-0.5.9/kalavai_client/assets/nginx.conf +12 -0
  7. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/cli.py +142 -112
  8. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/cluster.py +26 -5
  9. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/utils.py +3 -3
  10. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/pyproject.toml +2 -3
  11. kalavai_client-0.5.7/kalavai_client/__init__.py +0 -2
  12. kalavai_client-0.5.7/kalavai_client/assets/docker-compose-template.yaml +0 -55
  13. kalavai_client-0.5.7/kalavai_client/assets/vpn-template.yaml +0 -13
  14. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/LICENSE +0 -0
  15. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/__main__.py +0 -0
  16. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/assets/__init__.py +0 -0
  17. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/assets/apps_values.yaml +0 -0
  18. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/assets/pool_config_template.yaml +0 -0
  19. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/assets/pool_config_values.yaml +0 -0
  20. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/assets/user_workspace.yaml +0 -0
  21. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/assets/user_workspace_values.yaml +0 -0
  22. {kalavai_client-0.5.7 → kalavai_client-0.5.9}/kalavai_client/auth.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.5.7
3
+ Version: 0.5.9
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -156,6 +156,16 @@ Make sure you also install python3-dev package. For ubuntu distros:
156
156
  sudo apt install python3-dev
157
157
  ```
158
158
 
159
+ If you see:
160
+ ```bash
161
+ AttributeError: install_layout. Did you mean: 'install_platlib'?
162
+ [end of output]
163
+ ```
164
+
165
+ Upgrade your setuptools:
166
+ ```bash
167
+ pip install -U setuptools
168
+ ```
159
169
 
160
170
  ### Install the client
161
171
 
@@ -270,10 +280,11 @@ Python version <= 3.12.
270
280
  ```bash
271
281
  sudo add-apt-repository ppa:deadsnakes/ppa
272
282
  sudo apt update
273
- sudo apt install python3.10 python3.10-dev
283
+ sudo apt install python3.10 python3.10-dev python3-virtualenv
274
284
  virtualenv -p python3.10 env
275
285
  source env/bin/activate
276
286
  sudo apt install python3.10-venv python3.10-dev -y
287
+ pip install -U setuptools
277
288
  pip install -e .[dev]
278
289
  ```
279
290
 
@@ -291,4 +302,4 @@ To run the unit tests, use:
291
302
  python -m unittest
292
303
  ```
293
304
 
294
-
305
+ docker run --rm --net=host -v /root/.cache/kalavai/:/root/.cache/kalavai/ ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file /root/.cache/kalavai/apps.yaml --kubeconfig /root/.cache/kalavai/kubeconfig
@@ -113,6 +113,16 @@ Make sure you also install python3-dev package. For ubuntu distros:
113
113
  sudo apt install python3-dev
114
114
  ```
115
115
 
116
+ If you see:
117
+ ```bash
118
+ AttributeError: install_layout. Did you mean: 'install_platlib'?
119
+ [end of output]
120
+ ```
121
+
122
+ Upgrade your setuptools:
123
+ ```bash
124
+ pip install -U setuptools
125
+ ```
116
126
 
117
127
  ### Install the client
118
128
 
@@ -227,10 +237,11 @@ Python version <= 3.12.
227
237
  ```bash
228
238
  sudo add-apt-repository ppa:deadsnakes/ppa
229
239
  sudo apt update
230
- sudo apt install python3.10 python3.10-dev
240
+ sudo apt install python3.10 python3.10-dev python3-virtualenv
231
241
  virtualenv -p python3.10 env
232
242
  source env/bin/activate
233
243
  sudo apt install python3.10-venv python3.10-dev -y
244
+ pip install -U setuptools
234
245
  pip install -e .[dev]
235
246
  ```
236
247
 
@@ -248,3 +259,4 @@ To run the unit tests, use:
248
259
  python -m unittest
249
260
  ```
250
261
 
262
+ docker run --rm --net=host -v /root/.cache/kalavai/:/root/.cache/kalavai/ ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file /root/.cache/kalavai/apps.yaml --kubeconfig /root/.cache/kalavai/kubeconfig
@@ -0,0 +1,2 @@
1
+
2
+ __version__ = "0.5.9"
@@ -189,13 +189,13 @@ releases:
189
189
  value: "1"
190
190
  - name: devicePlugin.deviceSplitCount
191
191
  value: "1"
192
- - name: scheduler.customWebhook.port
193
- value: "30498"
194
- - name: scheduler.service.schedulerPort
195
- value: "30498"
196
- - name: scheduler.service.monitorPort
197
- value: "30493"
198
- - name: devicePlugin.service.httpPort
199
- value: "30492"
192
+ # - name: scheduler.customWebhook.port
193
+ # value: "30498"
194
+ # - name: scheduler.service.schedulerPort
195
+ # value: "30498"
196
+ # - name: scheduler.service.monitorPort
197
+ # value: "30493"
198
+ # - name: devicePlugin.service.httpPort
199
+ # value: "30492"
200
200
 
201
201
 
@@ -0,0 +1,106 @@
1
+ services:
2
+ {% if vpn %}
3
+ {{vpn_name}}:
4
+ image: gravitl/netclient:v0.30.0
5
+ container_name: {{vpn_name}}
6
+ cap_add:
7
+ - NET_ADMIN
8
+ - SYS_MODULE
9
+ network_mode: host
10
+ # networks:
11
+ # - custom-network
12
+ # ports:
13
+ # # https://docs.k3s.io/installation/requirements#inbound-rules-for-k3s-nodes
14
+ # - "6443:6443" # kube server
15
+ # - "10250:10250" # worker balancer
16
+ # - "8472:8472/udp" # flannel vxlan
17
+ # - "51820-51830:51820-51830" # flannel wireguard
18
+ # {% if command == "server" %}
19
+ # - "30000-30500:30000-30500"
20
+ # {% endif %}
21
+ environment:
22
+ - HOST_NAME={{node_name}}
23
+ - IFACE_NAME={{flannel_iface}}
24
+ - TOKEN={{vpn_token}}
25
+ volumes:
26
+ - /dev/net/tun:/dev/net/tun
27
+ restart: unless-stopped
28
+ # nginx:
29
+ # image: nginx:latest
30
+ # ports:
31
+ # - "{{redirect_source_port}}:{{redirect_source_port}}"
32
+ # restart: unless-stopped
33
+ # networks:
34
+ # - custom-network
35
+ # volumes:
36
+ # - {{nginx_path}}/nginx.conf:/etc/nginx/nginx.conf
37
+ {% endif %}
38
+
39
+ # run worker only if command is set
40
+ {%if command %}
41
+ {{service_name}}:
42
+ image: docker.io/bundenth/kalavai-runner:gpu-latest
43
+ container_name: {{service_name}}
44
+ {% if vpn %}
45
+ depends_on:
46
+ - {{vpn_name}}
47
+ network_mode: "service:{{vpn_name}}"
48
+ {% else %}
49
+ network_mode: host
50
+ # hostname: {{node_name}}
51
+ # networks:
52
+ # - custom-network
53
+ # ports:
54
+ # - "6443:6443" # kube server
55
+ # - "2379-2380:2379-2380" # etcd server
56
+ # - "10259:10259" # kube scheduler
57
+ # - "10257:10257" # kube controller manager
58
+ # - "10250:10250" # worker balancer
59
+ # - "8285:8285" # flannel
60
+ # - "8472:8472" # flannel vxlan
61
+ # - "51820:51820" # flannel wireguard
62
+ # {% if command == "server" %}
63
+ # - "30000-32767:30000-32767"
64
+ # {% endif %}
65
+ {% endif %}
66
+ privileged: true
67
+ restart: unless-stopped
68
+ command: >
69
+ --command={{command}}
70
+ {% if command == "server" %}
71
+ --port_range="30000-32767"
72
+ {% else %}
73
+ --server_ip={{pool_ip}}
74
+ --token={{pool_token}}
75
+ {% endif %}
76
+ {%if vpn %}
77
+ --flannel_iface={{flannel_iface}}
78
+ {% endif %}
79
+ {% if num_gpus and num_gpus > 0 %}
80
+ --gpu=on
81
+ {% else %}
82
+ --gpu=off
83
+ {% endif %}
84
+ {% if node_labels %}
85
+ --extra="{{node_labels}}"
86
+ {% endif %}
87
+
88
+ volumes:
89
+ - {{k3s_path}}:/var/lib/rancher/k3s # Persist data
90
+ - {{etc_path}}:/etc/rancher/k3s # Config files
91
+
92
+ {% if num_gpus and num_gpus > 0 %}
93
+ deploy:
94
+ resources:
95
+ reservations:
96
+ devices:
97
+ - driver: nvidia
98
+ count: {{num_gpus}}
99
+ capabilities: [gpu]
100
+ {% endif %}
101
+ {% endif %}
102
+
103
+ networks:
104
+ custom-network:
105
+ driver: bridge
106
+
@@ -0,0 +1,12 @@
1
+ events {}
2
+
3
+ http {
4
+ server {
5
+ listen {{redirect_source_port}};
6
+ server_name localhost;
7
+
8
+ location / {
9
+ proxy_pass http://{{redirect_container}}:{{redirect_target_port}};
10
+ }
11
+ }
12
+ }
@@ -31,6 +31,7 @@ from kalavai_client.utils import (
31
31
  safe_remove,
32
32
  leave_vpn,
33
33
  join_vpn,
34
+ get_vpn_details,
34
35
  load_server_info,
35
36
  user_login,
36
37
  user_logout,
@@ -81,15 +82,17 @@ STORAGE_ACCESS_MODE = ["ReadWriteOnce"]
81
82
  STORAGE_CLASS_LABEL = "kalavai.storage.enabled"
82
83
  DEFAULT_STORAGE_NAME = "pool-cache"
83
84
  DEFAULT_STORAGE_SIZE = 20
85
+ DEFAULT_WATCHER_PORT = 30001
84
86
  USER_NODE_LABEL = "kalavai.cluster.user"
85
87
  KUBE_VERSION = os.getenv("KALAVAI_KUBE_VERSION", "v1.31.1+k3s1")
86
- DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker")
88
+ DEFAULT_FLANNEL_IFACE = os.getenv("KALAVAI_FLANNEL_IFACE", "netmaker-1")
87
89
  FORBIDEDEN_IPS = ["127.0.0.1"]
88
90
  # kalavai templates
89
91
  HELM_APPS_FILE = resource_path("kalavai_client/assets/apps.yaml")
90
92
  HELM_APPS_VALUES = resource_path("kalavai_client/assets/apps_values.yaml")
91
93
  # user specific config files
92
- DEFAULT_CONTAINER_NAME = "kalavai-seed"
94
+ DEFAULT_CONTAINER_NAME = "kalavai"
95
+ DEFAULT_VPN_CONTAINER_NAME = "kalavai-vpn"
93
96
  CONTAINER_HOST_PATH = user_path("pool/", create_path=True)
94
97
  USER_COMPOSE_FILE = user_path("docker-compose-worker.yaml")
95
98
  USER_VPN_COMPOSE_FILE = user_path("docker-compose-vpn.yaml")
@@ -115,27 +118,6 @@ CLUSTER = dockerCluster(
115
118
  ######################
116
119
  ## HELPER FUNCTIONS ##
117
120
  ######################
118
-
119
- def check_vpn_compatibility():
120
- """Check required packages to join VPN"""
121
- logs = []
122
- console.log("[white]Checking system requirements...")
123
- # netclient
124
- try:
125
- run_cmd("sudo netclient version >/dev/null 2>&1")
126
- except:
127
- logs.append("[red]Netmaker not installed. Install instructions:\n")
128
- logs.append(" Linux: https://docs.netmaker.io/docs/netclient#linux\n")
129
- logs.append(" Windows: https://docs.netmaker.io/docs/netclient#windows\n")
130
- logs.append(" MacOS: https://docs.netmaker.io/docs/netclient#mac\n")
131
-
132
- if len(logs) == 0:
133
- console.log("[green]System is ready to join a pool")
134
- return True
135
- else:
136
- for log in logs:
137
- console.log(log)
138
- return False
139
121
 
140
122
  def check_seed_compatibility():
141
123
  """Check required packages to start pools"""
@@ -179,16 +161,6 @@ def check_worker_compatibility():
179
161
 
180
162
 
181
163
  def cleanup_local():
182
- # disconnect from private network
183
- console.log("Disconnecting from VPN...")
184
- try:
185
- vpns = leave_vpn()
186
- if vpns is not None:
187
- for vpn in vpns:
188
- console.log(f"You have left {vpn} VPN")
189
- except:
190
- # no vpn
191
- pass
192
164
  console.log("Removing local cache files...")
193
165
  safe_remove(CONTAINER_HOST_PATH)
194
166
  safe_remove(USER_COMPOSE_FILE)
@@ -365,7 +337,7 @@ def select_token_type():
365
337
  break
366
338
  return {"admin": choice == 0, "user": choice == 1, "worker": choice == 2}
367
339
 
368
- def generate_compose_config(role, node_name, ip_address, node_labels, is_public, server=None, token=None):
340
+ def generate_compose_config(role, node_name, is_public, node_labels=None, pool_ip=None, vpn_token=None, pool_token=None):
369
341
  num_gpus = 0
370
342
  try:
371
343
  has_gpus = check_gpu_drivers()
@@ -377,20 +349,24 @@ def generate_compose_config(role, node_name, ip_address, node_labels, is_public,
377
349
  )
378
350
  except:
379
351
  console.log(f"[red]WARNING: error when fetching NVIDIA GPU info. GPUs will not be used on this local machine")
352
+ if node_labels is not None:
353
+ node_labels = " ".join([f"--node-label {key}={value}" for key, value in node_labels.items()])
380
354
  compose_values = {
381
355
  "user_path": user_path(""),
382
356
  "service_name": DEFAULT_CONTAINER_NAME,
383
- "pool_ip": server,
384
- "token": token,
385
- "hostname": node_name,
357
+ "vpn": is_public,
358
+ "vpn_name": DEFAULT_VPN_CONTAINER_NAME,
359
+ "pool_ip": pool_ip,
360
+ "pool_token": pool_token,
361
+ "vpn_token": vpn_token,
362
+ "node_name": node_name,
386
363
  "command": role,
387
364
  "storage_enabled": "True",
388
- "ip_address": ip_address,
389
365
  "num_gpus": num_gpus,
390
366
  "k3s_path": f"{CONTAINER_HOST_PATH}/k3s",
391
367
  "etc_path": f"{CONTAINER_HOST_PATH}/etc",
392
- "node_labels": " ".join([f"--node-label {key}={value}" for key, value in node_labels.items()]),
393
- "flannel_iface": DEFAULT_FLANNEL_IFACE if is_public else None
368
+ "node_labels": node_labels,
369
+ "flannel_iface": DEFAULT_FLANNEL_IFACE if is_public else ""
394
370
  }
395
371
  # generate local config files
396
372
  compose_yaml = load_template(
@@ -585,35 +561,56 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
585
561
  STORAGE_CLASS_LABEL: is_storage_compatible()
586
562
  }
587
563
  if location is not None:
588
- console.log("Joining private network")
564
+ console.log("Fetching VPN credentials")
589
565
  try:
590
- if not check_vpn_compatibility():
591
- return
592
- vpn = join_vpn(
566
+ vpn = get_vpn_details(
593
567
  location=location,
594
568
  user_cookie=USER_COOKIE)
595
569
  node_labels[USER_NODE_LABEL] = user["username"]
596
570
  except Exception as e:
597
571
  console.log(f"[red]Error when joining network: {str(e)}")
598
572
  return
599
-
600
- if ip_address is None:
601
- console.log(f"Scanning for valid IPs (subnet {vpn['subnet']})...")
602
- ip_address = select_ip_address(subnet=vpn["subnet"])
573
+
574
+ # Generate docker compose recipe
575
+ generate_compose_config(
576
+ role="server",
577
+ vpn_token=vpn["key"],
578
+ node_name=socket.gethostname(),
579
+ node_labels=node_labels,
580
+ is_public=location is not None
581
+ )
582
+
583
+ # start server
584
+ console.log("Deploying seed...")
585
+ CLUSTER.start_seed_node()
586
+
587
+ while not CLUSTER.is_agent_running():
588
+ console.log("Waiting for seed to start...")
589
+ time.sleep(10)
590
+
591
+ # select IP address (for external discovery)
592
+ if ip_address is None and location is None:
593
+ # local IP
594
+ console.log(f"Scanning for valid IPs")
595
+ ip_address = select_ip_address()
596
+ else:
597
+ # load VPN ip
598
+ ip_address = CLUSTER.get_vpn_ip()
603
599
  console.log(f"Using {ip_address} address for server")
604
600
 
601
+ # populate local cred files
605
602
  auth_key = str(uuid.uuid4())
606
603
  write_auth_key = str(uuid.uuid4())
607
604
  readonly_auth_key = str(uuid.uuid4())
608
- watcher_port = 30001
609
- watcher_service = f"{ip_address}:{watcher_port}"
605
+
606
+ watcher_service = f"{ip_address}:{DEFAULT_WATCHER_PORT}"
610
607
  values = {
611
608
  CLUSTER_NAME_KEY: cluster_name,
612
609
  CLUSTER_IP_KEY: ip_address,
613
610
  AUTH_KEY: auth_key,
614
611
  READONLY_AUTH_KEY: readonly_auth_key,
615
612
  WRITE_AUTH_KEY: write_auth_key,
616
- WATCHER_PORT_KEY: watcher_port,
613
+ WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
617
614
  WATCHER_SERVICE_KEY: watcher_service,
618
615
  USER_NODE_LABEL_KEY: USER_NODE_LABEL,
619
616
  ALLOW_UNREGISTERED_USER_KEY: not only_registered_users
@@ -630,15 +627,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
630
627
  cluster_name=cluster_name,
631
628
  public_location=location,
632
629
  user_api_key=user["api_key"])
633
-
634
- # 1. Generate docker compose recipe
635
- compose_yaml = generate_compose_config(
636
- role="server",
637
- node_name=socket.gethostname(),
638
- ip_address=ip_address,
639
- node_labels=node_labels,
640
- is_public=location is not None
641
- )
642
630
 
643
631
  # Generate helmfile recipe
644
632
  helm_yaml = load_template(
@@ -650,14 +638,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
650
638
  f.write(helm_yaml)
651
639
 
652
640
  console.log("[green]Config files have been generated in your local machine\n")
653
-
654
- # # 1. start server
655
- console.log("Deploying seed...")
656
- CLUSTER.start_seed_node()
657
-
658
- while not CLUSTER.is_agent_running():
659
- console.log("Waiting for seed to start...")
660
- time.sleep(10)
661
641
 
662
642
  console.log("Setting pool dependencies...")
663
643
  # set template values in helmfile
@@ -691,7 +671,6 @@ def pool__start(cluster_name, *others, only_registered_users: bool=False, ip_ad
691
671
 
692
672
  return None
693
673
 
694
-
695
674
  @arguably.command
696
675
  def pool__token(*others, admin=False, user=False, worker=False):
697
676
  """
@@ -773,6 +752,7 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
773
752
  if CLUSTER.is_agent_running():
774
753
  console.log(f"[white] You are already connected to {load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE)}. Enter [yellow]kalavai pool stop[white] to exit and join another one.")
775
754
  return
755
+
776
756
  # check that is not attached to another instance
777
757
  if os.path.exists(USER_LOCAL_SERVER_FILE):
778
758
  option = user_confirm(
@@ -810,20 +790,20 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
810
790
  }
811
791
  user = defaultdict(lambda: None)
812
792
  if public_location is not None:
813
- console.log("Joining private network")
793
+ user = user_login(user_cookie=USER_COOKIE)
794
+ if user is None:
795
+ console.log("[red]Must be logged in to join public pools. Run [yellow]kalavai login[red] to authenticate")
796
+ exit()
797
+ console.log("Fetching VPN credentials")
814
798
  try:
815
- if not check_vpn_compatibility():
816
- return
817
- vpn = join_vpn(
799
+ vpn = get_vpn_details(
818
800
  location=public_location,
819
801
  user_cookie=USER_COOKIE)
820
- user = user_login(user_cookie=USER_COOKIE)
821
802
  node_labels[USER_NODE_LABEL] = user["username"]
822
803
  except Exception as e:
823
804
  console.log(f"[red]Error when joining network: {str(e)}")
824
805
  console.log("Are you authenticated? Try [yellow]kalavai login")
825
806
  return
826
- # validate public seed
827
807
  try:
828
808
  validate_join_public_seed(
829
809
  cluster_name=cluster_name,
@@ -832,31 +812,29 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
832
812
  )
833
813
  except Exception as e:
834
814
  console.log(f"[red]Error when joining network: {str(e)}")
835
- leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
836
815
  return
837
816
 
838
817
  # send note to server to let them know the node is coming online
839
- if not pre_join_check(node_name=node_name, server_url=watcher_service, server_key=auth_key):
840
- console.log(f"[red] Failed pre join checks. Server offline or node '{node_name}' may already exist. Please specify a different one with '--node-name'")
841
- leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
842
- return
843
-
844
- if ip_address is None:
845
- console.log(f"Scanning for valid IPs (subnet {vpn['subnet']})...")
846
- ip_address = select_ip_address(subnet=vpn["subnet"])
847
- console.log(f"Using {ip_address} address for worker")
818
+ # TODO: won't be able to check for VPN pools...
819
+ # if not pre_join_check(node_name=node_name, server_url=watcher_service, server_key=auth_key):
820
+ # console.log(f"[red] Failed pre join checks. Server offline or node '{node_name}' may already exist. Please specify a different one with '--node-name'")
821
+ # leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
822
+ # return
848
823
 
849
824
  # local agent join
850
825
  # 1. Generate local cache files
851
826
  console.log("Generating config files...")
852
- compose_yaml = generate_compose_config(
827
+
828
+ # Generate docker compose recipe
829
+ generate_compose_config(
853
830
  role="agent",
854
- server=f"https://{kalavai_seed_ip}:6443",
855
- token=kalavai_token,
856
- node_name=socket.gethostname(),
857
- ip_address=ip_address,
831
+ pool_ip=f"https://{kalavai_seed_ip}:6443",
832
+ pool_token=kalavai_token,
833
+ vpn_token=vpn["key"],
834
+ node_name=node_name,
858
835
  node_labels=node_labels,
859
836
  is_public=public_location is not None)
837
+
860
838
  store_server_info(
861
839
  server_ip=kalavai_seed_ip,
862
840
  auth_key=auth_key,
@@ -866,8 +844,6 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
866
844
  cluster_name=cluster_name,
867
845
  public_location=public_location,
868
846
  user_api_key=user["api_key"])
869
-
870
- init_user_workspace()
871
847
 
872
848
  option = user_confirm(
873
849
  question="Docker compose ready. Would you like Kalavai to deploy it?",
@@ -883,18 +859,23 @@ def pool__join(token, *others, node_name=None, ip_address: str=None):
883
859
  CLUSTER.start_worker_node()
884
860
  except Exception as e:
885
861
  console.log(f"[red] Error connecting to {cluster_name} @ {kalavai_seed_ip}. Check with the admin if the token is still valid.")
886
- leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
862
+ leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
887
863
  exit()
888
864
 
889
- while not CLUSTER.is_agent_running():
890
- console.log("Waiting for worker to start...")
891
- time.sleep(10)
865
+ # ensure we are connected
866
+ while True:
867
+ console.log("Waiting for core services to be ready, may take a few minutes...")
868
+ time.sleep(30)
869
+ if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
870
+ break
871
+
872
+ init_user_workspace()
892
873
 
893
874
  # set status to schedulable
894
875
  console.log(f"[green] You are connected to {cluster_name}")
895
876
 
896
877
  @arguably.command
897
- def pool__stop(*others):
878
+ def pool__stop(*others, skip_node_deletion=False):
898
879
  """
899
880
  Stop sharing your device and clean up. DO THIS ONLY IF YOU WANT TO REMOVE KALAVAI-CLIENT from your device.
900
881
 
@@ -903,7 +884,8 @@ def pool__stop(*others):
903
884
  """
904
885
  console.log("[white] Stopping kalavai app...")
905
886
  # delete local node from server
906
- node__delete(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
887
+ if not skip_node_deletion:
888
+ node__delete(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
907
889
  # unpublish event (only if seed node)
908
890
  # TODO: no, this should be done via the platform!!!
909
891
  # try:
@@ -916,7 +898,20 @@ def pool__stop(*others):
916
898
  # console.log(f"[red][WARNING]: (ignore if not a public pool) Error when unpublishing cluster. {str(e)}")
917
899
  # remove local node agent
918
900
  console.log("Removing agent and local cache")
901
+
902
+ # disconnect from VPN first, then remove agent, then remove local files
903
+ console.log("Disconnecting from VPN...")
904
+ try:
905
+ vpns = leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
906
+ if vpns is not None:
907
+ for vpn in vpns:
908
+ console.log(f"You have left {vpn} VPN")
909
+ except:
910
+ # no vpn
911
+ pass
912
+
919
913
  CLUSTER.remove_agent()
914
+
920
915
  # clean local files
921
916
  cleanup_local()
922
917
  console.log("[white] Kalavai has stopped sharing your resources. Use [yellow]kalavai pool start[white] or [yellow]kalavai pool join[white] to start again!")
@@ -1107,6 +1102,7 @@ def pool__attach(token, *others, node_name=None):
1107
1102
  """
1108
1103
  Set creds in token on the local instance
1109
1104
  """
1105
+ # check that is not attached to another instance
1110
1106
  if os.path.exists(USER_LOCAL_SERVER_FILE):
1111
1107
  option = user_confirm(
1112
1108
  question="You seem to be connected to an instance already. Are you sure you want to join a new one?",
@@ -1115,34 +1111,39 @@ def pool__attach(token, *others, node_name=None):
1115
1111
  if option == 0:
1116
1112
  console.log("[green]Nothing happened.")
1117
1113
  return
1114
+
1115
+ # check token
1116
+ if not pool__check_token(token):
1117
+ return
1118
+
1118
1119
  try:
1119
1120
  data = decode_dict(token)
1120
1121
  kalavai_seed_ip = data[CLUSTER_IP_KEY]
1121
- kalavai_token = data[CLUSTER_TOKEN_KEY]
1122
1122
  cluster_name = data[CLUSTER_NAME_KEY]
1123
1123
  auth_key = data[AUTH_KEY]
1124
1124
  watcher_service = data[WATCHER_SERVICE_KEY]
1125
1125
  public_location = data[PUBLIC_LOCATION_KEY]
1126
- except:
1127
- console.log("[red]Error when parsing token. Invalid token")
1126
+ vpn = defaultdict(lambda: None)
1127
+ except Exception as e:
1128
+ console.log(str(e))
1129
+ console.log("[red] Invalid token")
1128
1130
  return
1129
-
1131
+
1130
1132
  user = defaultdict(lambda: None)
1131
1133
  if public_location is not None:
1132
- console.log("Joining private network")
1134
+ user = user_login(user_cookie=USER_COOKIE)
1135
+ if user is None:
1136
+ console.log("[red]Must be logged in to join public pools. Run [yellow]kalavai login[red] to authenticate")
1137
+ exit()
1138
+ console.log("Fetching VPN credentials")
1133
1139
  try:
1134
- if not check_vpn_compatibility():
1135
- return
1136
- vpn = join_vpn(
1140
+ vpn = get_vpn_details(
1137
1141
  location=public_location,
1138
1142
  user_cookie=USER_COOKIE)
1139
- user = user_login(user_cookie=USER_COOKIE)
1140
- time.sleep(5)
1141
1143
  except Exception as e:
1142
1144
  console.log(f"[red]Error when joining network: {str(e)}")
1143
1145
  console.log("Are you authenticated? Try [yellow]kalavai login")
1144
1146
  return
1145
- # validate public seed
1146
1147
  try:
1147
1148
  validate_join_public_seed(
1148
1149
  cluster_name=cluster_name,
@@ -1151,9 +1152,19 @@ def pool__attach(token, *others, node_name=None):
1151
1152
  )
1152
1153
  except Exception as e:
1153
1154
  console.log(f"[red]Error when joining network: {str(e)}")
1154
- leave_vpn(vpn_file=USER_VPN_COMPOSE_FILE)
1155
1155
  return
1156
-
1156
+
1157
+ # local agent join
1158
+ # 1. Generate local cache files
1159
+ console.log("Generating config files...")
1160
+
1161
+ # Generate docker compose recipe
1162
+ generate_compose_config(
1163
+ role="",
1164
+ vpn_token=vpn["key"],
1165
+ node_name=node_name,
1166
+ is_public=public_location is not None)
1167
+
1157
1168
  store_server_info(
1158
1169
  server_ip=kalavai_seed_ip,
1159
1170
  auth_key=auth_key,
@@ -1164,7 +1175,26 @@ def pool__attach(token, *others, node_name=None):
1164
1175
  public_location=public_location,
1165
1176
  user_api_key=user["api_key"])
1166
1177
 
1167
- console.log(f"[green]You are now connected to {cluster_name} @ {kalavai_seed_ip}")
1178
+ option = user_confirm(
1179
+ question="Docker compose ready. Would you like Kalavai to deploy it?",
1180
+ options=["no", "yes"]
1181
+ )
1182
+ if option == 0:
1183
+ console.log("Manually deploy the worker with the following command:\n")
1184
+ print(f"docker compose -f {USER_COMPOSE_FILE} up -d")
1185
+ return
1186
+
1187
+ console.log(f"[white] Connecting to {cluster_name} @ {kalavai_seed_ip} (this may take a few minutes)...")
1188
+ run_cmd(f"docker compose -f {USER_COMPOSE_FILE} up -d")
1189
+ # ensure we are connected
1190
+ while True:
1191
+ console.log("Waiting for core services to be ready, may take a few minutes...")
1192
+ time.sleep(30)
1193
+ if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
1194
+ break
1195
+
1196
+ # set status to schedulable
1197
+ console.log(f"[green] You are connected to {cluster_name}")
1168
1198
 
1169
1199
 
1170
1200
  @arguably.command
@@ -7,7 +7,8 @@ from kalavai_client.utils import (
7
7
  run_cmd,
8
8
  check_gpu_drivers,
9
9
  validate_poolconfig,
10
- user_path
10
+ user_path,
11
+ populate_template
11
12
  )
12
13
 
13
14
 
@@ -20,6 +21,9 @@ class Cluster(ABC):
20
21
  def start_worker_node(self, url, token, node_name, auth_key, watcher_service, ip_address, labels, flannel_iface):
21
22
  raise NotImplementedError()
22
23
 
24
+ @abstractmethod
25
+ def get_vpn_ip(self):
26
+ raise NotImplementedError()
23
27
 
24
28
  @abstractmethod
25
29
  def update_dependencies(self, dependencies_files):
@@ -79,11 +83,23 @@ class dockerCluster(Cluster):
79
83
  def start_seed_node(self):
80
84
 
81
85
  run_cmd(f"docker compose -f {self.compose_file} up -d")
82
- time.sleep(5)
83
- run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file}")
86
+ # wait for container to be setup
87
+ while True:
88
+ try:
89
+ run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file} >/dev/null 2>&1")
90
+ break
91
+ except:
92
+ pass
93
+ time.sleep(5)
84
94
 
85
95
  def start_worker_node(self):
86
96
  run_cmd(f"docker compose -f {self.compose_file} up -d")
97
+
98
+ def get_vpn_ip(self):
99
+ command = populate_template(
100
+ template_str="docker exec -it {{container_name}} ifconfig {{iface_name}} | grep 'inet ' | awk '{gsub(/^addr:/, \"\", $2); print $2}'",
101
+ values_dict={"container_name": self.container_name, "iface_name": self.default_flannel_iface})
102
+ return run_cmd(command).decode().strip()
87
103
 
88
104
 
89
105
  def update_dependencies(self, dependencies_file=None, debug=False, retries=3):
@@ -122,8 +138,13 @@ class dockerCluster(Cluster):
122
138
  def is_seed_node(self):
123
139
  if not os.path.isfile(self.compose_file):
124
140
  return False
125
- status = "server" in run_cmd(f"docker compose -f {self.compose_file} ps --services --status=running").decode()
126
- return status
141
+ if not self.is_agent_running():
142
+ return False
143
+ try:
144
+ run_cmd(f"docker container exec {self.container_name} cat /var/lib/rancher/k3s/server/node-token >/dev/null 2>&1")
145
+ return True
146
+ except:
147
+ return False
127
148
 
128
149
  def is_cluster_init(self):
129
150
  if not os.path.isfile(self.compose_file):
@@ -241,12 +241,12 @@ def join_vpn(location, user_cookie):
241
241
  run_cmd(f"sudo netclient join -t {token} >/dev/null 2>&1")
242
242
  return vpn
243
243
 
244
- def leave_vpn():
244
+ def leave_vpn(container_name):
245
245
  try:
246
- vpns = json.loads(run_cmd("sudo netclient list").decode())
246
+ vpns = json.loads(run_cmd(f"docker exec {container_name} netclient list").decode())
247
247
  left_vpns = [vpn['network'] for vpn in vpns]
248
248
  for vpn in left_vpns:
249
- run_cmd(f"sudo netclient leave {vpn}")
249
+ run_cmd(f"docker exec {container_name} netclient leave {vpn}")
250
250
  return left_vpns
251
251
  except:
252
252
  return None
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "kalavai-client"
3
- version = "0.5.7"
3
+ version = "0.5.9"
4
4
  authors = [
5
5
  {name = "Carlos Fernandez Musoles", email = "carlos@kalavai.net"}
6
6
  ]
@@ -53,9 +53,8 @@ build-backend = "poetry.core.masonry.api"
53
53
  kalavai = "kalavai_client.cli:app"
54
54
 
55
55
 
56
-
57
56
  [tool.setuptools.packages.find]
58
57
  where = ["kalavai_client"]
59
58
 
60
59
  [tool.setuptools.package-data]
61
- assets = ["*.yaml"]
60
+ assets = ["*.yaml", "*.conf"]
@@ -1,2 +0,0 @@
1
-
2
- __version__ = "0.5.7"
@@ -1,55 +0,0 @@
1
- services:
2
- {{service_name}}-{{command}}:
3
- image: bundenth/kalavai-runner:gpu-latest
4
- container_name: {{service_name}}
5
- hostname: {{hostname}}
6
- privileged: true
7
- restart: unless-stopped
8
- ports:
9
- - "6443:6443" # kube server
10
- - "8472:8472" # flannel vxlan
11
- - "51820:51820" # flannel wireguard
12
- {% if command == "server" %}
13
- - "30000-30500:30000-30500"
14
- {% endif %}
15
- networks:
16
- - custom-network
17
- command: >
18
- {{command}}
19
- {% if command == "server" %}
20
- --flannel-backend wireguard-native
21
- --service-node-port-range "30000-30500"
22
- {% else %}
23
- --server {{pool_ip}}
24
- --token {{token}}
25
- {% endif %}
26
- --node-label role={{command}}
27
- {% if node_labels %}
28
- {{node_labels}}
29
- {% endif %}
30
- {% if num_gpus and num_gpus > 0 %}
31
- --node-label gpu=on
32
- {% else %}
33
- --node-label gpu=off
34
- {% endif %}
35
- --node-ip {{ip_address}}
36
- --node-external-ip {{ip_address}}
37
- {% if flannel_iface %}
38
- --flannel-iface {{flannel_iface}}
39
- {% endif %}
40
- volumes:
41
- - {{k3s_path}}:/var/lib/rancher/k3s # Persist data
42
- - {{etc_path}}:/etc/rancher/k3s # Config files
43
- {% if num_gpus and num_gpus > 0 %}
44
- deploy:
45
- resources:
46
- reservations:
47
- devices:
48
- - driver: nvidia
49
- count: {{num_gpus}}
50
- capabilities: [gpu]
51
- {% endif %}
52
-
53
- networks:
54
- custom-network:
55
- driver: bridge
@@ -1,13 +0,0 @@
1
- # https://docs.netmaker.io/docs/netclient#docker
2
- services:
3
- {{service_name}}:
4
- network_mode: host
5
- privileged: true
6
- restart: always
7
- environment:
8
- - TOKEN={{vpn_token}}
9
- - IFACE_NAME={{flannel_iface}}
10
- volumes:
11
- - '{{etc_path}}/netclient:/etc/netclient'
12
- container_name: {{service_name}}
13
- image: 'gravitl/netclient:latest'
File without changes