kalavai-client 0.5.14__py3-none-any.whl → 0.5.16__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
kalavai_client/core.py CHANGED
@@ -1,17 +1,70 @@
1
+ import os
2
+ import time
1
3
  from collections import defaultdict
2
4
  import math
5
+ import uuid
6
+ import socket
7
+ import ipaddress
8
+ import netifaces as ni
3
9
 
4
10
  from pydantic import BaseModel
5
11
 
12
+ from kalavai_client.cluster import CLUSTER
6
13
  from kalavai_client.utils import (
7
14
  request_to_server,
8
- load_server_info
15
+ load_server_info,
16
+ decode_dict,
17
+ get_vpn_details,
18
+ validate_join_public_seed,
19
+ generate_compose_config,
20
+ store_server_info,
21
+ is_watcher_alive,
22
+ run_cmd,
23
+ leave_vpn,
24
+ safe_remove,
25
+ get_public_seeds,
26
+ load_template,
27
+ is_storage_compatible,
28
+ NODE_NAME_KEY,
29
+ MANDATORY_TOKEN_FIELDS,
30
+ PUBLIC_LOCATION_KEY,
31
+ CLUSTER_IP_KEY,
32
+ CLUSTER_NAME_KEY,
33
+ AUTH_KEY,
34
+ WATCHER_SERVICE_KEY,
35
+ CLUSTER_TOKEN_KEY,
36
+ READONLY_AUTH_KEY,
37
+ WRITE_AUTH_KEY,
38
+ WATCHER_PORT_KEY,
39
+ WATCHER_SERVICE_KEY,
40
+ USER_NODE_LABEL_KEY,
41
+ ALLOW_UNREGISTERED_USER_KEY
42
+ )
43
+ from kalavai_client.auth import (
44
+ KalavaiAuthClient
9
45
  )
10
46
  from kalavai_client.env import (
11
47
  USER_COOKIE,
12
48
  USER_LOCAL_SERVER_FILE,
13
49
  TEMPLATE_LABEL,
14
- SERVER_IP_KEY
50
+ SERVER_IP_KEY,
51
+ USER_COMPOSE_FILE,
52
+ DEFAULT_VPN_CONTAINER_NAME,
53
+ CONTAINER_HOST_PATH,
54
+ USER_VPN_COMPOSE_FILE,
55
+ USER_HELM_APPS_FILE,
56
+ USER_KUBECONFIG_FILE,
57
+ USER_TEMPLATES_FOLDER,
58
+ USER_WORKSPACE_TEMPLATE,
59
+ DEFAULT_USER_WORKSPACE_VALUES,
60
+ STORAGE_CLASS_LABEL,
61
+ USER_NODE_LABEL,
62
+ DEFAULT_WATCHER_PORT,
63
+ HELM_APPS_FILE,
64
+ HELM_APPS_VALUES,
65
+ POOL_CONFIG_DEFAULT_VALUES,
66
+ POOL_CONFIG_TEMPLATE,
67
+ FORBIDEDEN_IPS
15
68
  )
16
69
 
17
70
  class Job(BaseModel):
@@ -19,6 +72,7 @@ class Job(BaseModel):
19
72
  name: str = None
20
73
  workers: str = None
21
74
  endpoint: str = None
75
+ status: str = None
22
76
 
23
77
  class DeviceStatus(BaseModel):
24
78
  name: str
@@ -36,6 +90,76 @@ class GPU(BaseModel):
36
90
  model: str
37
91
 
38
92
 
93
def init_user_workspace(force_namespace=None):
    """Create the user's workspace objects on the pool via the watcher.

    Args:
        force_namespace: optional namespace to create the space in, instead
            of the requesting user's default namespace.

    Returns:
        {"success"} on success, or {"error": ...} describing the failure.
    """
    # Render the workspace template using only its default values.
    workspace_config = load_template(
        template_path=USER_WORKSPACE_TEMPLATE,
        values={},
        default_values_path=DEFAULT_USER_WORKSPACE_VALUES)

    payload = {"config": workspace_config}
    if force_namespace is not None:
        payload["force_namespace"] = force_namespace
    try:
        request_to_server(
            method="post",
            endpoint="/v1/create_user_space",
            data=payload,
            server_creds=USER_LOCAL_SERVER_FILE,
            user_cookie=USER_COOKIE
        )
        return {"success"}
    except Exception as e:
        return {"error": f"Error when connecting to kalavai service: {str(e)}"}
115
+
116
def check_seed_compatibility():
    """Check required packages to start pools.

    Returns:
        {"issues": [...]} — the list is empty when the host can seed a pool.
    """
    logs = []
    # docker must be installed and the daemon reachable
    try:
        run_cmd("docker version >/dev/null 2>&1")
    except Exception:
        # was a bare 'except:', which also swallows KeyboardInterrupt/SystemExit
        logs.append("[red]Docker not installed. Install instructions:\n")
        logs.append(" Linux: https://docs.docker.com/engine/install/\n")
        logs.append(" Windows/MacOS: https://docs.docker.com/desktop/\n")

    return {"issues": logs}
128
+
129
def check_worker_compatibility():
    """Check required packages to join pools.

    Returns:
        {"issues": [...]} — the list is empty when the host can join a pool.
    """
    logs = []
    # docker must be installed and the daemon reachable
    try:
        run_cmd("docker version >/dev/null 2>&1")
    except Exception:
        # was a bare 'except:', which also swallows KeyboardInterrupt/SystemExit
        logs.append("[red]Docker not installed. Install instructions:\n")
        logs.append(" Linux: https://docs.docker.com/engine/install/\n")
        logs.append(" Windows/MacOS: https://docs.docker.com/desktop/\n")

    return {"issues": logs}
141
+
142
def get_ip_addresses(subnet=None):
    """Return this host's IPv4 addresses, optionally filtered by subnet.

    Retries a few times with a short sleep, since interfaces may still be
    coming up.

    Args:
        subnet: optional CIDR string; only addresses inside it are returned.

    Returns:
        A single address string when exactly one matches, otherwise a list
        of matching addresses.

    Raises:
        ValueError: if no address is found after all retries.
    """
    retry = 3
    while True:
        ips = []
        for iface in ni.interfaces():
            try:
                ip = ni.ifaddresses(iface)[ni.AF_INET][0]['addr']
            except (KeyError, IndexError):
                # interface has no IPv4 address configured
                continue
            if ip in FORBIDEDEN_IPS:
                continue
            if subnet is None or ipaddress.ip_address(ip) in ipaddress.ip_network(subnet):
                ips.append(ip)
        if len(ips) == 1:
            return ips[0]
        if ips:
            return ips
        retry -= 1
        if retry < 0:
            raise ValueError(f"No IPs available on subnet {subnet}")
        # previously slept even after the final failed attempt (and after a
        # successful multi-IP scan); only sleep before an actual retry
        time.sleep(2)
162
+
39
163
  def fetch_resources():
40
164
  try:
41
165
  total = request_to_server(
@@ -57,6 +181,35 @@ def fetch_resources():
57
181
 
58
182
  return {"total": total, "available": available}
59
183
 
184
def fetch_job_defaults(name):
    """Fetch the default template values for the named job template.

    Returns the server response, or {"error": ...} on failure.
    """
    try:
        return request_to_server(
            method="get",
            endpoint="/v1/job_defaults",
            data={"template": name},
            server_creds=USER_LOCAL_SERVER_FILE,
            user_cookie=USER_COOKIE
        )
    except Exception as e:
        return {"error": str(e)}
199
+
200
def fetch_job_templates():
    """List the job templates available on the pool.

    Returns the server response, or {"error": ...} on failure.
    """
    try:
        return request_to_server(
            method="get",
            endpoint="/v1/get_job_templates",
            server_creds=USER_LOCAL_SERVER_FILE,
            data=None,
            user_cookie=USER_COOKIE
        )
    except Exception as e:
        return {"error": str(e)}
212
+
60
213
  def fetch_job_names():
61
214
  data = {
62
215
  "group": "batch.volcano.sh",
@@ -126,11 +279,18 @@ def fetch_job_details(jobs: list[Job]):
126
279
  node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
127
280
 
128
281
  urls = [f"http://{load_server_info(data_key=SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)}:{node_port}" for node_port in node_ports]
282
+ if "Ready" in workers_status and len(workers_status) == 1:
283
+ status = "running"
284
+ elif any([st in workers_status for st in ["Failed", "Completed"]]):
285
+ status = "error"
286
+ else:
287
+ status = "pending"
129
288
  job_details.append(
130
289
  Job(owner=namespace,
131
290
  name=deployment,
132
291
  workers=workers,
133
- endpoint="\n".join(urls))
292
+ endpoint="\n".join(urls),
293
+ status=str(status))
134
294
  )
135
295
 
136
296
  except Exception as e:
@@ -138,6 +298,47 @@ def fetch_job_details(jobs: list[Job]):
138
298
 
139
299
  return job_details
140
300
 
301
def deploy_job(template_name, values_dict, force_namespace=None):
    """Deploy a job template through kube-watcher.

    Args:
        template_name: name of the template to deploy.
        values_dict: template values to apply.
        force_namespace: optional namespace override for the deployment.

    Returns the server response, or {"error": ...} on failure.
    """
    payload = {
        "template": template_name,
        "template_values": values_dict
    }
    if force_namespace is not None:
        payload["force_namespace"] = force_namespace

    try:
        return request_to_server(
            method="post",
            endpoint="/v1/deploy_job",
            data=payload,
            server_creds=USER_LOCAL_SERVER_FILE,
            user_cookie=USER_COOKIE
        )
    except Exception as e:
        return {"error": str(e)}
322
+
323
def delete_job(name, force_namespace=None):
    """Delete a deployed job and its associated resources.

    Args:
        name: job name (matched against the template label value).
        force_namespace: optional namespace override.

    Returns the server response, or {"error": ...} on failure.
    """
    payload = {
        "label": TEMPLATE_LABEL, # this ensures that both lws template and services are deleted
        "value": name
    }
    if force_namespace is not None:
        payload["force_namespace"] = force_namespace
    try:
        return request_to_server(
            method="post",
            endpoint="/v1/delete_labeled_resources",
            data=payload,
            server_creds=USER_LOCAL_SERVER_FILE,
            user_cookie=USER_COOKIE
        )
    except Exception as e:
        return {"error": str(e)}
341
+
141
342
  def fetch_devices():
142
343
  """Load devices status info for all hosts"""
143
344
  try:
@@ -224,4 +425,454 @@ def fetch_gpus(available=False):
224
425
  return all_gpus
225
426
 
226
427
  except Exception as e:
227
- return {"error": str(e)}
428
+ return {"error": str(e)}
429
+
430
def load_user_session():
    """Return the cached user session, or None when not logged in."""
    return KalavaiAuthClient(user_cookie_file=USER_COOKIE).load_user_session()
435
+
436
def authenticate_user(username=None, password=None):
    """Return the current user session, logging in if none is cached.

    Args:
        username: credentials used only when no session is cached.
        password: credentials used only when no session is cached.

    Returns:
        The user session, or {"error": ...} when login fails.
    """
    auth_client = KalavaiAuthClient(
        user_cookie_file=USER_COOKIE
    )
    session = auth_client.load_user_session()
    if session is None:
        session = auth_client.login(username=username, password=password)

    if session is None:
        return {"error": "Username or password incorrect"}
    return session
447
+
448
def user_logout():
    """Clear the locally stored user session. Always returns True."""
    KalavaiAuthClient(user_cookie_file=USER_COOKIE).logout()
    return True
454
+
455
def check_token(token, public=False):
    """Validate a pool join token.

    Args:
        token: encoded token string.
        public: when True, additionally require a public location entry.

    Returns:
        {"status": True} when valid, otherwise {"error": ...}.
    """
    try:
        data = decode_dict(token)
        for field in MANDATORY_TOKEN_FIELDS:
            # explicit check instead of 'assert': asserts are stripped under -O
            if field not in data:
                raise ValueError(f"Missing mandatory field in token: {field}")
        if public:
            if data[PUBLIC_LOCATION_KEY] is None:
                raise ValueError("Token is not valid for public pools. Did you start the cluster with a public_location?")
        return {"status": True}
    except Exception as e:
        return {"error": str(e)}
466
+
467
def attach_to_pool(token, node_name=None):
    """Attach this host to an existing pool (no worker agent).

    Starts the local companion containers pointing at the seed described by
    the token, stores local credentials and blocks until the watcher
    service responds.

    Args:
        token: pool join token.
        node_name: optional node name; a unique one is generated if omitted.

    Returns:
        The cluster name on success, or {"error": ...} on failure.
    """
    if node_name is None:
        node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"

    # check token
    valid = check_token(token=token)
    if "error" in valid:
        return {"error": f"Invalid token: {valid}"}

    try:
        data = decode_dict(token)
        kalavai_seed_ip = data[CLUSTER_IP_KEY]
        cluster_name = data[CLUSTER_NAME_KEY]
        auth_key = data[AUTH_KEY]
        watcher_service = data[WATCHER_SERVICE_KEY]
        public_location = data[PUBLIC_LOCATION_KEY]
        vpn = defaultdict(lambda: None)
    except Exception as e:
        return {"error": f"Invalid token. {str(e)}"}

    user = defaultdict(lambda: None)
    if public_location is not None:
        user = load_user_session()
        if user is None:
            # fix: key was "error " (trailing space), so callers checking
            # '"error" in result' missed this failure
            return {"error": "Must be logged in to join public pools"}
        try:
            vpn = get_vpn_details(
                location=public_location,
                user_cookie=USER_COOKIE)
        except Exception as e:
            return {"error": f"Are you authenticated? {str(e)}"}
        try:
            validate_join_public_seed(
                cluster_name=cluster_name,
                join_key=token,
                user_cookie=USER_COOKIE
            )
        except Exception as e:
            return {"error": f"Error when joining network: {str(e)}"}

    # local agent join
    # 1. Generate local cache files
    # Generate docker compose recipe
    generate_compose_config(
        role="",
        vpn_token=vpn["key"],
        node_name=node_name,
        is_public=public_location is not None)

    store_server_info(
        server_ip=kalavai_seed_ip,
        auth_key=auth_key,
        file=USER_LOCAL_SERVER_FILE,
        watcher_service=watcher_service,
        node_name=node_name,
        cluster_name=cluster_name,
        public_location=public_location,
        user_api_key=user["api_key"])

    run_cmd(f"docker compose -f {USER_COMPOSE_FILE} up -d")
    # ensure we are connected before returning
    while True:
        time.sleep(30)
        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
            break

    return cluster_name
534
+
535
def join_pool(token, num_gpus=0, node_name=None):
    """Join an existing pool as a worker node.

    Validates the token, optionally authenticates and joins the VPN for
    public pools, generates the local docker compose recipe, starts the
    worker agent and blocks until the node is connected.

    Args:
        token: pool join token.
        num_gpus: number of GPUs to expose to the pool.
        node_name: optional node name; a unique one is generated if omitted.

    Returns:
        The cluster name on success, or {"error": ...} on failure.
    """
    compatibility = check_worker_compatibility()
    if len(compatibility["issues"]) > 0:
        return {"error": compatibility["issues"]}

    if node_name is None:
        node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"

    # check token
    valid = check_token(token=token)
    if "error" in valid:
        return {"error": f"Invalid token: {valid}"}

    try:
        data = decode_dict(token)
        kalavai_seed_ip = data[CLUSTER_IP_KEY]
        kalavai_token = data[CLUSTER_TOKEN_KEY]
        cluster_name = data[CLUSTER_NAME_KEY]
        auth_key = data[AUTH_KEY]
        watcher_service = data[WATCHER_SERVICE_KEY]
        public_location = data[PUBLIC_LOCATION_KEY]
        vpn = defaultdict(lambda: None)
    except Exception as e:
        return {"error": f"Invalid token. {str(e)}"}

    # join private network if provided
    node_labels = {
        STORAGE_CLASS_LABEL: is_storage_compatible()
    }
    user = defaultdict(lambda: None)
    if public_location is not None:
        # public pools require an authenticated user and a VPN connection
        user = authenticate_user()
        if user is None:
            return {"error": "Must be logged in to join public pools"}
        try:
            vpn = get_vpn_details(
                location=public_location,
                user_cookie=USER_COOKIE)
            node_labels[USER_NODE_LABEL] = user["username"]
        except Exception as e:
            return {"error": f"Are you authenticated? Error: {str(e)}"}
        try:
            validate_join_public_seed(
                cluster_name=cluster_name,
                join_key=token,
                user_cookie=USER_COOKIE
            )
        except Exception as e:
            return {"error": f"Error when joining network: {str(e)}"}

    # local agent join
    # Generate docker compose recipe
    generate_compose_config(
        role="agent",
        pool_ip=f"https://{kalavai_seed_ip}:6443",
        pool_token=kalavai_token,
        num_gpus=num_gpus,
        vpn_token=vpn["key"],
        node_name=node_name,
        node_labels=node_labels,
        is_public=public_location is not None)

    store_server_info(
        server_ip=kalavai_seed_ip,
        auth_key=auth_key,
        file=USER_LOCAL_SERVER_FILE,
        watcher_service=watcher_service,
        node_name=node_name,
        cluster_name=cluster_name,
        public_location=public_location,
        user_api_key=user["api_key"])

    try:
        CLUSTER.start_worker_node()
    except Exception as e:
        return {"error": f"Error connecting to {cluster_name} @ {kalavai_seed_ip}. Check with the admin if the token is still valid."}

    # ensure we are connected (watcher reachable); polls every 30s
    while True:
        time.sleep(30)
        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
            break

    # check the node has connected successfully; Ctrl-C aborts the wait
    try:
        while not CLUSTER.is_agent_running():
            time.sleep(30)
    except KeyboardInterrupt:
        return {"error": "Installation aborted. Leaving pool."}

    result = init_user_workspace()
    if "error" in result:
        return {"error": f"Error when creating user workspace: {result}"}

    return cluster_name
630
+
631
def create_pool(cluster_name: str, ip_address: str, app_values: str=None, pool_config_values: str=None, num_gpus: int=0, node_name: str=None, only_registered_users: bool=False, location: str=None):
    """Create a new pool with this host as the seed node.

    Starts the seed containers, generates credentials, installs the helm
    dependencies and initialises the pool and user workspaces.

    Args:
        cluster_name: name for the new pool.
        ip_address: address to advertise; resolved from the VPN when None
            or when a public location is used.
        app_values: helm app values file (defaults to HELM_APPS_VALUES).
        pool_config_values: pool config values file (defaults to
            POOL_CONFIG_DEFAULT_VALUES).
        num_gpus: number of GPUs to expose.
        node_name: ignored; a unique name is always generated.
        only_registered_users: restrict the pool to authenticated users.
        location: public location identifier for public pools.

    Returns:
        {"success"} on success, or {"error": ...} on failure.
    """
    # fix: previously `if not check_seed_compatibility():` — that function
    # always returns a truthy dict {"issues": [...]}, so the guard never
    # fired; inspect the issues list instead (mirrors join_pool)
    compatibility = check_seed_compatibility()
    if len(compatibility["issues"]) > 0:
        return {"error": "Requirements failed"}

    if app_values is None:
        app_values = HELM_APPS_VALUES

    if pool_config_values is None:
        pool_config_values = POOL_CONFIG_DEFAULT_VALUES

    node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"

    # if only registered users are allowed, check user has logged in
    user = defaultdict(lambda: None)
    if only_registered_users or location is not None:
        user = authenticate_user()
        if user is None:
            return {"error": "[white]--only-registered-users [red]or [white]--location[red] can only be used if the host is authenticated. Run [yellow]kalavai login[red] to authenticate"}

    # join private network if provided
    vpn = defaultdict(lambda: None)
    node_labels = {
        STORAGE_CLASS_LABEL: is_storage_compatible()
    }
    if location is not None:
        try:
            vpn = get_vpn_details(
                location=location,
                user_cookie=USER_COOKIE)
            node_labels[USER_NODE_LABEL] = user["username"]
        except Exception as e:
            return {"error": f"[red]Error when joining network: {str(e)}"}

    # Generate docker compose recipe
    generate_compose_config(
        role="server",
        vpn_token=vpn["key"],
        node_ip_address=ip_address,
        num_gpus=num_gpus,
        node_name=node_name,
        node_labels=node_labels,
        is_public=location is not None
    )

    # start server
    CLUSTER.start_seed_node()

    while not CLUSTER.is_agent_running():
        time.sleep(10)

    # select IP address (for external discovery)
    if ip_address is None or location is not None:
        # load VPN ip
        ip_address = CLUSTER.get_vpn_ip()

    # populate local cred files
    auth_key = str(uuid.uuid4())
    write_auth_key = str(uuid.uuid4())
    readonly_auth_key = str(uuid.uuid4())

    watcher_service = f"{ip_address}:{DEFAULT_WATCHER_PORT}"
    values = {
        CLUSTER_NAME_KEY: cluster_name,
        CLUSTER_IP_KEY: ip_address,
        AUTH_KEY: auth_key,
        READONLY_AUTH_KEY: readonly_auth_key,
        WRITE_AUTH_KEY: write_auth_key,
        WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
        WATCHER_SERVICE_KEY: watcher_service,
        USER_NODE_LABEL_KEY: USER_NODE_LABEL,
        ALLOW_UNREGISTERED_USER_KEY: not only_registered_users
    }

    store_server_info(
        server_ip=ip_address,
        auth_key=auth_key,
        readonly_auth_key=readonly_auth_key,
        write_auth_key=write_auth_key,
        file=USER_LOCAL_SERVER_FILE,
        watcher_service=watcher_service,
        node_name=node_name,
        cluster_name=cluster_name,
        public_location=location,
        user_api_key=user["api_key"])

    # Generate helmfile recipe
    helm_yaml = load_template(
        template_path=HELM_APPS_FILE,
        values=values,
        default_values_path=app_values,
        force_defaults=True)
    with open(USER_HELM_APPS_FILE, "w") as f:
        f.write(helm_yaml)

    # set template values in helmfile
    try:
        CLUSTER.update_dependencies(
            dependencies_file=USER_HELM_APPS_FILE
        )
    except Exception as e:
        return {"error": f"Error when updating dependencies: {str(e)}"}

    if location is not None:
        # TODO: register with kalavai if it's a public cluster
        pass

    # wait until the server is ready to create objects
    while True:
        time.sleep(30)
        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
            break

    result = pool_init(pool_config_values_path=pool_config_values)
    if "error" in result or ("failed" in result and len(result['failed']) > 0):
        return {"error": f"Error when initialising pool: {result}"}
    # init default namespace
    init_user_workspace(force_namespace="default")
    if only_registered_users:
        # init user namespace
        init_user_workspace()

    return {"success"}
755
+
756
def pool_init(pool_config_values_path=None):
    """Deploy configured objects to initialise the pool.

    Args:
        pool_config_values_path: values file for the pool config template;
            when None, nothing is deployed.

    Returns:
        The server response, {"error": ...} on failure, or {} when there is
        nothing to deploy.
    """
    if pool_config_values_path is None:
        # fix: previously returned None, which made callers' '"error" in
        # result' checks raise TypeError
        return {}

    # load template config and populate with values
    sidecar_template_yaml = load_template(
        template_path=POOL_CONFIG_TEMPLATE,
        values={},
        default_values_path=pool_config_values_path)

    try:
        result = request_to_server(
            method="post",
            endpoint="/v1/deploy_generic_model",
            data={"config": sidecar_template_yaml},
            server_creds=USER_LOCAL_SERVER_FILE,
            user_cookie=USER_COOKIE
        )
        return result
    except Exception as e:
        return {"error": f"[red]Error when connecting to kalavai service: {str(e)}"}
778
+
779
def is_connected():
    """Report whether local creds exist and the watcher service responds."""
    has_creds = os.path.isfile(USER_LOCAL_SERVER_FILE)
    return has_creds and is_watcher_alive(
        server_creds=USER_LOCAL_SERVER_FILE,
        user_cookie=USER_COOKIE,
        timeout=10)
783
+
784
def is_agent_running():
    """Report whether the local cluster agent is running."""
    return CLUSTER.is_agent_running()
786
+
787
def is_server():
    """Report whether this host is the pool's seed node."""
    return CLUSTER.is_seed_node()
789
+
790
def pause_agent(retries=3):
    """Pause the local agent, retrying a few times.

    Args:
        retries: number of attempts before giving up.

    Returns:
        {"success"} when paused, {"error": ...} otherwise.
    """
    try:
        while retries > 0:
            if CLUSTER.pause_agent():
                return {"success"}
            time.sleep(5)
            retries -= 1
    except Exception:
        # was a bare 'except:' — narrow it so Ctrl-C still works
        pass
    # fix: previously fell through returning None when retries ran out
    return {"error": "Could not pause agent"}
800
+
801
def resume_agent(retries=3):
    """Resume (restart) the local agent, retrying a few times.

    Args:
        retries: number of attempts before giving up.

    Returns:
        {"success"} when resumed, {"error": ...} otherwise.
    """
    try:
        while retries > 0:
            if CLUSTER.restart_agent():
                return {"success"}
            time.sleep(5)
            retries -= 1
    except Exception:
        # was a bare 'except:' — narrow it so Ctrl-C still works
        pass
    # fix: previously fell through returning None when retries ran out
    return {"error": "Could not resume agent"}
811
+
812
def cleanup_local():
    """Remove all locally cached pool files and folders (best effort)."""
    local_artifacts = (
        CONTAINER_HOST_PATH,
        USER_COMPOSE_FILE,
        USER_VPN_COMPOSE_FILE,
        USER_HELM_APPS_FILE,
        USER_KUBECONFIG_FILE,
        USER_LOCAL_SERVER_FILE,
        USER_TEMPLATES_FOLDER,
    )
    for artifact in local_artifacts:
        safe_remove(artifact)
820
+
821
def delete_node(name):
    """Ask the watcher to remove a node from the pool by name.

    Returns a one-element set message on success, or {"error": ...}.
    """
    try:
        outcome = request_to_server(
            method="post",
            endpoint="/v1/delete_nodes",
            data={"node_names": [name]},
            server_creds=USER_LOCAL_SERVER_FILE,
            user_cookie=USER_COOKIE
        )
    except Exception as e:
        return {"error": str(e)}
    if outcome is None or outcome is True:
        return {f"Node {name} deleted successfully"}
    return {"error": outcome}
839
+
840
def stop_pool(skip_node_deletion=False):
    """Tear down this host's membership of the pool.

    Deletes the local node from the server (unless skipped), disconnects
    from the VPN, removes the local agent and cleans cached files.

    Args:
        skip_node_deletion: when True, do not ask the server to delete this
            node (useful when the server is already unreachable).

    Returns:
        A list of log messages describing what happened.
    """
    logs = []
    # delete local node from server
    if not skip_node_deletion:
        logs.append(
            delete_node(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
        )
    # TODO: unpublishing public pools should be done via the platform

    # disconnect from VPN first, then remove agent, then remove local files
    try:
        vpns = leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
        if vpns is not None:
            for vpn in vpns:
                logs.append(f"You have left {vpn} VPN")
    except Exception:
        # best effort: host may not be on a VPN at all
        # (was a bare 'except:', which also swallowed KeyboardInterrupt)
        pass

    CLUSTER.remove_agent()

    # clean local files
    cleanup_local()

    return logs
875
+
876
def list_available_pools(user_only=False):
    """Return the public pool seeds visible to this user."""
    return get_public_seeds(user_only=user_only, user_cookie=USER_COOKIE)