kalavai-client 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl

kalavai_client/core.py CHANGED
@@ -1,17 +1,70 @@
+import os
+import time
 from collections import defaultdict
 import math
+import uuid
+import socket
+import ipaddress
+import netifaces as ni

 from pydantic import BaseModel

+from kalavai_client.cluster import CLUSTER
 from kalavai_client.utils import (
     request_to_server,
-    load_server_info
+    load_server_info,
+    decode_dict,
+    get_vpn_details,
+    validate_join_public_seed,
+    generate_compose_config,
+    store_server_info,
+    is_watcher_alive,
+    run_cmd,
+    leave_vpn,
+    safe_remove,
+    get_public_seeds,
+    load_template,
+    is_storage_compatible,
+    NODE_NAME_KEY,
+    MANDATORY_TOKEN_FIELDS,
+    PUBLIC_LOCATION_KEY,
+    CLUSTER_IP_KEY,
+    CLUSTER_NAME_KEY,
+    AUTH_KEY,
+    WATCHER_SERVICE_KEY,
+    CLUSTER_TOKEN_KEY,
+    READONLY_AUTH_KEY,
+    WRITE_AUTH_KEY,
+    WATCHER_PORT_KEY,
+    WATCHER_SERVICE_KEY,
+    USER_NODE_LABEL_KEY,
+    ALLOW_UNREGISTERED_USER_KEY
+)
+from kalavai_client.auth import (
+    KalavaiAuthClient
 )
 from kalavai_client.env import (
     USER_COOKIE,
     USER_LOCAL_SERVER_FILE,
     TEMPLATE_LABEL,
-    SERVER_IP_KEY
+    SERVER_IP_KEY,
+    USER_COMPOSE_FILE,
+    DEFAULT_VPN_CONTAINER_NAME,
+    CONTAINER_HOST_PATH,
+    USER_VPN_COMPOSE_FILE,
+    USER_HELM_APPS_FILE,
+    USER_KUBECONFIG_FILE,
+    USER_TEMPLATES_FOLDER,
+    USER_WORKSPACE_TEMPLATE,
+    DEFAULT_USER_WORKSPACE_VALUES,
+    STORAGE_CLASS_LABEL,
+    USER_NODE_LABEL,
+    DEFAULT_WATCHER_PORT,
+    HELM_APPS_FILE,
+    HELM_APPS_VALUES,
+    POOL_CONFIG_DEFAULT_VALUES,
+    POOL_CONFIG_TEMPLATE,
+    FORBIDEDEN_IPS
 )

 class Job(BaseModel):
@@ -19,6 +72,7 @@ class Job(BaseModel):
     name: str = None
     workers: str = None
     endpoint: str = None
+    status: str = None

 class DeviceStatus(BaseModel):
     name: str
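
A note on the model change above: `status` defaults to None, so existing callers that construct Job without it keep working; it is populated by the `fetch_job_details` hunk further down in this diff. A minimal sketch of reading the new field, assuming `fetch_job_names` returns a list of Job stubs on success (its body is only partially shown here):

    from kalavai_client.core import fetch_job_details, fetch_job_names

    jobs = fetch_job_names()
    if isinstance(jobs, list):  # on failure these helpers return {"error": ...}
        for job in fetch_job_details(jobs=jobs):
            # status is one of "running", "error" or "pending"
            # (see the fetch_job_details hunk below)
            print(job.name, job.status)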
@@ -36,6 +90,74 @@ class GPU(BaseModel):
     model: str


+def init_user_workspace(force_namespace=None):
+
+    # load template config and populate with values
+    sidecar_template_yaml = load_template(
+        template_path=USER_WORKSPACE_TEMPLATE,
+        values={},
+        default_values_path=DEFAULT_USER_WORKSPACE_VALUES)
+
+    try:
+        data = {"config": sidecar_template_yaml}
+        if force_namespace is not None:
+            data["force_namespace"] = force_namespace
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/create_user_space",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return {"success"}
+    except Exception as e:
+        return {"error": f"Error when connecting to kalavai service: {str(e)}"}
+
+def check_seed_compatibility():
+    """Check required packages to start pools"""
+    logs = []
+    # docker
+    try:
+        run_cmd("docker version >/dev/null 2>&1")
+    except:
+        logs.append("[red]Docker not installed. Install instructions:\n")
+        logs.append(" Linux: https://docs.docker.com/engine/install/\n")
+        logs.append(" Windows/MacOS: https://docs.docker.com/desktop/\n")
+
+    return {"issues": logs}
+
+def check_worker_compatibility():
+    """Check required packages to join pools"""
+    logs = []
+    # docker
+    try:
+        run_cmd("docker version >/dev/null 2>&1")
+    except:
+        logs.append("[red]Docker not installed. Install instructions:\n")
+        logs.append(" Linux: https://docs.docker.com/engine/install/\n")
+        logs.append(" Windows/MacOS: https://docs.docker.com/desktop/\n")
+
+    return {"issues": logs}
+
+def get_ip_addresses(subnet=None):
+    ips = []
+    retry = 3
+    while len(ips) == 0:
+        for iface in ni.interfaces():
+            try:
+                ip = ni.ifaddresses(iface)[ni.AF_INET][0]['addr']
+                if ip in FORBIDEDEN_IPS:
+                    continue
+                if subnet is None or ipaddress.ip_address(ip) in ipaddress.ip_network(subnet):
+                    ips.append(ip)
+            except:
+                pass
+        time.sleep(2)
+        retry -= 1
+        if retry < 0:
+            raise ValueError(f"No IPs available on subnet {subnet}")
+    return ips
+
 def fetch_resources():
     try:
         total = request_to_server(
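
The hunk above adds pre-flight helpers: check_seed_compatibility and check_worker_compatibility shell out to `docker version` and collect human-readable issues instead of raising, while get_ip_addresses scans local interfaces via netifaces, skipping FORBIDEDEN_IPS (spelled as in the import above) and retrying three times before raising. A usage sketch (the subnet value is illustrative):

    from kalavai_client.core import check_worker_compatibility, get_ip_addresses

    # report missing requirements (currently just docker) before joining a pool
    issues = check_worker_compatibility()["issues"]
    if issues:
        print("".join(issues))  # rich-style markup, e.g. "[red]Docker not installed..."

    # list candidate IPs, optionally restricted to a subnet;
    # raises ValueError if nothing matches after the retries
    try:
        candidate_ips = get_ip_addresses(subnet="192.168.0.0/24")
    except ValueError:
        candidate_ips = get_ip_addresses()  # fall back to no subnet filter
    print(candidate_ips)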
@@ -57,6 +179,35 @@ def fetch_resources():

     return {"total": total, "available": available}

+def fetch_job_defaults(name):
+    data = {
+        "template": name
+    }
+    try:
+        defaults = request_to_server(
+            method="get",
+            endpoint="/v1/job_defaults",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return defaults
+    except Exception as e:
+        return {"error": str(e)}
+
+def fetch_job_templates():
+    try:
+        templates = request_to_server(
+            method="get",
+            endpoint="/v1/get_job_templates",
+            server_creds=USER_LOCAL_SERVER_FILE,
+            data=None,
+            user_cookie=USER_COOKIE
+        )
+        return templates
+    except Exception as e:
+        return {"error": str(e)}
+
 def fetch_job_names():
     data = {
         "group": "batch.volcano.sh",
@@ -126,11 +277,18 @@ def fetch_job_details(jobs: list[Job]):
             node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]

             urls = [f"http://{load_server_info(data_key=SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)}:{node_port}" for node_port in node_ports]
+            if "Ready" in workers_status and len(workers_status) == 1:
+                status = "running"
+            elif any([st in workers_status for st in ["Failed", "Completed"]]):
+                status = "error"
+            else:
+                status = "pending"
             job_details.append(
                 Job(owner=namespace,
                     name=deployment,
                     workers=workers,
-                    endpoint="\n".join(urls))
+                    endpoint="\n".join(urls),
+                    status=str(status))
             )

         except Exception as e:
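
The new status derivation collapses per-worker states into a single label: "running" only when the sole observed state is Ready, "error" as soon as any worker has Failed or Completed, otherwise "pending". The same rule restated standalone, on hypothetical status sets:

    # restatement of the logic above, for hypothetical inputs
    def derive_status(workers_status):
        if "Ready" in workers_status and len(workers_status) == 1:
            return "running"
        if any(st in workers_status for st in ["Failed", "Completed"]):
            return "error"
        return "pending"

    assert derive_status({"Ready"}) == "running"
    assert derive_status({"Ready", "Failed"}) == "error"
    assert derive_status({"Pending", "Ready"}) == "pending"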
@@ -138,6 +296,47 @@ def fetch_job_details(jobs: list[Job]):

     return job_details

+def deploy_job(template_name, values_dict, force_namespace=None):
+
+    # deploy template with kube-watcher
+    data = {
+        "template": template_name,
+        "template_values": values_dict
+    }
+    if force_namespace is not None:
+        data["force_namespace"] = force_namespace
+
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/deploy_job",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+
+def delete_job(name, force_namespace=None):
+    data = {
+        "label": TEMPLATE_LABEL, # this ensures that both lws template and services are deleted
+        "value": name
+    }
+    if force_namespace is not None:
+        data["force_namespace"] = force_namespace
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/delete_labeled_resources",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+
 def fetch_devices():
     """Load devices status info for all hosts"""
     try:
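
deploy_job and delete_job round out the job lifecycle over the watcher API; deletion works by label (TEMPLATE_LABEL) so the template and its services are removed together. A minimal sketch, with placeholder template name and values (discover real ones via fetch_job_templates and fetch_job_defaults):

    from kalavai_client.core import delete_job, deploy_job

    result = deploy_job(
        template_name="my-template",   # hypothetical
        values_dict={"replicas": 1},   # hypothetical
        force_namespace=None)
    if isinstance(result, dict) and "error" in result:
        raise RuntimeError(result["error"])

    # removes every resource labeled with this job name
    delete_job(name="my-deployment")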
@@ -224,4 +423,454 @@ def fetch_gpus(available=False):
         return all_gpus

     except Exception as e:
-        return {"error": str(e)}
+        return {"error": str(e)}
+
+def load_user_session():
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    return auth.load_user_session()
+
+def authenticate_user(username=None, password=None):
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    user = auth.load_user_session()
+    if user is None:
+        user = auth.login(username=username, password=password)
+
+    if user is None:
+        return {"error": "Username or password incorrect"}
+    return user
+
+def user_logout():
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    auth.logout()
+    return True
+
+def check_token(token, public=False):
+    try:
+        data = decode_dict(token)
+        for field in MANDATORY_TOKEN_FIELDS:
+            assert field in data
+        if public:
+            if data[PUBLIC_LOCATION_KEY] is None:
+                raise ValueError("Token is not valid for public pools. Did you start the cluster with a public_location?")
+        return {"status": True}
+    except Exception as e:
+        return {"error": str(e)}
+
+def attach_to_pool(token, node_name=None):
+    if node_name is None:
+        node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # check token
+    valid = check_token(token=token)
+    if "error" in valid:
+        return {"error": f"Invalid token: {valid}"}
+
+    try:
+        data = decode_dict(token)
+        kalavai_seed_ip = data[CLUSTER_IP_KEY]
+        cluster_name = data[CLUSTER_NAME_KEY]
+        auth_key = data[AUTH_KEY]
+        watcher_service = data[WATCHER_SERVICE_KEY]
+        public_location = data[PUBLIC_LOCATION_KEY]
+        vpn = defaultdict(lambda: None)
+    except Exception as e:
+        return {"error": f"Invalid token. {str(e)}"}
+
+    user = defaultdict(lambda: None)
+    if public_location is not None:
+        user = load_user_session()
+        if user is None:
+            return {"error ": "Must be logged in to join public pools"}
+        try:
+            vpn = get_vpn_details(
+                location=public_location,
+                user_cookie=USER_COOKIE)
+        except Exception as e:
+            return {"error": f"Are you authenticated? {str(e)}"}
+        try:
+            validate_join_public_seed(
+                cluster_name=cluster_name,
+                join_key=token,
+                user_cookie=USER_COOKIE
+            )
+        except Exception as e:
+            return {"error": f"Error when joining network: {str(e)}"}
+
+    # local agent join
+    # 1. Generate local cache files
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="",
+        vpn_token=vpn["key"],
+        node_name=node_name,
+        is_public=public_location is not None)
+
+    store_server_info(
+        server_ip=kalavai_seed_ip,
+        auth_key=auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=public_location,
+        user_api_key=user["api_key"])
+
+    run_cmd(f"docker compose -f {USER_COMPOSE_FILE} up -d")
+    # ensure we are connected
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    return cluster_name
+
+def join_pool(token, num_gpus=0, node_name=None):
+    compatibility = check_worker_compatibility()
+    if len(compatibility["issues"]) > 0:
+        return {"error": compatibility["issues"]}
+
+    if node_name is None:
+        node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # check token
+    valid = check_token(token=token)
+    if "error" in valid:
+        return {"error": f"Invalid token: {valid}"}
+
+    try:
+        data = decode_dict(token)
+        kalavai_seed_ip = data[CLUSTER_IP_KEY]
+        kalavai_token = data[CLUSTER_TOKEN_KEY]
+        cluster_name = data[CLUSTER_NAME_KEY]
+        auth_key = data[AUTH_KEY]
+        watcher_service = data[WATCHER_SERVICE_KEY]
+        public_location = data[PUBLIC_LOCATION_KEY]
+        vpn = defaultdict(lambda: None)
+    except Exception as e:
+        return {"error": f"Invalid token. {str(e)}"}
+
+    # join private network if provided
+    node_labels = {
+        STORAGE_CLASS_LABEL: is_storage_compatible()
+    }
+    user = defaultdict(lambda: None)
+    if public_location is not None:
+        user = authenticate_user()
+        if user is None:
+            return {"error": "Must be logged in to join public pools"}
+        try:
+            vpn = get_vpn_details(
+                location=public_location,
+                user_cookie=USER_COOKIE)
+            node_labels[USER_NODE_LABEL] = user["username"]
+        except Exception as e:
+            return {"error": f"Are you authenticated? Error: {str(e)}"}
+        try:
+            validate_join_public_seed(
+                cluster_name=cluster_name,
+                join_key=token,
+                user_cookie=USER_COOKIE
+            )
+        except Exception as e:
+            return {"error": f"Error when joining network: {str(e)}"}
+
+    # local agent join
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="agent",
+        pool_ip=f"https://{kalavai_seed_ip}:6443",
+        pool_token=kalavai_token,
+        num_gpus=num_gpus,
+        vpn_token=vpn["key"],
+        node_name=node_name,
+        node_labels=node_labels,
+        is_public=public_location is not None)
+
+    store_server_info(
+        server_ip=kalavai_seed_ip,
+        auth_key=auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=public_location,
+        user_api_key=user["api_key"])
+
+    try:
+        CLUSTER.start_worker_node()
+    except Exception as e:
+        return {"error": f"Error connecting to {cluster_name} @ {kalavai_seed_ip}. Check with the admin if the token is still valid."}
+
+    # ensure we are connected
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    # check the node has connected successfully
+    try:
+        while not CLUSTER.is_agent_running():
+            time.sleep(30)
+    except KeyboardInterrupt:
+        return {"error": "Installation aborted. Leaving pool."}
+
+    result = init_user_workspace()
+    if "error" in result:
+        return {"error": f"Error when creating user workspace: {result}"}
+
+    return cluster_name
+
+def create_pool(cluster_name: str, ip_address: str, app_values: str=None, pool_config_values: str=None, num_gpus: int=0, node_name: str=None, only_registered_users: bool=False, location: str=None):
+
+    if not check_seed_compatibility():
+        return {"error": "Requirements failed"}
+
+    if app_values is None:
+        app_values = HELM_APPS_VALUES
+
+    if pool_config_values is None:
+        pool_config_values = POOL_CONFIG_DEFAULT_VALUES
+
+    node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # if only registered users are allowed, check user has logged in
+    user = defaultdict(lambda: None)
+    if only_registered_users or location is not None:
+        user = authenticate_user()
+        if user is None:
+            return {"error": "[white]--only-registered-users [red]or [white]--location[red] can only be used if the host is authenticated. Run [yellow]kalavai login[red] to authenticate"}
+
+    # join private network if provided
+    vpn = defaultdict(lambda: None)
+    node_labels = {
+        STORAGE_CLASS_LABEL: is_storage_compatible()
+    }
+    if location is not None:
+        try:
+            vpn = get_vpn_details(
+                location=location,
+                user_cookie=USER_COOKIE)
+            node_labels[USER_NODE_LABEL] = user["username"]
+        except Exception as e:
+            return {"error": f"[red]Error when joining network: {str(e)}"}
+
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="server",
+        vpn_token=vpn["key"],
+        node_ip_address=ip_address,
+        num_gpus=num_gpus,
+        node_name=node_name,
+        node_labels=node_labels,
+        is_public=location is not None
+    )
+
+    # start server
+    CLUSTER.start_seed_node()
+
+    while not CLUSTER.is_agent_running():
+        time.sleep(10)
+
+    # select IP address (for external discovery)
+    if ip_address is None or location is not None:
+        # load VPN ip
+        ip_address = CLUSTER.get_vpn_ip()
+
+    # populate local cred files
+    auth_key = str(uuid.uuid4())
+    write_auth_key = str(uuid.uuid4())
+    readonly_auth_key = str(uuid.uuid4())
+
+    watcher_service = f"{ip_address}:{DEFAULT_WATCHER_PORT}"
+    values = {
+        CLUSTER_NAME_KEY: cluster_name,
+        CLUSTER_IP_KEY: ip_address,
+        AUTH_KEY: auth_key,
+        READONLY_AUTH_KEY: readonly_auth_key,
+        WRITE_AUTH_KEY: write_auth_key,
+        WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
+        WATCHER_SERVICE_KEY: watcher_service,
+        USER_NODE_LABEL_KEY: USER_NODE_LABEL,
+        ALLOW_UNREGISTERED_USER_KEY: not only_registered_users
+    }
+
+    store_server_info(
+        server_ip=ip_address,
+        auth_key=auth_key,
+        readonly_auth_key=readonly_auth_key,
+        write_auth_key=write_auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=location,
+        user_api_key=user["api_key"])
+
+    # Generate helmfile recipe
+    helm_yaml = load_template(
+        template_path=HELM_APPS_FILE,
+        values=values,
+        default_values_path=app_values,
+        force_defaults=True)
+    with open(USER_HELM_APPS_FILE, "w") as f:
+        f.write(helm_yaml)
+
+    # set template values in helmfile
+    try:
+        CLUSTER.update_dependencies(
+            dependencies_file=USER_HELM_APPS_FILE
+        )
+    except Exception as e:
+        return {"error": f"Error when updating dependencies: {str(e)}"}
+
+    if location is not None:
+        # TODO: register with kalavai if it's a public cluster
+        pass
+        #pool__publish()
+
+    # wait until the server is ready to create objects
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    result = pool_init(pool_config_values_path=pool_config_values)
+    if "error" in result or ("failed" in result and len(result['failed']) > 0):
+        return {"error": f"Error when initialising pool: {result}"}
+    # init default namespace
+    init_user_workspace(force_namespace="default")
+    if only_registered_users:
+        # init user namespace
+        init_user_workspace()
+
+    return {"success"}
+
+def pool_init(pool_config_values_path=None):
+    """Deploy configured objects to initialise pool"""
+    if pool_config_values_path is None:
+        return
+
+    # load template config and populate with values
+    sidecar_template_yaml = load_template(
+        template_path=POOL_CONFIG_TEMPLATE,
+        values={},
+        default_values_path=pool_config_values_path)
+
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/deploy_generic_model",
+            data={"config": sidecar_template_yaml},
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": f"[red]Error when connecting to kalavai service: {str(e)}"}
+
+def is_connected():
+    if not os.path.isfile(USER_LOCAL_SERVER_FILE):
+        return False
+    return is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE, timeout=10)
+
+def is_agent_running():
+    return CLUSTER.is_agent_running()
+
+def is_server():
+    return CLUSTER.is_seed_node()
+
+def pause_agent(retries=3):
+    try:
+        while retries > 0:
+            state = CLUSTER.pause_agent()
+            if state:
+                return {"success"}
+            time.sleep(5)
+            retries -= 1
+    except:
+        return {"error": "Could not pause agent"}
+
+def resume_agent(retries=3):
+    try:
+        while retries > 0:
+            state = CLUSTER.restart_agent()
+            if state:
+                return {"success"}
+            time.sleep(5)
+            retries -= 1
+    except:
+        return {"error": "Could not resume agent"}
+
+def cleanup_local():
+    safe_remove(CONTAINER_HOST_PATH)
+    safe_remove(USER_COMPOSE_FILE)
+    safe_remove(USER_VPN_COMPOSE_FILE)
+    safe_remove(USER_HELM_APPS_FILE)
+    safe_remove(USER_KUBECONFIG_FILE)
+    safe_remove(USER_LOCAL_SERVER_FILE)
+    safe_remove(USER_TEMPLATES_FOLDER)
+
+def delete_node(name):
+    data = {
+        "node_names": [name]
+    }
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/delete_nodes",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        if result is None or result is True:
+            return {f"Node {name} deleted successfully"}
+        else:
+            return {"error": result}
+    except Exception as e:
+        return {"error": str(e)}
+
+def stop_pool(skip_node_deletion=False):
+    # delete local node from server
+    logs = []
+    if not skip_node_deletion:
+        logs.append(
+            delete_node(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
+        )
+    # unpublish event (only if seed node)
+    # TODO: no, this should be done via the platform!!!
+    # try:
+    #     if CLUSTER.is_seed_node():
+    #         console.log("Unregistering pool...")
+    #         unregister_cluster(
+    #             name=load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE),
+    #             user_cookie=USER_COOKIE)
+    # except Exception as e:
+    #     console.log(f"[red][WARNING]: (ignore if not a public pool) Error when unpublishing cluster. {str(e)}")
+    # remove local node agent
+
+    # disconnect from VPN first, then remove agent, then remove local files
+    try:
+        vpns = leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
+        if vpns is not None:
+            for vpn in vpns:
+                logs.append(f"You have left {vpn} VPN")
+    except:
+        # no vpn
+        pass
+
+    CLUSTER.remove_agent()
+
+    # clean local files
+    cleanup_local()
+
+    return logs
+
+def list_available_pools(user_only=False):
+    pools = get_public_seeds(user_only=user_only, user_cookie=USER_COOKIE)
+    return pools
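
Taken together, the functions in this final hunk move the full pool lifecycle into core.py: create_pool seeds a pool, join_pool and attach_to_pool connect further machines, and stop_pool tears everything down (node deletion, VPN exit, local cleanup). An end-to-end sketch with placeholder values; note that create_pool and join_pool block in 30-second polls until the watcher responds:

    from kalavai_client.core import (
        create_pool, is_connected, join_pool, stop_pool)

    # on the seed machine (cluster name and IP are placeholders)
    create_pool(cluster_name="demo-pool", ip_address="192.168.0.10")

    # on each worker, with a join token issued by the seed
    # join_pool(token="<join token>", num_gpus=1)

    assert is_connected()  # local creds present and watcher reachable

    # tear down and report what happened
    for line in stop_pool():
        print(line)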