kalavai-client 0.5.15__py3-none-any.whl → 0.5.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kalavai_client/core.py CHANGED
@@ -1,17 +1,70 @@
+import os
+import time
 from collections import defaultdict
 import math
+import uuid
+import socket
+import ipaddress
+import netifaces as ni
 
 from pydantic import BaseModel
 
+from kalavai_client.cluster import CLUSTER
 from kalavai_client.utils import (
     request_to_server,
-    load_server_info
+    load_server_info,
+    decode_dict,
+    get_vpn_details,
+    validate_join_public_seed,
+    generate_compose_config,
+    store_server_info,
+    is_watcher_alive,
+    run_cmd,
+    leave_vpn,
+    safe_remove,
+    get_public_seeds,
+    load_template,
+    is_storage_compatible,
+    NODE_NAME_KEY,
+    MANDATORY_TOKEN_FIELDS,
+    PUBLIC_LOCATION_KEY,
+    CLUSTER_IP_KEY,
+    CLUSTER_NAME_KEY,
+    AUTH_KEY,
+    WATCHER_SERVICE_KEY,
+    CLUSTER_TOKEN_KEY,
+    READONLY_AUTH_KEY,
+    WRITE_AUTH_KEY,
+    WATCHER_PORT_KEY,
+    WATCHER_SERVICE_KEY,
+    USER_NODE_LABEL_KEY,
+    ALLOW_UNREGISTERED_USER_KEY
+)
+from kalavai_client.auth import (
+    KalavaiAuthClient
 )
 from kalavai_client.env import (
     USER_COOKIE,
     USER_LOCAL_SERVER_FILE,
     TEMPLATE_LABEL,
-    SERVER_IP_KEY
+    SERVER_IP_KEY,
+    USER_COMPOSE_FILE,
+    DEFAULT_VPN_CONTAINER_NAME,
+    CONTAINER_HOST_PATH,
+    USER_VPN_COMPOSE_FILE,
+    USER_HELM_APPS_FILE,
+    USER_KUBECONFIG_FILE,
+    USER_TEMPLATES_FOLDER,
+    USER_WORKSPACE_TEMPLATE,
+    DEFAULT_USER_WORKSPACE_VALUES,
+    STORAGE_CLASS_LABEL,
+    USER_NODE_LABEL,
+    DEFAULT_WATCHER_PORT,
+    HELM_APPS_FILE,
+    HELM_APPS_VALUES,
+    POOL_CONFIG_DEFAULT_VALUES,
+    POOL_CONFIG_TEMPLATE,
+    FORBIDEDEN_IPS
 )
 
 class Job(BaseModel):
@@ -19,6 +72,7 @@ class Job(BaseModel):
     name: str = None
     workers: str = None
     endpoint: str = None
+    status: str = None
 
 class DeviceStatus(BaseModel):
     name: str
@@ -36,6 +90,74 @@ class GPU(BaseModel):
     model: str
 
 
+def init_user_workspace(force_namespace=None):
+
+    # load template config and populate with values
+    sidecar_template_yaml = load_template(
+        template_path=USER_WORKSPACE_TEMPLATE,
+        values={},
+        default_values_path=DEFAULT_USER_WORKSPACE_VALUES)
+
+    try:
+        data = {"config": sidecar_template_yaml}
+        if force_namespace is not None:
+            data["force_namespace"] = force_namespace
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/create_user_space",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return {"success"}
+    except Exception as e:
+        return {"error": f"Error when connecting to kalavai service: {str(e)}"}
+
+def check_seed_compatibility():
+    """Check required packages to start pools"""
+    logs = []
+    # docker
+    try:
+        run_cmd("docker version >/dev/null 2>&1")
+    except:
+        logs.append("[red]Docker not installed. Install instructions:\n")
+        logs.append("   Linux: https://docs.docker.com/engine/install/\n")
+        logs.append("   Windows/MacOS: https://docs.docker.com/desktop/\n")
+
+    return {"issues": logs}
+
+def check_worker_compatibility():
+    """Check required packages to join pools"""
+    logs = []
+    # docker
+    try:
+        run_cmd("docker version >/dev/null 2>&1")
+    except:
+        logs.append("[red]Docker not installed. Install instructions:\n")
+        logs.append("   Linux: https://docs.docker.com/engine/install/\n")
+        logs.append("   Windows/MacOS: https://docs.docker.com/desktop/\n")
+
+    return {"issues": logs}
+
+def get_ip_addresses(subnet=None):
+    ips = []
+    retry = 3
+    while len(ips) == 0:
+        for iface in ni.interfaces():
+            try:
+                ip = ni.ifaddresses(iface)[ni.AF_INET][0]['addr']
+                if ip in FORBIDEDEN_IPS:
+                    continue
+                if subnet is None or ipaddress.ip_address(ip) in ipaddress.ip_network(subnet):
+                    ips.append(ip)
+            except:
+                pass
+        time.sleep(2)
+        retry -= 1
+        if retry < 0:
+            raise ValueError(f"No IPs available on subnet {subnet}")
+    return ips
+
 def fetch_resources():
     try:
         total = request_to_server(
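
The hunk above adds local environment checks and LAN discovery. A minimal usage sketch (the subnet below is illustrative; assumes the wheel is installed and `netifaces` is importable):

from kalavai_client.core import check_worker_compatibility, get_ip_addresses

issues = check_worker_compatibility()["issues"]  # empty list when docker is present
if not issues:
    # raises ValueError after three polling rounds if no non-forbidden IP matches
    ips = get_ip_addresses(subnet="192.168.0.0/24")
    print(ips)
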
@@ -57,6 +179,35 @@ def fetch_resources():
 
     return {"total": total, "available": available}
 
+def fetch_job_defaults(name):
+    data = {
+        "template": name
+    }
+    try:
+        defaults = request_to_server(
+            method="get",
+            endpoint="/v1/job_defaults",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return defaults
+    except Exception as e:
+        return {"error": str(e)}
+
+def fetch_job_templates():
+    try:
+        templates = request_to_server(
+            method="get",
+            endpoint="/v1/get_job_templates",
+            server_creds=USER_LOCAL_SERVER_FILE,
+            data=None,
+            user_cookie=USER_COOKIE
+        )
+        return templates
+    except Exception as e:
+        return {"error": str(e)}
+
 def fetch_job_names():
     data = {
         "group": "batch.volcano.sh",
@@ -126,11 +277,18 @@ def fetch_job_details(jobs: list[Job]):
             node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
 
             urls = [f"http://{load_server_info(data_key=SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)}:{node_port}" for node_port in node_ports]
+            if "Ready" in workers_status and len(workers_status) == 1:
+                status = "running"
+            elif any([st in workers_status for st in ["Failed", "Completed"]]):
+                status = "error"
+            else:
+                status = "pending"
             job_details.append(
                 Job(owner=namespace,
                     name=deployment,
                     workers=workers,
-                    endpoint="\n".join(urls))
+                    endpoint="\n".join(urls),
+                    status=str(status))
             )
 
     except Exception as e:
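
The hunk above derives the new `status` field from the worker states: "running" only when Ready is the sole state present, "error" as soon as any worker reports Failed or Completed, and "pending" in every other case. For illustration (assuming `workers_status` collects the distinct worker states):

# distinct worker states      -> derived job status
# {"Ready"}                   -> "running"
# {"Ready", "Failed"}         -> "error"
# {"Pending", "Ready"}        -> "pending"
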
@@ -138,6 +296,47 @@ def fetch_job_details(jobs: list[Job]):
 
     return job_details
 
+def deploy_job(template_name, values_dict, force_namespace=None):
+
+    # deploy template with kube-watcher
+    data = {
+        "template": template_name,
+        "template_values": values_dict
+    }
+    if force_namespace is not None:
+        data["force_namespace"] = force_namespace
+
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/deploy_job",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+
+def delete_job(name, force_namespace=None):
+    data = {
+        "label": TEMPLATE_LABEL,  # this ensures that both lws template and services are deleted
+        "value": name
+    }
+    if force_namespace is not None:
+        data["force_namespace"] = force_namespace
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/delete_labeled_resources",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+
 def fetch_devices():
     """Load devices status info for all hosts"""
     try:
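
`deploy_job` posts a template plus values to the watcher, and `delete_job` removes everything labelled with the job name in one call. A sketch (the template name and values are illustrative, not templates known to ship with the package):

from kalavai_client.core import deploy_job, delete_job

result = deploy_job(template_name="vllm", values_dict={"workers": "2"})
if isinstance(result, dict) and "error" in result:
    print("deploy failed:", result["error"])
else:
    delete_job(name="my-job")  # deletes the deployment and its services by label
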
@@ -224,4 +423,454 @@ def fetch_gpus(available=False):
         return all_gpus
 
     except Exception as e:
-        return {"error": str(e)}
+        return {"error": str(e)}
+
+def load_user_session():
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    return auth.load_user_session()
+
+def authenticate_user(username=None, password=None):
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    user = auth.load_user_session()
+    if user is None:
+        user = auth.login(username=username, password=password)
+
+    if user is None:
+        return {"error": "Username or password incorrect"}
+    return user
+
+def user_logout():
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    auth.logout()
+    return True
+
+def check_token(token, public=False):
+    try:
+        data = decode_dict(token)
+        for field in MANDATORY_TOKEN_FIELDS:
+            assert field in data
+        if public:
+            if data[PUBLIC_LOCATION_KEY] is None:
+                raise ValueError("Token is not valid for public pools. Did you start the cluster with a public_location?")
+        return {"status": True}
+    except Exception as e:
+        return {"error": str(e)}
+
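
`check_token` validates a join token offline (shape only; it does not contact the seed node). A sketch with a placeholder token:

from kalavai_client.core import check_token

outcome = check_token(token="<base64-encoded join token>", public=False)
if "error" in outcome:
    print("invalid token:", outcome["error"])  # missing mandatory fields, bad encoding, ...
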
+def attach_to_pool(token, node_name=None):
+    if node_name is None:
+        node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # check token
+    valid = check_token(token=token)
+    if "error" in valid:
+        return {"error": f"Invalid token: {valid}"}
+
+    try:
+        data = decode_dict(token)
+        kalavai_seed_ip = data[CLUSTER_IP_KEY]
+        cluster_name = data[CLUSTER_NAME_KEY]
+        auth_key = data[AUTH_KEY]
+        watcher_service = data[WATCHER_SERVICE_KEY]
+        public_location = data[PUBLIC_LOCATION_KEY]
+        vpn = defaultdict(lambda: None)
+    except Exception as e:
+        return {"error": f"Invalid token. {str(e)}"}
+
+    user = defaultdict(lambda: None)
+    if public_location is not None:
+        user = load_user_session()
+        if user is None:
+            return {"error ": "Must be logged in to join public pools"}
+        try:
+            vpn = get_vpn_details(
+                location=public_location,
+                user_cookie=USER_COOKIE)
+        except Exception as e:
+            return {"error": f"Are you authenticated? {str(e)}"}
+        try:
+            validate_join_public_seed(
+                cluster_name=cluster_name,
+                join_key=token,
+                user_cookie=USER_COOKIE
+            )
+        except Exception as e:
+            return {"error": f"Error when joining network: {str(e)}"}
+
+    # local agent join
+    # 1. Generate local cache files
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="",
+        vpn_token=vpn["key"],
+        node_name=node_name,
+        is_public=public_location is not None)
+
+    store_server_info(
+        server_ip=kalavai_seed_ip,
+        auth_key=auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=public_location,
+        user_api_key=user["api_key"])
+
+    run_cmd(f"docker compose -f {USER_COMPOSE_FILE} up -d")
+    # ensure we are connected
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    return cluster_name
+
+def join_pool(token, num_gpus=0, node_name=None):
+    compatibility = check_worker_compatibility()
+    if len(compatibility["issues"]) > 0:
+        return {"error": compatibility["issues"]}
+
+    if node_name is None:
+        node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # check token
+    valid = check_token(token=token)
+    if "error" in valid:
+        return {"error": f"Invalid token: {valid}"}
+
+    try:
+        data = decode_dict(token)
+        kalavai_seed_ip = data[CLUSTER_IP_KEY]
+        kalavai_token = data[CLUSTER_TOKEN_KEY]
+        cluster_name = data[CLUSTER_NAME_KEY]
+        auth_key = data[AUTH_KEY]
+        watcher_service = data[WATCHER_SERVICE_KEY]
+        public_location = data[PUBLIC_LOCATION_KEY]
+        vpn = defaultdict(lambda: None)
+    except Exception as e:
+        return {"error": f"Invalid token. {str(e)}"}
+
+    # join private network if provided
+    node_labels = {
+        STORAGE_CLASS_LABEL: is_storage_compatible()
+    }
+    user = defaultdict(lambda: None)
+    if public_location is not None:
+        user = authenticate_user()
+        if user is None:
+            return {"error": "Must be logged in to join public pools"}
+        try:
+            vpn = get_vpn_details(
+                location=public_location,
+                user_cookie=USER_COOKIE)
+            node_labels[USER_NODE_LABEL] = user["username"]
+        except Exception as e:
+            return {"error": f"Are you authenticated? Error: {str(e)}"}
+        try:
+            validate_join_public_seed(
+                cluster_name=cluster_name,
+                join_key=token,
+                user_cookie=USER_COOKIE
+            )
+        except Exception as e:
+            return {"error": f"Error when joining network: {str(e)}"}
+
+    # local agent join
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="agent",
+        pool_ip=f"https://{kalavai_seed_ip}:6443",
+        pool_token=kalavai_token,
+        num_gpus=num_gpus,
+        vpn_token=vpn["key"],
+        node_name=node_name,
+        node_labels=node_labels,
+        is_public=public_location is not None)
+
+    store_server_info(
+        server_ip=kalavai_seed_ip,
+        auth_key=auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=public_location,
+        user_api_key=user["api_key"])
+
+    try:
+        CLUSTER.start_worker_node()
+    except Exception as e:
+        return {"error": f"Error connecting to {cluster_name} @ {kalavai_seed_ip}. Check with the admin if the token is still valid."}
+
+    # ensure we are connected
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    # check the node has connected successfully
+    try:
+        while not CLUSTER.is_agent_running():
+            time.sleep(30)
+    except KeyboardInterrupt:
+        return {"error": "Installation aborted. Leaving pool."}
+
+    result = init_user_workspace()
+    if "error" in result:
+        return {"error": f"Error when creating user workspace: {result}"}
+
+    return cluster_name
+
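
`attach_to_pool` connects as a lightweight client, while `join_pool` registers the host as a full worker; both poll the watcher in 30-second loops until it answers, so they are long-running calls. A worker-side sketch (token and GPU count are illustrative):

from kalavai_client.core import join_pool

outcome = join_pool(token="<join token>", num_gpus=1)
if isinstance(outcome, dict) and "error" in outcome:
    print(outcome["error"])
else:
    print(f"joined pool {outcome}")  # returns the cluster name on success
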
+def create_pool(cluster_name: str, ip_address: str, app_values: str=None, pool_config_values: str=None, num_gpus: int=0, node_name: str=None, only_registered_users: bool=False, location: str=None):
+
+    if not check_seed_compatibility():
+        return {"error": "Requirements failed"}
+
+    if app_values is None:
+        app_values = HELM_APPS_VALUES
+
+    if pool_config_values is None:
+        pool_config_values = POOL_CONFIG_DEFAULT_VALUES
+
+    node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # if only registered users are allowed, check user has logged in
+    user = defaultdict(lambda: None)
+    if only_registered_users or location is not None:
+        user = authenticate_user()
+        if user is None:
+            return {"error": "[white]--only-registered-users [red]or [white]--location[red] can only be used if the host is authenticated. Run [yellow]kalavai login[red] to authenticate"}
+
+    # join private network if provided
+    vpn = defaultdict(lambda: None)
+    node_labels = {
+        STORAGE_CLASS_LABEL: is_storage_compatible()
+    }
+    if location is not None:
+        try:
+            vpn = get_vpn_details(
+                location=location,
+                user_cookie=USER_COOKIE)
+            node_labels[USER_NODE_LABEL] = user["username"]
+        except Exception as e:
+            return {"error": f"[red]Error when joining network: {str(e)}"}
+
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="server",
+        vpn_token=vpn["key"],
+        node_ip_address=ip_address,
+        num_gpus=num_gpus,
+        node_name=node_name,
+        node_labels=node_labels,
+        is_public=location is not None
+    )
+
+    # start server
+    CLUSTER.start_seed_node()
+
+    while not CLUSTER.is_agent_running():
+        time.sleep(10)
+
+    # select IP address (for external discovery)
+    if ip_address is None or location is not None:
+        # load VPN ip
+        ip_address = CLUSTER.get_vpn_ip()
+
+    # populate local cred files
+    auth_key = str(uuid.uuid4())
+    write_auth_key = str(uuid.uuid4())
+    readonly_auth_key = str(uuid.uuid4())
+
+    watcher_service = f"{ip_address}:{DEFAULT_WATCHER_PORT}"
+    values = {
+        CLUSTER_NAME_KEY: cluster_name,
+        CLUSTER_IP_KEY: ip_address,
+        AUTH_KEY: auth_key,
+        READONLY_AUTH_KEY: readonly_auth_key,
+        WRITE_AUTH_KEY: write_auth_key,
+        WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
+        WATCHER_SERVICE_KEY: watcher_service,
+        USER_NODE_LABEL_KEY: USER_NODE_LABEL,
+        ALLOW_UNREGISTERED_USER_KEY: not only_registered_users
+    }
+
+    store_server_info(
+        server_ip=ip_address,
+        auth_key=auth_key,
+        readonly_auth_key=readonly_auth_key,
+        write_auth_key=write_auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=location,
+        user_api_key=user["api_key"])
+
+    # Generate helmfile recipe
+    helm_yaml = load_template(
+        template_path=HELM_APPS_FILE,
+        values=values,
+        default_values_path=app_values,
+        force_defaults=True)
+    with open(USER_HELM_APPS_FILE, "w") as f:
+        f.write(helm_yaml)
+
+    # set template values in helmfile
+    try:
+        CLUSTER.update_dependencies(
+            dependencies_file=USER_HELM_APPS_FILE
+        )
+    except Exception as e:
+        return {"error": f"Error when updating dependencies: {str(e)}"}
+
+    if location is not None:
+        # TODO: register with kalavai if it's a public cluster
+        pass
+        #pool__publish()
+
+    # wait until the server is ready to create objects
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    result = pool_init(pool_config_values_path=pool_config_values)
+    if "error" in result or ("failed" in result and len(result['failed']) > 0):
+        return {"error": f"Error when initialising pool: {result}"}
+    # init default namespace
+    init_user_workspace(force_namespace="default")
+    if only_registered_users:
+        # init user namespace
+        init_user_workspace()
+
+    return {"success"}
+
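
`create_pool` seeds a new pool end to end: compose config, seed node, helm dependencies, watcher, then workspace initialisation. A seed-side sketch (the pool name is illustrative; `ip_address` must be reachable by prospective workers):

from kalavai_client.core import create_pool, get_ip_addresses

outcome = create_pool(
    cluster_name="my-pool",
    ip_address=get_ip_addresses()[0])
if isinstance(outcome, dict) and "error" in outcome:
    print(outcome["error"])
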
+def pool_init(pool_config_values_path=None):
+    """Deploy configured objects to initialise pool"""
+    if pool_config_values_path is None:
+        return
+
+    # load template config and populate with values
+    sidecar_template_yaml = load_template(
+        template_path=POOL_CONFIG_TEMPLATE,
+        values={},
+        default_values_path=pool_config_values_path)
+
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/deploy_generic_model",
+            data={"config": sidecar_template_yaml},
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": f"[red]Error when connecting to kalavai service: {str(e)}"}
+
+def is_connected():
+    if not os.path.isfile(USER_LOCAL_SERVER_FILE):
+        return False
+    return is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE, timeout=10)
+
+def is_agent_running():
+    return CLUSTER.is_agent_running()
+
+def is_server():
+    return CLUSTER.is_seed_node()
+
+def pause_agent(retries=3):
+    try:
+        while retries > 0:
+            state = CLUSTER.pause_agent()
+            if state:
+                return {"success"}
+            time.sleep(5)
+            retries -= 1
+    except:
+        return {"error": "Could not pause agent"}
+
+def resume_agent(retries=3):
+    try:
+        while retries > 0:
+            state = CLUSTER.restart_agent()
+            if state:
+                return {"success"}
+            time.sleep(5)
+            retries -= 1
+    except:
+        return {"error": "Could not resume agent"}
+
+def cleanup_local():
+    safe_remove(CONTAINER_HOST_PATH)
+    safe_remove(USER_COMPOSE_FILE)
+    safe_remove(USER_VPN_COMPOSE_FILE)
+    safe_remove(USER_HELM_APPS_FILE)
+    safe_remove(USER_KUBECONFIG_FILE)
+    safe_remove(USER_LOCAL_SERVER_FILE)
+    safe_remove(USER_TEMPLATES_FOLDER)
+
+def delete_node(name):
+    data = {
+        "node_names": [name]
+    }
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/delete_nodes",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        if result is None or result is True:
+            return {f"Node {name} deleted successfully"}
+        else:
+            return {"error": result}
+    except Exception as e:
+        return {"error": str(e)}
+
+def stop_pool(skip_node_deletion=False):
+    # delete local node from server
+    logs = []
+    if not skip_node_deletion:
+        logs.append(
+            delete_node(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
+        )
+    # unpublish event (only if seed node)
+    # TODO: no, this should be done via the platform!!!
+    # try:
+    #     if CLUSTER.is_seed_node():
+    #         console.log("Unregistering pool...")
+    #         unregister_cluster(
+    #             name=load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE),
+    #             user_cookie=USER_COOKIE)
+    # except Exception as e:
+    #     console.log(f"[red][WARNING]: (ignore if not a public pool) Error when unpublishing cluster. {str(e)}")
+    # remove local node agent
+
+    # disconnect from VPN first, then remove agent, then remove local files
+    try:
+        vpns = leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
+        if vpns is not None:
+            for vpn in vpns:
+                logs.append(f"You have left {vpn} VPN")
+    except:
+        # no vpn
+        pass
+
+    CLUSTER.remove_agent()
+
+    # clean local files
+    cleanup_local()
+
+    return logs
+
+def list_available_pools(user_only=False):
+    pools = get_public_seeds(user_only=user_only, user_cookie=USER_COOKIE)
+    return pools
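
With the lifecycle helpers above, the module now covers the full pool lifecycle from Python. A closing sketch (assumes the host already belongs to a pool):

from kalavai_client.core import is_connected, pause_agent, resume_agent, stop_pool

if is_connected():
    pause_agent()              # retries up to 3 times, 5 seconds apart
    resume_agent()
    for entry in stop_pool():  # deletes the node, leaves the VPN, wipes local state
        print(entry)
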