kalavai-client 0.5.14__py3-none-any.whl → 0.5.16__py3-none-any.whl

This diff compares the contents of two package versions as published to their public registry. It is provided for informational purposes only and reflects the releases exactly as they appear there.
kalavai_client/core.py CHANGED
@@ -1,17 +1,70 @@
+import os
+import time
 from collections import defaultdict
 import math
+import uuid
+import socket
+import ipaddress
+import netifaces as ni
 
 from pydantic import BaseModel
 
+from kalavai_client.cluster import CLUSTER
 from kalavai_client.utils import (
     request_to_server,
-    load_server_info
+    load_server_info,
+    decode_dict,
+    get_vpn_details,
+    validate_join_public_seed,
+    generate_compose_config,
+    store_server_info,
+    is_watcher_alive,
+    run_cmd,
+    leave_vpn,
+    safe_remove,
+    get_public_seeds,
+    load_template,
+    is_storage_compatible,
+    NODE_NAME_KEY,
+    MANDATORY_TOKEN_FIELDS,
+    PUBLIC_LOCATION_KEY,
+    CLUSTER_IP_KEY,
+    CLUSTER_NAME_KEY,
+    AUTH_KEY,
+    WATCHER_SERVICE_KEY,
+    CLUSTER_TOKEN_KEY,
+    READONLY_AUTH_KEY,
+    WRITE_AUTH_KEY,
+    WATCHER_PORT_KEY,
+    WATCHER_SERVICE_KEY,
+    USER_NODE_LABEL_KEY,
+    ALLOW_UNREGISTERED_USER_KEY
+)
+from kalavai_client.auth import (
+    KalavaiAuthClient
 )
 from kalavai_client.env import (
     USER_COOKIE,
     USER_LOCAL_SERVER_FILE,
     TEMPLATE_LABEL,
-    SERVER_IP_KEY
+    SERVER_IP_KEY,
+    USER_COMPOSE_FILE,
+    DEFAULT_VPN_CONTAINER_NAME,
+    CONTAINER_HOST_PATH,
+    USER_VPN_COMPOSE_FILE,
+    USER_HELM_APPS_FILE,
+    USER_KUBECONFIG_FILE,
+    USER_TEMPLATES_FOLDER,
+    USER_WORKSPACE_TEMPLATE,
+    DEFAULT_USER_WORKSPACE_VALUES,
+    STORAGE_CLASS_LABEL,
+    USER_NODE_LABEL,
+    DEFAULT_WATCHER_PORT,
+    HELM_APPS_FILE,
+    HELM_APPS_VALUES,
+    POOL_CONFIG_DEFAULT_VALUES,
+    POOL_CONFIG_TEMPLATE,
+    FORBIDEDEN_IPS
 )
 
 class Job(BaseModel):
@@ -19,6 +72,7 @@ class Job(BaseModel):
     name: str = None
     workers: str = None
     endpoint: str = None
+    status: str = None
 
 class DeviceStatus(BaseModel):
     name: str
@@ -36,6 +90,76 @@ class GPU(BaseModel):
     model: str
 
 
+def init_user_workspace(force_namespace=None):
+
+    # load template config and populate with values
+    sidecar_template_yaml = load_template(
+        template_path=USER_WORKSPACE_TEMPLATE,
+        values={},
+        default_values_path=DEFAULT_USER_WORKSPACE_VALUES)
+
+    try:
+        data = {"config": sidecar_template_yaml}
+        if force_namespace is not None:
+            data["force_namespace"] = force_namespace
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/create_user_space",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return {"success"}
+    except Exception as e:
+        return {"error": f"Error when connecting to kalavai service: {str(e)}"}
+
+def check_seed_compatibility():
+    """Check required packages to start pools"""
+    logs = []
+    # docker
+    try:
+        run_cmd("docker version >/dev/null 2>&1")
+    except:
+        logs.append("[red]Docker not installed. Install instructions:\n")
+        logs.append(" Linux: https://docs.docker.com/engine/install/\n")
+        logs.append(" Windows/MacOS: https://docs.docker.com/desktop/\n")
+
+    return {"issues": logs}
+
+def check_worker_compatibility():
+    """Check required packages to join pools"""
+    logs = []
+    # docker
+    try:
+        run_cmd("docker version >/dev/null 2>&1")
+    except:
+        logs.append("[red]Docker not installed. Install instructions:\n")
+        logs.append(" Linux: https://docs.docker.com/engine/install/\n")
+        logs.append(" Windows/MacOS: https://docs.docker.com/desktop/\n")
+
+    return {"issues": logs}
+
+def get_ip_addresses(subnet=None):
+    ips = []
+    retry = 3
+    while len(ips) == 0:
+        for iface in ni.interfaces():
+            try:
+                ip = ni.ifaddresses(iface)[ni.AF_INET][0]['addr']
+                if ip in FORBIDEDEN_IPS:
+                    continue
+                if subnet is None or ipaddress.ip_address(ip) in ipaddress.ip_network(subnet):
+                    ips.append(ip)
+            except:
+                pass
+        if len(ips) == 1:
+            return ips[0]
+        time.sleep(2)
+        retry -= 1
+        if retry < 0:
+            raise ValueError(f"No IPs available on subnet {subnet}")
+    return ips
+
 def fetch_resources():
     try:
         total = request_to_server(
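The new get_ip_addresses helper scans local interfaces via netifaces, skips anything in FORBIDEDEN_IPS, and retries a few times before giving up. Its return type varies: a single string when exactly one address matches, a list when several do, and a ValueError once retries are exhausted. A minimal usage sketch, assuming a hypothetical 192.168.68.0/24 LAN:

    from kalavai_client.core import get_ip_addresses

    # returns "192.168.68.10" (str) if exactly one interface sits on the
    # subnet, a list of addresses if several do, or raises ValueError
    ip = get_ip_addresses(subnet="192.168.68.0/24")  # subnet is illustrative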
@@ -57,6 +181,35 @@ def fetch_resources():
 
     return {"total": total, "available": available}
 
+def fetch_job_defaults(name):
+    data = {
+        "template": name
+    }
+    try:
+        defaults = request_to_server(
+            method="get",
+            endpoint="/v1/job_defaults",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return defaults
+    except Exception as e:
+        return {"error": str(e)}
+
+def fetch_job_templates():
+    try:
+        templates = request_to_server(
+            method="get",
+            endpoint="/v1/get_job_templates",
+            server_creds=USER_LOCAL_SERVER_FILE,
+            data=None,
+            user_cookie=USER_COOKIE
+        )
+        return templates
+    except Exception as e:
+        return {"error": str(e)}
+
 def fetch_job_names():
     data = {
         "group": "batch.volcano.sh",
@@ -126,11 +279,18 @@ def fetch_job_details(jobs: list[Job]):
             node_ports = [f"{p['node_port']} (mapped to {p['port']})" for s in result.values() for p in s["ports"]]
 
             urls = [f"http://{load_server_info(data_key=SERVER_IP_KEY, file=USER_LOCAL_SERVER_FILE)}:{node_port}" for node_port in node_ports]
+            if "Ready" in workers_status and len(workers_status) == 1:
+                status = "running"
+            elif any([st in workers_status for st in ["Failed", "Completed"]]):
+                status = "error"
+            else:
+                status = "pending"
             job_details.append(
                 Job(owner=namespace,
                     name=deployment,
                     workers=workers,
-                    endpoint="\n".join(urls))
+                    endpoint="\n".join(urls),
+                    status=str(status))
             )
 
     except Exception as e:
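The new Job.status field collapses worker states into three values, checked in order: "running" only when Ready is the sole state observed, "error" if any worker reports Failed or Completed, and "pending" otherwise. Assuming workers_status tallies states across a job's workers (a set of state strings behaves the same for these checks):

    {"Ready": 3}               # -> "running" (Ready is the only state)
    {"Ready": 2, "Failed": 1}  # -> "error"   (Failed outranks Ready)
    {"Pending": 1}             # -> "pending"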
@@ -138,6 +298,47 @@ def fetch_job_details(jobs: list[Job]):
     return job_details
 
 
+def deploy_job(template_name, values_dict, force_namespace=None):
+
+    # deploy template with kube-watcher
+    data = {
+        "template": template_name,
+        "template_values": values_dict
+    }
+    if force_namespace is not None:
+        data["force_namespace"] = force_namespace
+
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/deploy_job",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+
+def delete_job(name, force_namespace=None):
+    data = {
+        "label": TEMPLATE_LABEL,  # this ensures that both lws template and services are deleted
+        "value": name
+    }
+    if force_namespace is not None:
+        data["force_namespace"] = force_namespace
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/delete_labeled_resources",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": str(e)}
+
 def fetch_devices():
     """Load devices status info for all hosts"""
     try:
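Note that delete_job removes resources by label rather than by job object: everything tagged with TEMPLATE_LABEL and the job's name goes, which (per the inline comment) covers both the lws template and its services. A round-trip sketch with placeholder arguments:

    result = deploy_job(
        template_name="vllm",        # placeholder template name
        values_dict={"workers": 2})  # placeholder template values
    if isinstance(result, dict) and "error" in result:
        raise RuntimeError(result["error"])
    delete_job(name="my-vllm-job")   # placeholder deployment name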
@@ -224,4 +425,454 @@ def fetch_gpus(available=False):
         return all_gpus
 
     except Exception as e:
-        return {"error": str(e)}
+        return {"error": str(e)}
+
+def load_user_session():
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    return auth.load_user_session()
+
+def authenticate_user(username=None, password=None):
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    user = auth.load_user_session()
+    if user is None:
+        user = auth.login(username=username, password=password)
+
+    if user is None:
+        return {"error": "Username or password incorrect"}
+    return user
+
+def user_logout():
+    auth = KalavaiAuthClient(
+        user_cookie_file=USER_COOKIE
+    )
+    auth.logout()
+    return True
+
+def check_token(token, public=False):
+    try:
+        data = decode_dict(token)
+        for field in MANDATORY_TOKEN_FIELDS:
+            assert field in data
+        if public:
+            if data[PUBLIC_LOCATION_KEY] is None:
+                raise ValueError("Token is not valid for public pools. Did you start the cluster with a public_location?")
+        return {"status": True}
+    except Exception as e:
+        return {"error": str(e)}
+
+def attach_to_pool(token, node_name=None):
+    if node_name is None:
+        node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # check token
+    valid = check_token(token=token)
+    if "error" in valid:
+        return {"error": f"Invalid token: {valid}"}
+
+    try:
+        data = decode_dict(token)
+        kalavai_seed_ip = data[CLUSTER_IP_KEY]
+        cluster_name = data[CLUSTER_NAME_KEY]
+        auth_key = data[AUTH_KEY]
+        watcher_service = data[WATCHER_SERVICE_KEY]
+        public_location = data[PUBLIC_LOCATION_KEY]
+        vpn = defaultdict(lambda: None)
+    except Exception as e:
+        return {"error": f"Invalid token. {str(e)}"}
+
+    user = defaultdict(lambda: None)
+    if public_location is not None:
+        user = load_user_session()
+        if user is None:
+            return {"error ": "Must be logged in to join public pools"}
+        try:
+            vpn = get_vpn_details(
+                location=public_location,
+                user_cookie=USER_COOKIE)
+        except Exception as e:
+            return {"error": f"Are you authenticated? {str(e)}"}
+        try:
+            validate_join_public_seed(
+                cluster_name=cluster_name,
+                join_key=token,
+                user_cookie=USER_COOKIE
+            )
+        except Exception as e:
+            return {"error": f"Error when joining network: {str(e)}"}
+
+    # local agent join
+    # 1. Generate local cache files
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="",
+        vpn_token=vpn["key"],
+        node_name=node_name,
+        is_public=public_location is not None)
+
+    store_server_info(
+        server_ip=kalavai_seed_ip,
+        auth_key=auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=public_location,
+        user_api_key=user["api_key"])
+
+    run_cmd(f"docker compose -f {USER_COMPOSE_FILE} up -d")
+    # ensure we are connected
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    return cluster_name
+
+def join_pool(token, num_gpus=0, node_name=None):
+    compatibility = check_worker_compatibility()
+    if len(compatibility["issues"]) > 0:
+        return {"error": compatibility["issues"]}
+
+    if node_name is None:
+        node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # check token
+    valid = check_token(token=token)
+    if "error" in valid:
+        return {"error": f"Invalid token: {valid}"}
+
+    try:
+        data = decode_dict(token)
+        kalavai_seed_ip = data[CLUSTER_IP_KEY]
+        kalavai_token = data[CLUSTER_TOKEN_KEY]
+        cluster_name = data[CLUSTER_NAME_KEY]
+        auth_key = data[AUTH_KEY]
+        watcher_service = data[WATCHER_SERVICE_KEY]
+        public_location = data[PUBLIC_LOCATION_KEY]
+        vpn = defaultdict(lambda: None)
+    except Exception as e:
+        return {"error": f"Invalid token. {str(e)}"}
+
+    # join private network if provided
+    node_labels = {
+        STORAGE_CLASS_LABEL: is_storage_compatible()
+    }
+    user = defaultdict(lambda: None)
+    if public_location is not None:
+        user = authenticate_user()
+        if user is None:
+            return {"error": "Must be logged in to join public pools"}
+        try:
+            vpn = get_vpn_details(
+                location=public_location,
+                user_cookie=USER_COOKIE)
+            node_labels[USER_NODE_LABEL] = user["username"]
+        except Exception as e:
+            return {"error": f"Are you authenticated? Error: {str(e)}"}
+        try:
+            validate_join_public_seed(
+                cluster_name=cluster_name,
+                join_key=token,
+                user_cookie=USER_COOKIE
+            )
+        except Exception as e:
+            return {"error": f"Error when joining network: {str(e)}"}
+
+    # local agent join
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="agent",
+        pool_ip=f"https://{kalavai_seed_ip}:6443",
+        pool_token=kalavai_token,
+        num_gpus=num_gpus,
+        vpn_token=vpn["key"],
+        node_name=node_name,
+        node_labels=node_labels,
+        is_public=public_location is not None)
+
+    store_server_info(
+        server_ip=kalavai_seed_ip,
+        auth_key=auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=public_location,
+        user_api_key=user["api_key"])
+
+    try:
+        CLUSTER.start_worker_node()
+    except Exception as e:
+        return {"error": f"Error connecting to {cluster_name} @ {kalavai_seed_ip}. Check with the admin if the token is still valid."}
+
+    # ensure we are connected
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    # check the node has connected successfully
+    try:
+        while not CLUSTER.is_agent_running():
+            time.sleep(30)
+    except KeyboardInterrupt:
+        return {"error": "Installation aborted. Leaving pool."}
+
+    result = init_user_workspace()
+    if "error" in result:
+        return {"error": f"Error when creating user workspace: {result}"}
+
+    return cluster_name
+
+def create_pool(cluster_name: str, ip_address: str, app_values: str=None, pool_config_values: str=None, num_gpus: int=0, node_name: str=None, only_registered_users: bool=False, location: str=None):
+
+    if not check_seed_compatibility():
+        return {"error": "Requirements failed"}
+
+    if app_values is None:
+        app_values = HELM_APPS_VALUES
+
+    if pool_config_values is None:
+        pool_config_values = POOL_CONFIG_DEFAULT_VALUES
+
+    node_name = f"{socket.gethostname()}-{uuid.uuid4().hex[:6]}"
+
+    # if only registered users are allowed, check user has logged in
+    user = defaultdict(lambda: None)
+    if only_registered_users or location is not None:
+        user = authenticate_user()
+        if user is None:
+            return {"error": "[white]--only-registered-users [red]or [white]--location[red] can only be used if the host is authenticated. Run [yellow]kalavai login[red] to authenticate"}
+
+    # join private network if provided
+    vpn = defaultdict(lambda: None)
+    node_labels = {
+        STORAGE_CLASS_LABEL: is_storage_compatible()
+    }
+    if location is not None:
+        try:
+            vpn = get_vpn_details(
+                location=location,
+                user_cookie=USER_COOKIE)
+            node_labels[USER_NODE_LABEL] = user["username"]
+        except Exception as e:
+            return {"error": f"[red]Error when joining network: {str(e)}"}
+
+    # Generate docker compose recipe
+    generate_compose_config(
+        role="server",
+        vpn_token=vpn["key"],
+        node_ip_address=ip_address,
+        num_gpus=num_gpus,
+        node_name=node_name,
+        node_labels=node_labels,
+        is_public=location is not None
+    )
+
+    # start server
+    CLUSTER.start_seed_node()
+
+    while not CLUSTER.is_agent_running():
+        time.sleep(10)
+
+    # select IP address (for external discovery)
+    if ip_address is None or location is not None:
+        # load VPN ip
+        ip_address = CLUSTER.get_vpn_ip()
+
+    # populate local cred files
+    auth_key = str(uuid.uuid4())
+    write_auth_key = str(uuid.uuid4())
+    readonly_auth_key = str(uuid.uuid4())
+
+    watcher_service = f"{ip_address}:{DEFAULT_WATCHER_PORT}"
+    values = {
+        CLUSTER_NAME_KEY: cluster_name,
+        CLUSTER_IP_KEY: ip_address,
+        AUTH_KEY: auth_key,
+        READONLY_AUTH_KEY: readonly_auth_key,
+        WRITE_AUTH_KEY: write_auth_key,
+        WATCHER_PORT_KEY: DEFAULT_WATCHER_PORT,
+        WATCHER_SERVICE_KEY: watcher_service,
+        USER_NODE_LABEL_KEY: USER_NODE_LABEL,
+        ALLOW_UNREGISTERED_USER_KEY: not only_registered_users
+    }
+
+    store_server_info(
+        server_ip=ip_address,
+        auth_key=auth_key,
+        readonly_auth_key=readonly_auth_key,
+        write_auth_key=write_auth_key,
+        file=USER_LOCAL_SERVER_FILE,
+        watcher_service=watcher_service,
+        node_name=node_name,
+        cluster_name=cluster_name,
+        public_location=location,
+        user_api_key=user["api_key"])
+
+    # Generate helmfile recipe
+    helm_yaml = load_template(
+        template_path=HELM_APPS_FILE,
+        values=values,
+        default_values_path=app_values,
+        force_defaults=True)
+    with open(USER_HELM_APPS_FILE, "w") as f:
+        f.write(helm_yaml)
+
+    # set template values in helmfile
+    try:
+        CLUSTER.update_dependencies(
+            dependencies_file=USER_HELM_APPS_FILE
+        )
+    except Exception as e:
+        return {"error": f"Error when updating dependencies: {str(e)}"}
+
+    if location is not None:
+        # TODO: register with kalavai if it's a public cluster
+        pass
+        #pool__publish()
+
+    # wait until the server is ready to create objects
+    while True:
+        time.sleep(30)
+        if is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE):
+            break
+
+    result = pool_init(pool_config_values_path=pool_config_values)
+    if "error" in result or ("failed" in result and len(result['failed']) > 0):
+        return {"error": f"Error when initialising pool: {result}"}
+    # init default namespace
+    init_user_workspace(force_namespace="default")
+    if only_registered_users:
+        # init user namespace
+        init_user_workspace()
+
+    return {"success"}
+
+def pool_init(pool_config_values_path=None):
+    """Deploy configured objects to initialise pool"""
+    if pool_config_values_path is None:
+        return
+
+    # load template config and populate with values
+    sidecar_template_yaml = load_template(
+        template_path=POOL_CONFIG_TEMPLATE,
+        values={},
+        default_values_path=pool_config_values_path)
+
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/deploy_generic_model",
+            data={"config": sidecar_template_yaml},
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        return result
+    except Exception as e:
+        return {"error": f"[red]Error when connecting to kalavai service: {str(e)}"}
+
+def is_connected():
+    if not os.path.isfile(USER_LOCAL_SERVER_FILE):
+        return False
+    return is_watcher_alive(server_creds=USER_LOCAL_SERVER_FILE, user_cookie=USER_COOKIE, timeout=10)
+
+def is_agent_running():
+    return CLUSTER.is_agent_running()
+
+def is_server():
+    return CLUSTER.is_seed_node()
+
+def pause_agent(retries=3):
+    try:
+        while retries > 0:
+            state = CLUSTER.pause_agent()
+            if state:
+                return {"success"}
+            time.sleep(5)
+            retries -= 1
+    except:
+        return {"error": "Could not pause agent"}
+
+def resume_agent(retries=3):
+    try:
+        while retries > 0:
+            state = CLUSTER.restart_agent()
+            if state:
+                return {"success"}
+            time.sleep(5)
+            retries -= 1
+    except:
+        return {"error": "Could not resume agent"}
+
+def cleanup_local():
+    safe_remove(CONTAINER_HOST_PATH)
+    safe_remove(USER_COMPOSE_FILE)
+    safe_remove(USER_VPN_COMPOSE_FILE)
+    safe_remove(USER_HELM_APPS_FILE)
+    safe_remove(USER_KUBECONFIG_FILE)
+    safe_remove(USER_LOCAL_SERVER_FILE)
+    safe_remove(USER_TEMPLATES_FOLDER)
+
+def delete_node(name):
+    data = {
+        "node_names": [name]
+    }
+    try:
+        result = request_to_server(
+            method="post",
+            endpoint="/v1/delete_nodes",
+            data=data,
+            server_creds=USER_LOCAL_SERVER_FILE,
+            user_cookie=USER_COOKIE
+        )
+        if result is None or result is True:
+            return {f"Node {name} deleted successfully"}
+        else:
+            return {"error": result}
+    except Exception as e:
+        return {"error": str(e)}
+
+def stop_pool(skip_node_deletion=False):
+    # delete local node from server
+    logs = []
+    if not skip_node_deletion:
+        logs.append(
+            delete_node(load_server_info(data_key=NODE_NAME_KEY, file=USER_LOCAL_SERVER_FILE))
+        )
+    # unpublish event (only if seed node)
+    # TODO: no, this should be done via the platform!!!
+    # try:
+    #     if CLUSTER.is_seed_node():
+    #         console.log("Unregistering pool...")
+    #         unregister_cluster(
+    #             name=load_server_info(data_key=CLUSTER_NAME_KEY, file=USER_LOCAL_SERVER_FILE),
+    #             user_cookie=USER_COOKIE)
+    # except Exception as e:
+    #     console.log(f"[red][WARNING]: (ignore if not a public pool) Error when unpublishing cluster. {str(e)}")
+    # remove local node agent
+
+    # disconnect from VPN first, then remove agent, then remove local files
+    try:
+        vpns = leave_vpn(container_name=DEFAULT_VPN_CONTAINER_NAME)
+        if vpns is not None:
+            for vpn in vpns:
+                logs.append(f"You have left {vpn} VPN")
+    except:
+        # no vpn
+        pass
+
+    CLUSTER.remove_agent()
+
+    # clean local files
+    cleanup_local()
+
+    return logs
+
+def list_available_pools(user_only=False):
+    pools = get_public_seeds(user_only=user_only, user_cookie=USER_COOKIE)
+    return pools
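Taken together, 0.5.16 moves the full pool lifecycle into core.py: create_pool on the seed host, join_pool for workers (starts an agent via CLUSTER), attach_to_pool to connect without contributing compute (role="" in the compose recipe), and stop_pool/cleanup_local for teardown. Note that create_pool, join_pool and attach_to_pool all block in 30-second polling loops until the watcher responds. A condensed happy-path sketch with placeholder values:

    # on the seed machine
    create_pool(cluster_name="demo-pool", ip_address="192.168.68.10")  # placeholders

    # on each worker, with the join token distributed by the pool admin
    cluster = join_pool(token=JOIN_TOKEN, num_gpus=1)  # JOIN_TOKEN is a placeholder

    # teardown, on a member node
    stop_pool()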