kalavai-client 0.5.0__py2.py3-none-any.whl
This diff shows the content of a publicly available package version released to one of the supported registries, as it appears in that registry. It is provided for informational purposes only.
- kalavai_client/__init__.py +2 -0
- kalavai_client/__main__.py +5 -0
- kalavai_client/assets/apps.yaml +201 -0
- kalavai_client/assets/apps_values.yaml +83 -0
- kalavai_client/assets/docker-compose-template.yaml +55 -0
- kalavai_client/assets/pool_config_template.yaml +19 -0
- kalavai_client/assets/pool_config_values.yaml +12 -0
- kalavai_client/assets/user_workspace.yaml +19 -0
- kalavai_client/assets/user_workspace_values.yaml +29 -0
- kalavai_client/assets/vpn-template.yaml +13 -0
- kalavai_client/auth.py +68 -0
- kalavai_client/cli.py +1989 -0
- kalavai_client/cluster.py +308 -0
- kalavai_client/utils.py +456 -0
- kalavai_client-0.5.0.dist-info/LICENSE +201 -0
- kalavai_client-0.5.0.dist-info/METADATA +279 -0
- kalavai_client-0.5.0.dist-info/RECORD +19 -0
- kalavai_client-0.5.0.dist-info/WHEEL +4 -0
- kalavai_client-0.5.0.dist-info/entry_points.txt +3 -0
kalavai_client/cluster.py (new file)
@@ -0,0 +1,308 @@
import os
import time
from pathlib import Path
from abc import ABC, abstractmethod

from kalavai_client.utils import (
    run_cmd,
    check_gpu_drivers,
    validate_poolconfig,
    user_path
)


class Cluster(ABC):
    @abstractmethod
    def start_seed_node(self, ip_address, labels, flannel_iface):
        raise NotImplementedError()

    @abstractmethod
    def start_worker_node(self, url, token, node_name, auth_key, watcher_service, ip_address, labels, flannel_iface):
        raise NotImplementedError()

    @abstractmethod
    def update_dependencies(self, dependencies_files):
        raise NotImplementedError()

    @abstractmethod
    def remove_agent(self):
        raise NotImplementedError()

    @abstractmethod
    def is_agent_running(self) -> bool:
        raise NotImplementedError()

    @abstractmethod
    def is_seed_node(self) -> bool:
        raise NotImplementedError()

    @abstractmethod
    def is_cluster_init(self) -> bool:
        raise NotImplementedError()

    @abstractmethod
    def pause_agent(self) -> bool:
        raise NotImplementedError()

    @abstractmethod
    def restart_agent(self) -> bool:
        raise NotImplementedError()

    @abstractmethod
    def get_cluster_token(self) -> str:
        raise NotImplementedError()

    @abstractmethod
    def diagnostics(self) -> str:
        raise NotImplementedError()

    @abstractmethod
    def validate_cluster(self) -> bool:
        raise NotImplementedError()
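
Cluster is the backend contract the rest of the client codes against: lifecycle (start_seed_node, start_worker_node, pause_agent, restart_agent, remove_agent), introspection (is_agent_running, is_seed_node, is_cluster_init) and utilities (get_cluster_token, diagnostics, validate_cluster). A minimal sketch of a caller written purely against this interface; the ensure_agent helper is illustrative and not part of the package:

def ensure_agent(cluster: Cluster) -> str:
    # Illustrative only: bring the local agent up and return a status report.
    if not cluster.is_cluster_init():
        raise RuntimeError("cluster has not been initialised on this machine")
    if not cluster.is_agent_running():
        # restart_agent() returns is_agent_running() in both backends below,
        # so its return value reflects the post-restart state
        if not cluster.restart_agent():
            raise RuntimeError("agent failed to restart")
    # diagnostics() returns a report string on seed nodes and None on workers
    return cluster.diagnostics() or "agent running (worker node, no diagnostics available)"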

class dockerCluster(Cluster):
    def __init__(self, container_name, compose_file, kubeconfig_file, poolconfig_file, dependencies_file, kube_version="v1.31.1+k3s1", flannel_iface=None):
        self.kube_version = kube_version
        self.container_name = container_name
        self.compose_file = compose_file
        self.kubeconfig_file = kubeconfig_file
        self.poolconfig_file = poolconfig_file
        self.dependencies_file = dependencies_file

        if flannel_iface is not None:
            self.default_flannel_iface = flannel_iface
        else:
            self.default_flannel_iface = ""

    def start_seed_node(self):
        run_cmd(f"docker compose -f {self.compose_file} up -d")
        time.sleep(5)
        run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file}")

    def start_worker_node(self):
        run_cmd(f"docker compose -f {self.compose_file} up -d")

    def update_dependencies(self, dependencies_file=None, debug=False, retries=3):
        if dependencies_file is not None:
            self.dependencies_file = dependencies_file
        if debug:
            output = ""
        else:
            output = " >/dev/null 2>&1"
        while True:
            try:
                home = user_path("")
                run_cmd(f"docker run --rm --net=host -v {home}:{home} ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file {self.dependencies_file} --kubeconfig {self.kubeconfig_file} {output}")
                #run_cmd(f"helmfile sync --file {self.dependencies_file} --kubeconfig {self.kubeconfig_file} {output}")
                break
            except Exception as e:
                if retries > 0:
                    retries -= 1
                    print(f"[{retries}] Dependencies failed. Retrying...")
                else:
                    raise Exception(f"Dependencies failed. Are you connected to the internet?\n\nTrace: {str(e)}")

    def remove_agent(self):
        try:
            run_cmd(f'docker compose -f {self.compose_file} down')
            return True
        except:
            return False

    def is_agent_running(self):
        if not os.path.isfile(self.compose_file):
            return False
        status = self.container_name in run_cmd(f"docker compose -f {self.compose_file} ps --services --status=running").decode()
        return status

    def is_seed_node(self):
        if not os.path.isfile(self.compose_file):
            return False
        status = "server" in run_cmd(f"docker compose -f {self.compose_file} ps --services --status=running").decode()
        return status

    def is_cluster_init(self):
        if not os.path.isfile(self.compose_file):
            return False
        status = self.container_name in run_cmd(f"docker compose -f {self.compose_file} ps --services --all").decode()
        return status

    def pause_agent(self):
        status = False
        try:
            run_cmd(f'docker compose -f {self.compose_file} stop')
            status = True
        except:
            pass
        return status

    def restart_agent(self):
        try:
            run_cmd(f'docker compose -f {self.compose_file} start')
        except:
            pass
        return self.is_agent_running()

    def get_cluster_token(self):
        if self.is_seed_node():
            return run_cmd(f"docker container exec {self.container_name} cat /var/lib/rancher/k3s/server/node-token").decode()
            #return run_cmd("sudo k3s token create --kubeconfig /etc/rancher/k3s/k3s.yaml --ttl 0").decode()
        else:
            return None

    def diagnostics(self) -> str:
        # TODO: check cache files are in order
        # get cluster status
        if self.is_seed_node():
            return run_cmd(f"docker exec {self.container_name} kubectl get pods -A -o wide").decode() + "\n\n" + run_cmd(f"docker exec {self.container_name} kubectl get nodes").decode()
        else:
            return None

    def validate_cluster(self) -> bool:
        # check if credentials are present
        return os.path.isfile(self.poolconfig_file)
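
dockerCluster drives everything through docker compose: the seed node brings the compose stack up and copies the kubeconfig out of the k3s container, while the status checks parse `docker compose ps` output. Note that this backend's start_seed_node and start_worker_node take no arguments, unlike the abstract signatures above; Python's abc module only enforces that abstract names are overridden, not that signatures match, so the class still instantiates. A hypothetical seed-node setup, with all paths and names illustrative rather than the package defaults:

cluster = dockerCluster(
    container_name="kalavai-seed",                           # illustrative
    compose_file="/home/user/.kalavai/docker-compose.yaml",  # illustrative paths
    kubeconfig_file="/home/user/.kalavai/kubeconfig",
    poolconfig_file="/home/user/.kalavai/pool_config.yaml",
    dependencies_file="/home/user/.kalavai/apps.yaml",
)
cluster.start_seed_node()            # docker compose up -d, then copy k3s.yaml out of the container
cluster.update_dependencies()        # helmfile sync, run via the containerised helmfile image
token = cluster.get_cluster_token()  # node-token that workers use to join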

class k3sCluster(Cluster):

    def __init__(self, kubeconfig_file, poolconfig_file, dependencies_file, kube_version="v1.31.1+k3s1", flannel_iface=None):
        self.kube_version = kube_version
        self.kubeconfig_file = kubeconfig_file
        self.poolconfig_file = poolconfig_file
        self.dependencies_file = dependencies_file

        if flannel_iface is not None:
            self.default_flannel_iface = flannel_iface
        else:
            self.default_flannel_iface = ""
        self.node_labels = ""  # no labels by default; ensures the attribute always exists
        try:
            if check_gpu_drivers():
                self.node_labels = "--node-label gpu=on"
        except:
            print("[Warning] issues detected with nvidia drivers; GPU has been disabled for this node")

    def start_seed_node(self, ip_address, labels=None, is_public=False):
        node_labels = self.node_labels
        if labels is not None:
            for key, value in labels.items():
                node_labels += f" --node-label {key}={value}"
        if is_public:
            flannel_iface = f"--flannel-iface {self.default_flannel_iface}"
        else:
            flannel_iface = ""
        run_cmd(f'curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="{self.kube_version}" INSTALL_K3S_EXEC="server --node-ip {ip_address} --node-external-ip {ip_address} {flannel_iface} --flannel-backend wireguard-native {node_labels}" sh - >/dev/null 2>&1')
        run_cmd(f"sudo cp /etc/rancher/k3s/k3s.yaml {self.kubeconfig_file}")
        run_cmd(f"sudo chown $USER {self.kubeconfig_file}")

    def start_worker_node(self, url, token, node_name, ip_address, labels=None, is_public=False):
        node_labels = self.node_labels
        if labels is not None:
            for key, value in labels.items():
                node_labels += f" --node-label {key}={value}"
        if is_public:
            flannel_iface = f"--flannel-iface {self.default_flannel_iface}"
        else:
            flannel_iface = ""
        command = f'curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="{self.kube_version}" INSTALL_K3S_EXEC="agent --token {token} --server https://{url}:6443 --node-name {node_name} --node-ip {ip_address} --node-external-ip {ip_address} {flannel_iface} {node_labels}" sh - >/dev/null 2>&1'
        run_cmd(command)

    def update_dependencies(self, dependencies_file=None, debug=False, retries=3):
        if dependencies_file is not None:
            self.dependencies_file = dependencies_file
        if debug:
            output = ""
        else:
            output = " >/dev/null 2>&1"
        while True:
            try:
                run_cmd(f"helmfile sync --file {self.dependencies_file} --kubeconfig {self.kubeconfig_file} {output}")
                break
            except Exception as e:
                if retries > 0:
                    retries -= 1
                    print(f"[{retries}] Dependencies failed. Retrying...")
                else:
                    raise Exception(f"Dependencies failed. Are you connected to the internet?\n\nTrace: {str(e)}")

    def remove_agent(self):
        try:
            run_cmd('/usr/local/bin/k3s-uninstall.sh >/dev/null 2>&1')
            run_cmd('sudo rm -r /etc/rancher/node/ >/dev/null 2>&1')
            return True
        except:
            pass
        try:
            run_cmd('/usr/local/bin/k3s-agent-uninstall.sh >/dev/null 2>&1')
            return True
        except:
            pass
        return False

    def is_agent_running(self):
        status = (0 == os.system('sudo systemctl is-active --quiet k3s-agent.service')) or (0 == os.system('sudo systemctl is-active --quiet k3s.service'))
        return status

    def is_seed_node(self):
        return 0 == os.system('sudo systemctl is-active --quiet k3s.service')

    def is_cluster_init(self):
        status = Path("/usr/local/bin/k3s-agent-uninstall.sh").is_file() or Path("/usr/local/bin/k3s-uninstall.sh").is_file()
        return status

    def pause_agent(self):
        status = False
        try:
            run_cmd('sudo systemctl stop k3s >/dev/null 2>&1')
            status = True
        except:
            pass
        try:
            run_cmd('sudo systemctl stop k3s-agent >/dev/null 2>&1')
            status = True
        except:
            pass
        return status

    def restart_agent(self):
        try:
            run_cmd('sudo systemctl start k3s >/dev/null 2>&1')
        except:
            pass
        try:
            run_cmd('sudo systemctl start k3s-agent >/dev/null 2>&1')
        except:
            pass
        return self.is_agent_running()

    def get_cluster_token(self):
        if self.is_seed_node():
            return run_cmd("sudo cat /var/lib/rancher/k3s/server/node-token").decode()
            #return run_cmd("sudo k3s token create --kubeconfig /etc/rancher/k3s/k3s.yaml --ttl 0").decode()
        else:
            return None

    def diagnostics(self) -> str:
        if self.is_seed_node():
            return run_cmd(f"k3s kubectl get pods -A -o wide --kubeconfig {self.kubeconfig_file}").decode() + "\n\n" + run_cmd(f"k3s kubectl get nodes --kubeconfig {self.kubeconfig_file}").decode()
        else:
            return None

    def validate_cluster(self) -> bool:
        if not self.is_cluster_init():
            raise ValueError("Pool not initialised")
        if not self.is_agent_running():
            raise ValueError("Pool initialised but agent is not running")
        # check cache files
        if self.is_seed_node():
            if not validate_poolconfig(self.poolconfig_file):
                raise ValueError("Cache misconfigured. Run 'kalavai pool stop' to clear.")
        return True
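
k3sCluster installs k3s directly on the host through the upstream get.k3s.io script (server role with the wireguard-native flannel backend for seeds, agent role for workers) and manages the resulting systemd units. A sketch of driving both roles, with addresses, node names and paths as placeholders:

seed = k3sCluster(
    kubeconfig_file="/home/user/.kalavai/kubeconfig",       # illustrative paths
    poolconfig_file="/home/user/.kalavai/pool_config.yaml",
    dependencies_file="/home/user/.kalavai/apps.yaml",
)
seed.start_seed_node(ip_address="192.168.1.10", labels={"role": "seed"})
seed.update_dependencies(debug=True)       # surface helmfile output while debugging
token = seed.get_cluster_token().strip()   # node-token comes back with a trailing newline

# on a second machine, join as a worker (the :6443 API port is appended internally)
worker = k3sCluster(
    kubeconfig_file="/home/user/.kalavai/kubeconfig",
    poolconfig_file="/home/user/.kalavai/pool_config.yaml",
    dependencies_file="/home/user/.kalavai/apps.yaml",
)
worker.start_worker_node(
    url="192.168.1.10",
    token=token,
    node_name="worker-1",
    ip_address="192.168.1.11",
)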