kalavai-client 0.6.12__py3-none-any.whl → 0.6.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
1
 
2
- __version__ = "0.6.12"
2
+ __version__ = "0.6.13"
kalavai_client/cli.py CHANGED
@@ -186,7 +186,9 @@ def input_gpus(non_interactive=False):
186
186
  try:
187
187
  has_gpus = check_gpu_drivers()
188
188
  if has_gpus:
189
- max_gpus = int(run_cmd("nvidia-smi -L | wc -l").decode())
189
+ max_gpus = len(
190
+ [r for r in run_cmd("nvidia-smi -L").decode().split("\n") if len(r.strip())>0]
191
+ )
190
192
  if non_interactive:
191
193
  num_gpus = max_gpus
192
194
  else:
kalavai_client/cluster.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import os
2
+ import platform
2
3
  import time
3
4
  from pathlib import Path
4
5
  from abc import ABC, abstractmethod
@@ -96,7 +97,7 @@ class dockerCluster(Cluster):
96
97
  # wait for container to be setup
97
98
  while True:
98
99
  try:
99
- run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file} >/dev/null 2>&1")
100
+ run_cmd(f"docker cp {self.container_name}:/etc/rancher/k3s/k3s.yaml {self.kubeconfig_file}", hide_output=True)
100
101
  break
101
102
  except:
102
103
  pass
@@ -115,15 +116,15 @@ class dockerCluster(Cluster):
115
116
  def update_dependencies(self, dependencies_file=None, debug=False, retries=3):
116
117
  if dependencies_file is not None:
117
118
  self.dependencies_file = dependencies_file
118
- if debug:
119
- output = ""
120
- else:
121
- output = " >/dev/null 2>&1"
122
119
  while True:
123
120
  try:
124
121
  home = user_path("")
125
- run_cmd(f"docker run --rm --net=host -v {home}:{home} ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file {self.dependencies_file} --kubeconfig {self.kubeconfig_file} {output}")
126
- #run_cmd(f"helmfile sync --file {self.dependencies_file} --kubeconfig {self.kubeconfig_file} {output}")
122
+ # convert path on host to path on container (will be different in windows os)
123
+ target_path = "/cache/kalavai"
124
+ kubeconfig_path = f"{target_path}/{Path(self.kubeconfig_file).name}"
125
+ dependencies_path = f"{target_path}/{Path(self.dependencies_file).name}"
126
+
127
+ run_cmd(f"docker run --rm --net=host -v {home}:{target_path} ghcr.io/helmfile/helmfile:v0.169.2 helmfile sync --file {dependencies_path} --kubeconfig {kubeconfig_path}", hide_output=not debug)
127
128
  break
128
129
  except Exception as e:
129
130
  if retries > 0:
@@ -142,11 +143,18 @@ class dockerCluster(Cluster):
142
143
  def is_agent_running(self):
143
144
  if not os.path.isfile(self.compose_file):
144
145
  return False
145
- status = self.container_name in run_cmd(f"docker compose -f {self.compose_file} ps --services --status=running").decode()
146
- if not status:
146
+ try:
147
+ status = self.container_name in run_cmd(f"docker compose -f {self.compose_file} ps --services --status=running").decode()
148
+ if not status:
149
+ return False
150
+ if "windows" in platform.system().lower():
151
+ status = (0 == os.system(f'docker exec {self.container_name} ps aux | findstr /n /c:"k3s server" /c:"k3s agent"'))
152
+ else:
153
+ status = (0 == os.system(f'docker exec {self.container_name} ps aux | grep -v grep | grep -E "k3s (server|agent)"'))
154
+ return status
155
+ except Exception as e:
156
+ print(f"Error when checking agent. Is Docker installed and running?\n\n{str(e)}")
147
157
  return False
148
- status = (0 == os.system(f'docker exec {self.container_name} ps aux | grep -v grep | grep -E "k3s (server|agent)"'))
149
- return status
150
158
 
151
159
  def is_seed_node(self):
152
160
  if not os.path.isfile(self.compose_file):
@@ -154,7 +162,7 @@ class dockerCluster(Cluster):
154
162
  if not self.is_agent_running():
155
163
  return False
156
164
  try:
157
- run_cmd(f"docker container exec {self.container_name} cat /var/lib/rancher/k3s/server/node-token >/dev/null 2>&1")
165
+ run_cmd(f"docker container exec {self.container_name} cat /var/lib/rancher/k3s/server/node-token", hide_output=True)
158
166
  return True
159
167
  except:
160
168
  return False
@@ -162,8 +170,12 @@ class dockerCluster(Cluster):
162
170
  def is_cluster_init(self):
163
171
  if not os.path.isfile(self.compose_file):
164
172
  return False
165
- status = self.container_name in run_cmd(f"docker compose -f {self.compose_file} ps --services --all").decode()
166
- return status
173
+ try:
174
+ status = self.container_name in run_cmd(f"docker compose -f {self.compose_file} ps --services --all").decode()
175
+ return status
176
+ except Exception as e:
177
+ print(f"Error when checking cluster. Is Docker installed and running?\n\n{str(e)}")
178
+ return False
167
179
 
168
180
  def pause_agent(self):
169
181
  status = False
@@ -177,7 +189,6 @@ class dockerCluster(Cluster):
177
189
  def restart_agent(self):
178
190
  try:
179
191
  run_cmd(f'docker compose -f {self.compose_file} start')
180
-
181
192
  except:
182
193
  pass
183
194
  time.sleep(5)
@@ -186,7 +197,6 @@ class dockerCluster(Cluster):
186
197
  def get_cluster_token(self):
187
198
  if self.is_seed_node():
188
199
  return run_cmd(f"docker container exec {self.container_name} cat /var/lib/rancher/k3s/server/node-token").decode()
189
- #return run_cmd("sudo k3s token create --kubeconfig /etc/rancher/k3s/k3s.yaml --ttl 0").decode()
190
200
  else:
191
201
  return None
192
202
 
@@ -231,7 +241,7 @@ class k3sCluster(Cluster):
231
241
  flannel_iface = f"--flannel-iface {self.default_flannel_iface}"
232
242
  else:
233
243
  flannel_iface = ""
234
- run_cmd(f'curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="{self.kube_version}" INSTALL_K3S_EXEC="server --node-ip {ip_address} --node-external-ip {ip_address} {flannel_iface} --flannel-backend wireguard-native {node_labels}" sh - >/dev/null 2>&1')
244
+ run_cmd(f'curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="{self.kube_version}" INSTALL_K3S_EXEC="server --node-ip {ip_address} --node-external-ip {ip_address} {flannel_iface} --flannel-backend wireguard-native {node_labels}" sh - ', hide_output=True)
235
245
  run_cmd(f"sudo cp /etc/rancher/k3s/k3s.yaml {self.kubeconfig_file}")
236
246
  run_cmd(f"sudo chown $USER {self.kubeconfig_file}")
237
247
 
@@ -245,8 +255,8 @@ class k3sCluster(Cluster):
245
255
  flannel_iface = f"--flannel-iface {self.default_flannel_iface}"
246
256
  else:
247
257
  flannel_iface = ""
248
- command = f'curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="{self.kube_version}" INSTALL_K3S_EXEC="agent --token {token} --server https://{url}:6443 --node-name {node_name} --node-ip {ip_address} --node-external-ip {ip_address} {flannel_iface} {node_labels}" sh - >/dev/null 2>&1'
249
- run_cmd(command)
258
+ command = f'curl -sfL https://get.k3s.io | INSTALL_K3S_VERSION="{self.kube_version}" INSTALL_K3S_EXEC="agent --token {token} --server https://{url}:6443 --node-name {node_name} --node-ip {ip_address} --node-external-ip {ip_address} {flannel_iface} {node_labels}" sh - '
259
+ run_cmd(command, hide_output=True)
250
260
 
251
261
 
252
262
  def update_dependencies(self, dependencies_file=None, debug=False, retries=3):
@@ -270,13 +280,13 @@ class k3sCluster(Cluster):
270
280
 
271
281
  def remove_agent(self):
272
282
  try:
273
- run_cmd('/usr/local/bin/k3s-uninstall.sh >/dev/null 2>&1')
274
- run_cmd('sudo rm -r /etc/rancher/node/ >/dev/null 2>&1')
283
+ run_cmd('/usr/local/bin/k3s-uninstall.sh', hide_output=True)
284
+ run_cmd('sudo rm -r /etc/rancher/node/', hide_output=True)
275
285
  return True
276
286
  except:
277
287
  pass
278
288
  try:
279
- run_cmd('/usr/local/bin/k3s-agent-uninstall.sh >/dev/null 2>&1')
289
+ run_cmd('/usr/local/bin/k3s-agent-uninstall.sh', hide_output=True)
280
290
  return True
281
291
  except:
282
292
  pass
@@ -296,12 +306,12 @@ class k3sCluster(Cluster):
296
306
  def pause_agent(self):
297
307
  status = False
298
308
  try:
299
- run_cmd('sudo systemctl stop k3s >/dev/null 2>&1')
309
+ run_cmd('sudo systemctl stop k3s', hide_output=True)
300
310
  status = True
301
311
  except:
302
312
  pass
303
313
  try:
304
- run_cmd('sudo systemctl stop k3s-agent >/dev/null 2>&1')
314
+ run_cmd('sudo systemctl stop k3s-agent', hide_output=True)
305
315
  status = True
306
316
  except:
307
317
  pass
@@ -309,11 +319,11 @@ class k3sCluster(Cluster):
309
319
 
310
320
  def restart_agent(self):
311
321
  try:
312
- run_cmd('sudo systemctl start k3s >/dev/null 2>&1')
322
+ run_cmd('sudo systemctl start k3s', hide_output=True)
313
323
  except:
314
324
  pass
315
325
  try:
316
- run_cmd('sudo systemctl start k3s-agent >/dev/null 2>&1')
326
+ run_cmd('sudo systemctl start k3s-agent', hide_output=True)
317
327
  except:
318
328
  pass
319
329
  return self.is_agent_running()
kalavai_client/core.py CHANGED
@@ -33,6 +33,7 @@ from kalavai_client.utils import (
33
33
  get_public_seeds,
34
34
  load_template,
35
35
  is_storage_compatible,
36
+ get_max_gpus,
36
37
  NODE_NAME_KEY,
37
38
  MANDATORY_TOKEN_FIELDS,
38
39
  PUBLIC_LOCATION_KEY,
@@ -157,7 +158,7 @@ def check_seed_compatibility():
157
158
  logs = []
158
159
  # docker
159
160
  try:
160
- run_cmd("docker version >/dev/null 2>&1")
161
+ run_cmd("docker ps", hide_output=True)
161
162
  except:
162
163
  logs.append("[red]Docker not installed. Install instructions:\n")
163
164
  logs.append(" Linux: https://docs.docker.com/engine/install/\n")
@@ -170,7 +171,7 @@ def check_worker_compatibility():
170
171
  logs = []
171
172
  # docker
172
173
  try:
173
- run_cmd("docker version >/dev/null 2>&1")
174
+ run_cmd("docker ps", hide_output=True)
174
175
  except:
175
176
  logs.append("[red]Docker not installed. Install instructions:\n")
176
177
  logs.append(" Linux: https://docs.docker.com/engine/install/\n")
@@ -594,16 +595,6 @@ def attach_to_pool(token, node_name=None):
594
595
 
595
596
  return cluster_name
596
597
 
597
- def get_max_gpus():
598
- try:
599
- has_gpus = check_gpu_drivers()
600
- if has_gpus:
601
- return int(run_cmd("nvidia-smi -L | wc -l").decode())
602
- else:
603
- return 0
604
- except:
605
- return 0
606
-
607
598
  def generate_worker_package(num_gpus=0, node_name=None, ip_address="0.0.0.0", storage_compatible=True):
608
599
  # get pool data from token
609
600
  token = get_pool_token(mode=TokenType.WORKER)
@@ -772,10 +763,9 @@ def create_pool(
772
763
  node_name=node_name,
773
764
  node_labels=node_labels
774
765
  )
775
-
766
+
776
767
  # start server
777
768
  CLUSTER.start_seed_node()
778
-
779
769
  while not CLUSTER.is_agent_running():
780
770
  time.sleep(10)
781
771
 
kalavai_client/utils.py CHANGED
@@ -73,17 +73,20 @@ KALAVAI_AUTH = KalavaiAuth(
73
73
 
74
74
  ####### Methods to check OS compatibility ########
75
75
  def check_gpu_drivers():
76
- value = run_cmd("command -v nvidia-smi")
77
- if len(value.decode("utf-8")) == 0:
78
- # no nvidia installed, no need to check nvidia any further
79
- return False
80
- else:
81
- # check drivers are set correctly
82
- try:
83
- value = run_cmd("nvidia-smi")
84
- return True
85
- except:
86
- raise ("Nvidia not configured properly. Please check your drivers are installed and configured")
76
+ value = run_cmd("nvidia-smi", hide_output=True)
77
+ return len(value.decode("utf-8")) == 0
78
+
79
+ def get_max_gpus():
80
+ try:
81
+ has_gpus = check_gpu_drivers()
82
+ if has_gpus:
83
+ return len(
84
+ [r for r in run_cmd("nvidia-smi -L").decode().split("\n") if len(r.strip())>0]
85
+ )
86
+ else:
87
+ return 0
88
+ except:
89
+ return 0
87
90
 
88
91
  def is_storage_compatible():
89
92
  """
@@ -92,6 +95,9 @@ def is_storage_compatible():
92
95
  Exclude: WSL
93
96
  """
94
97
  try:
98
+ import platform
99
+ if "windows" in platform.system().lower():
100
+ return True
95
101
  flagged = any([
96
102
  "microsoft" in run_cmd("cat /proc/version").decode().lower()
97
103
  ])
@@ -231,9 +237,17 @@ def validate_poolconfig(poolconfig_file):
231
237
  return False
232
238
  return True
233
239
 
234
- def run_cmd(command):
240
+ def run_cmd(command, hide_output=False):
235
241
  try:
236
- return_value = subprocess.check_output(command, shell=True, executable="/bin/bash")
242
+ import platform
243
+ if "windows" in platform.system().lower():
244
+ if hide_output:
245
+ command = command + " > $nul 2>&1"
246
+ return_value = subprocess.check_output(command, shell=True)
247
+ else:
248
+ if hide_output:
249
+ command = command + " >/dev/null 2>&1"
250
+ return_value = subprocess.check_output(command, shell=True, executable="/bin/bash")
237
251
  return return_value
238
252
  except OSError as error:
239
253
  return error # for exit code
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: kalavai-client
3
- Version: 0.6.12
3
+ Version: 0.6.13
4
4
  Summary: Client app for kalavai platform
5
5
  License: Apache-2.0
6
6
  Keywords: LLM,platform
@@ -1,4 +1,4 @@
1
- kalavai_client/__init__.py,sha256=RnWDr4VWcRjJDXe1q7REqBSUCBo44AvIa_8rGwJtT44,23
1
+ kalavai_client/__init__.py,sha256=RtAfZnVOn5ru1jOt5ukVyb0s2GiJ7s39Qp5_KBjCL-A,23
2
2
  kalavai_client/__main__.py,sha256=WQUfxvRsBJH5gsCJg8pLz95QnZIj7Ol8psTO77m0QE0,73
3
3
  kalavai_client/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  kalavai_client/assets/apps.yaml,sha256=AhTA3VZI27y05xHoHJCA9nvGnk8sWMhFDruBI2is3LM,6365
@@ -13,13 +13,13 @@ kalavai_client/assets/user_workspace_values.yaml,sha256=G0HOzQUxrDMCwuW9kbWUZaKM
13
13
  kalavai_client/auth.py,sha256=EB3PMvKUn5_KAQkezkEHEt-OMZXyfkZguIQlUFkEHcA,3243
14
14
  kalavai_client/bridge_api.py,sha256=ZLyFOOz_o4agm-7DrHzoSBFrH65y__hZUoEe4diBTOA,7557
15
15
  kalavai_client/bridge_models.py,sha256=GbIaqGFAVs-3ikVUQZldwTTc06SsxmP6iAifH0oVDro,1219
16
- kalavai_client/cli.py,sha256=tBn3l12XhOJF17cWeX84xm10HPG7rSzalyOyugzcLhw,46900
17
- kalavai_client/cluster.py,sha256=ojUBXp2bR3hVyikIEkiGDbXvQfhBXBSk_mCqLxvyP0c,12943
18
- kalavai_client/core.py,sha256=QOzNSxNJZ8tXhUTUPZUEFI3PX8UlVVnONYqVA61NU6U,34268
16
+ kalavai_client/cli.py,sha256=mmwLqqSYfl9k6vqveMcbHTq7g5FFd84YUUQCSH4J0k0,46967
17
+ kalavai_client/cluster.py,sha256=Z2PIXbZuSAv9xmw-MyZP1M41BpVMpirLzG51bqGA-zc,13548
18
+ kalavai_client/core.py,sha256=u8a4uYqGS0mMJh0ArcXG2hwp2uDUSuwM5ROGXRQkHZg,34051
19
19
  kalavai_client/env.py,sha256=YsfZj7LWf6ABquDsoIFFkXCFYwenpDk8zVnGsf7qv98,2823
20
- kalavai_client/utils.py,sha256=EmjWVLjTBILjw87_6ih_v5CbVqihsYHSKxfD-C_Z-es,12276
21
- kalavai_client-0.6.12.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
- kalavai_client-0.6.12.dist-info/METADATA,sha256=ShcxZWQofBxd_VVg6LbOGaPiPDrWSix_yLIbhn7MiJ4,13354
23
- kalavai_client-0.6.12.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
- kalavai_client-0.6.12.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
- kalavai_client-0.6.12.dist-info/RECORD,,
20
+ kalavai_client/utils.py,sha256=yHz9n4hGwhpGUTc1ZcG5JHtesoUEzSngmOSTvu2jDic,12674
21
+ kalavai_client-0.6.13.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
22
+ kalavai_client-0.6.13.dist-info/METADATA,sha256=YUeJh_a81RDslXa8QNlgNwmXfQLHxAbqcCK2HWP6t-4,13354
23
+ kalavai_client-0.6.13.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
24
+ kalavai_client-0.6.13.dist-info/entry_points.txt,sha256=9T6D45gxwzfVbglMm1r6XPdXuuZdHfy_7fCeu2jUphc,50
25
+ kalavai_client-0.6.13.dist-info/RECORD,,