addftool 0.2.8__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
addftool/blob.py CHANGED
@@ -76,6 +76,14 @@ def install_main(args):
76
76
  # if has root permission, run install script
77
77
  # else, print install script
78
78
 
79
+ # make sure wget is installed
80
+ if not check_package_installed("wget"):
81
+ print("wget is not installed, installing wget")
82
+ command = "apt-get install wget -y"
83
+ if args.sudo:
84
+ command = "sudo " + command
85
+ execute_command(command, script_writer)
86
+
79
87
  print("Get ubuntu version: ", ubuntu_version)
80
88
  command = f"wget https://packages.microsoft.com/config/ubuntu/{ubuntu_version}/packages-microsoft-prod.deb -O /tmp/packages-microsoft-prod.deb"
81
89
  print("Install packages-microsoft-prod.deb")
addftool/sleep.py CHANGED
@@ -2,16 +2,68 @@ import time
2
2
  import subprocess
3
3
  import sys
4
4
  import multiprocessing as mp
5
+ import re
5
6
 
6
7
  try:
7
8
  import torch
8
9
  except ImportError:
9
10
  print("PyTorch is not installed. Please install it to run this script.")
10
- sys.exit(1)
11
11
 
12
+ try:
13
+ import triton
14
+ import triton.runtime.driver
15
+ except ImportError:
16
+ print("Triton is not installed. Will try to detect GPU type using command line tools.")
17
+ triton = None
18
+
19
def is_cuda():
    """Report whether Triton's active backend is CUDA.

    Returns:
        True when the active Triton target reports the "cuda" backend,
        False when it reports any other backend, and None when Triton is
        not available or the backend query fails.
    """
    try:
        if triton is None:
            return None
        return triton.runtime.driver.active.get_current_target().backend == "cuda"
    except Exception:
        # Best-effort probe: a failing driver query means "unknown", but
        # KeyboardInterrupt / SystemExit must still propagate.
        return None
27
+
28
def get_gpu_type():
    """Detect the GPU vendor of the current machine.

    Detection first asks ``is_cuda()``; if that gives no answer, it falls
    back to checking whether ``nvidia-smi`` or ``rocm-smi`` runs
    successfully.

    Returns:
        "nvidia", "amd", or None when neither vendor could be detected.
    """
    cuda_detected = is_cuda()
    if cuda_detected is True:
        return "nvidia"
    if cuda_detected is False:
        # NOTE(review): every non-CUDA Triton backend is reported as AMD;
        # confirm no other backend is expected here.
        return "amd"

    # Triton gave no answer: fall back to the vendor command-line tools.
    for vendor, tool in (("nvidia", "nvidia-smi"), ("amd", "rocm-smi")):
        if _smi_tool_succeeds(tool):
            return vendor
    return None


def _smi_tool_succeeds(tool):
    """Return True if running *tool* with no arguments exits with status 0."""
    try:
        # Only the exit status matters, so the output is discarded rather
        # than captured and decoded.
        completed = subprocess.run(
            [tool],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # Tool missing or not executable: treat as "vendor not present" so
        # the remaining probes still run.
        return False
    return completed.returncode == 0
12
52
 
13
53
def get_gpu_stats(device_id):
    """Return utilisation and memory usage for one GPU (NVIDIA or AMD).

    Args:
        device_id: Identifier of the GPU, passed through unchanged to the
            vendor-specific stats function.

    Returns:
        Whatever the vendor-specific function returns for the device, or
        ``(None, None)`` when no supported GPU type is detected.
    """
    # Reuse the vendor detected at import time so a polling caller does not
    # repeat the detection on every call; detect again only when the
    # import-time result was empty.
    gpu_type = GPU_TYPE or get_gpu_type()

    if gpu_type == "nvidia":
        return get_nvidia_gpu_stats(device_id)
    if gpu_type == "amd":
        return get_amd_gpu_stats(device_id)

    print("No supported GPU found (neither NVIDIA nor AMD)")
    return None, None
64
+
65
+ def get_nvidia_gpu_stats(device_id):
66
+ """获取NVIDIA GPU的统计信息"""
15
67
  try:
16
68
  cmd = f"nvidia-smi --id={device_id} --query-gpu=utilization.gpu,memory.used --format=csv,noheader,nounits"
17
69
  result = subprocess.run(cmd, shell=True, capture_output=True, text=True)
@@ -31,9 +83,75 @@ def get_gpu_stats(device_id):
31
83
 
32
84
  return None, None
33
85
  except Exception as e:
34
- print(f"Error getting GPU stats for device {device_id}: {e}")
86
+ print(f"Error getting NVIDIA GPU stats for device {device_id}: {e}")
35
87
  return None, None
36
88
 
89
def get_amd_gpu_stats(device_id):
    """Return ``(gpu_util, memory_used)`` for an AMD GPU via ``rocm-smi``.

    ``rocm-smi -d <id> --showuse --showmemuse`` is tried first; if it fails,
    plain ``rocm-smi -d <id>`` is used as a fallback.

    Args:
        device_id: GPU identifier passed to ``rocm-smi -d``.

    Returns:
        A ``(gpu_util, memory_used)`` tuple of ints, where a value that
        could not be parsed from the output is reported as 0. Returns
        ``(None, None)`` when ``rocm-smi`` cannot be run successfully or an
        unexpected error occurs.
    """
    try:
        # Split on whitespace so a value such as "0 1" still becomes
        # separate arguments, as it did when a shell parsed the command.
        device_args = ["-d", *str(device_id).split()]

        result = _run_rocm_smi([*device_args, "--showuse", "--showmemuse"])
        if result is None or result.returncode != 0:
            # Fall back to the plain per-device summary.
            result = _run_rocm_smi(device_args)
            if result is None or result.returncode != 0:
                print(f"Error running rocm-smi for GPU {device_id}")
                return None, None

        return _parse_rocm_smi_output(result.stdout)

    except Exception as e:
        print(f"Error getting AMD GPU stats for device {device_id}: {e}")
        return None, None


def _run_rocm_smi(args):
    """Run ``rocm-smi`` with *args*; return the result, or None if it cannot start."""
    try:
        # An argument list (no shell) keeps the device id from being
        # interpreted as shell syntax.
        return subprocess.run(["rocm-smi", *args], capture_output=True, text=True)
    except OSError:
        return None


def _parse_rocm_smi_output(output):
    """Extract ``(gpu_util, memory_used)`` from ``rocm-smi`` text output."""
    labelled_util = None
    generic_util = None
    memory_used = None

    for line in output.splitlines():
        if "GPU use" in line:
            # Accept both "GPU use (%): 37" and "GPU use: 37%".
            match = (re.search(r"GPU use[^:]*:\s*(\d+)", line)
                     or re.search(r"(\d+)%", line))
            if match:
                labelled_util = int(match.group(1))
        elif "%" in line:
            # NOTE(review): heuristic kept from the original parser; the
            # first "N%" on an unlabelled line may not be the utilisation.
            match = re.search(r"(\d+)%", line)
            if match:
                generic_util = int(match.group(1))

        lowered = line.lower()
        if "vram" in lowered or "memory" in lowered or "MB" in line:
            # Matches "1024 MB" as well as "1024MB".
            match = re.search(r"(\d+)\s*MB", line, re.IGNORECASE)
            if match:
                memory_used = int(match.group(1))

    # An explicitly labelled value wins over the generic percentage guess.
    gpu_util = labelled_util if labelled_util is not None else generic_util

    # Values that could not be parsed are reported as 0 (assumed idle).
    if gpu_util is None:
        gpu_util = 0
    if memory_used is None:
        memory_used = 0

    return gpu_util, memory_used
139
+
140
# Detect the GPU environment once, when the module is loaded.
try:
    GPU_TYPE = get_gpu_type()
except Exception:
    # Detection is best-effort: record "unknown" instead of failing the
    # import, while still letting KeyboardInterrupt / SystemExit propagate.
    GPU_TYPE = None
    print("Failed to detect GPU environment")
else:
    if GPU_TYPE:
        cuda_status = is_cuda()
        if cuda_status is not None:
            print(f"Detected {GPU_TYPE.upper()} GPU environment (triton backend: {'cuda' if cuda_status else 'hip'})")
        else:
            print(f"Detected {GPU_TYPE.upper()} GPU environment")
    else:
        print("No supported GPU environment detected")
154
+
37
155
  def check_gpu_occupied(device_id, util_threshold=20, memory_threshold=2048):
38
156
  """检查GPU是否被其他进程占用
39
157
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: addftool
3
- Version: 0.2.8
3
+ Version: 0.2.10
4
4
  Requires-Dist: cryptography
5
5
  Requires-Dist: requests
6
6
  Requires-Dist: PyYAML
@@ -1,8 +1,8 @@
1
1
  addftool/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  addftool/addf_portal.py,sha256=vc8opPzValNFPwJne5C5LbZvgcJ0eMBJSWDSiM23OPM,1274
3
- addftool/blob.py,sha256=y1HZaDBUNeXicVytvwpRXwufvvrgxR33ruBlYpxnSa4,9453
3
+ addftool/blob.py,sha256=vyjJHlQZuGrpEiaRF-Bdmow_TMnhXtXEGL31qA5Hb-g,9742
4
4
  addftool/broadcast_folder.py,sha256=GQBuSL8Ch537V_fSBHesWyqT3KRYry68pbYOKy2bDj4,19619
5
- addftool/sleep.py,sha256=FA1fTUI47eQq-9nBtXElkS7SZMunP_5tLiIBuFNSM6w,7823
5
+ addftool/sleep.py,sha256=kykcZR2tA0ZpRUwQf6IIUdJ4dYkolyD91FdriViq9Nc,11771
6
6
  addftool/sync.py,sha256=ZpYxbM8uiPFrV7ODmOaM7asVPCWaxBixA-arVc-1kfs,14045
7
7
  addftool/tool.py,sha256=FmxRY3-pP0_Z0zCUAngjmEMmPUruMftg_iUlB1t2TnQ,2001
8
8
  addftool/util.py,sha256=zlNLu8Be8cGIpNRqBw8_0q7nFxWlsJ9cToN62ohjdXE,2335
@@ -13,8 +13,8 @@ addftool/deploy/vscode_server.py,sha256=tLtSvlcK2fEOaw6udWt8dNELVhwv9F59hF5DJJ-1
13
13
  addftool/process/__init__.py,sha256=Dze8OrcyjQlAbPrjE_h8bMi8W4b3OJyZOjTucPrkJvM,3721
14
14
  addftool/process/utils.py,sha256=JldxnwanLJOgxaPgmCJh7SeBRaaj5rFxWWxh1hpsvbA,2609
15
15
  addftool/ssh/__init__.py,sha256=h5_rCO0A6q2Yw9vFguQZZp_ApAJsT1dcnKnbKKZ0cDM,4409
16
- addftool-0.2.8.dist-info/METADATA,sha256=rxu5Oy4lH7lQF99Z8gzz5QuoGxnZ739h0OBNhr_0NA0,221
17
- addftool-0.2.8.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
18
- addftool-0.2.8.dist-info/entry_points.txt,sha256=9lkmuWMInwUAtev8w8poNkNd7iML9Bjd5CBCFVxg2b8,111
19
- addftool-0.2.8.dist-info/top_level.txt,sha256=jqj56-plrBbyzY0tIxB6wPzjAA8kte4hUlajyyQygN4,9
20
- addftool-0.2.8.dist-info/RECORD,,
16
+ addftool-0.2.10.dist-info/METADATA,sha256=01FZkhGtA7aa_vx22DMHcoFcuN6rPagl6Y0xYMKkd9o,222
17
+ addftool-0.2.10.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
18
+ addftool-0.2.10.dist-info/entry_points.txt,sha256=9lkmuWMInwUAtev8w8poNkNd7iML9Bjd5CBCFVxg2b8,111
19
+ addftool-0.2.10.dist-info/top_level.txt,sha256=jqj56-plrBbyzY0tIxB6wPzjAA8kte4hUlajyyQygN4,9
20
+ addftool-0.2.10.dist-info/RECORD,,