addftool 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- addftool/addf_portal.py +5 -0
- addftool/blob.py +40 -10
- addftool/broadcast_folder.py +133 -7
- addftool/deploy/ssh_server.py +3 -0
- addftool/tool.py +0 -4
- {addftool-0.2.3.dist-info → addftool-0.2.5.dist-info}/METADATA +1 -1
- {addftool-0.2.3.dist-info → addftool-0.2.5.dist-info}/RECORD +10 -10
- {addftool-0.2.3.dist-info → addftool-0.2.5.dist-info}/WHEEL +1 -1
- {addftool-0.2.3.dist-info → addftool-0.2.5.dist-info}/entry_points.txt +0 -0
- {addftool-0.2.3.dist-info → addftool-0.2.5.dist-info}/top_level.txt +0 -0
addftool/addf_portal.py
CHANGED
|
@@ -4,6 +4,8 @@ from addftool.sync import add_sync_args, sync_main
|
|
|
4
4
|
from addftool.deploy import add_deploy_args, deploy_main
|
|
5
5
|
from addftool.broadcast_folder import add_broadcast_folder_args, broadcast_folder_main
|
|
6
6
|
|
|
7
|
+
from addftool.blob import add_blob_args, blob_main
|
|
8
|
+
|
|
7
9
|
|
|
8
10
|
def get_args():
|
|
9
11
|
parser = argparse.ArgumentParser(description="Addf's tool")
|
|
@@ -13,6 +15,7 @@ def get_args():
|
|
|
13
15
|
add_sync_args(subparsers)
|
|
14
16
|
add_deploy_args(subparsers)
|
|
15
17
|
add_broadcast_folder_args(subparsers)
|
|
18
|
+
add_blob_args(subparsers)
|
|
16
19
|
|
|
17
20
|
return parser.parse_args()
|
|
18
21
|
|
|
@@ -27,6 +30,8 @@ def main():
|
|
|
27
30
|
deploy_main(args)
|
|
28
31
|
elif args.command == "broadcast-folder":
|
|
29
32
|
broadcast_folder_main(args)
|
|
33
|
+
elif args.command == "blob":
|
|
34
|
+
blob_main(args)
|
|
30
35
|
else:
|
|
31
36
|
print("Unknown command: ", args.command)
|
|
32
37
|
|
addftool/blob.py
CHANGED
|
@@ -199,13 +199,29 @@ def get_sas_token(api_url, name, container, key, info=False):
|
|
|
199
199
|
return f"Error: {response.status_code}"
|
|
200
200
|
|
|
201
201
|
|
|
202
|
-
def
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
202
|
+
def parse_blob_account_and_container_from_url(blob_url):
|
|
203
|
+
"""
|
|
204
|
+
Parse the blob URL to extract account name and container name.
|
|
205
|
+
"""
|
|
206
|
+
if not blob_url.startswith("https://"):
|
|
207
|
+
raise ValueError(f"Invalid blob URL: {blob_url}")
|
|
208
|
+
|
|
209
|
+
parts = blob_url.split("/")
|
|
210
|
+
account_name = parts[2].split(".")[0]
|
|
211
|
+
container_name = parts[3].strip()
|
|
212
|
+
|
|
213
|
+
return account_name, container_name
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def get_sas_token_for_blob_url(api_url, blob_url, key, info=False):
|
|
217
|
+
an, cn = parse_blob_account_and_container_from_url(blob_url)
|
|
218
|
+
if info:
|
|
219
|
+
print(f"Parse blob URL: {blob_url}, account name: {an}, container name: {cn}")
|
|
220
|
+
return get_sas_token(api_url, an, cn, key, info=info)
|
|
221
|
+
|
|
207
222
|
|
|
208
|
-
|
|
223
|
+
def add_args(parser):
|
|
224
|
+
subparsers = parser.add_subparsers(dest='blob_command', help='Sub-command help')
|
|
209
225
|
install_parser = subparsers.add_parser('install', help='Install help')
|
|
210
226
|
# install_parser.add_argument("-o", "--output_script", help="output script", default=None)
|
|
211
227
|
install_parser.add_argument("--packages", help="packages", default="fuse3 blobfuse2 azcopy")
|
|
@@ -223,19 +239,33 @@ def main():
|
|
|
223
239
|
token_parser = subparsers.add_parser('token', help='Token help')
|
|
224
240
|
add_api(token_parser)
|
|
225
241
|
|
|
226
|
-
|
|
242
|
+
def add_blob_args(subparsers):
|
|
243
|
+
deploy_parser = subparsers.add_parser('blob', help='Blob help')
|
|
244
|
+
add_args(deploy_parser)
|
|
245
|
+
|
|
227
246
|
|
|
247
|
+
def blob_main(args):
|
|
228
248
|
# check os is linux/unix and current user, set --sudo if current user is not root
|
|
229
249
|
if os.name == 'posix' and os.getuid() != 0:
|
|
230
250
|
args.sudo = True
|
|
231
251
|
|
|
232
|
-
if args.
|
|
252
|
+
if args.blob_command == 'install':
|
|
233
253
|
install_main(args)
|
|
234
|
-
elif args.
|
|
254
|
+
elif args.blob_command == 'mount':
|
|
235
255
|
mount_main(args)
|
|
236
|
-
elif args.
|
|
256
|
+
elif args.blob_command == 'token':
|
|
237
257
|
print(get_token(args, info=False))
|
|
238
258
|
|
|
239
259
|
|
|
260
|
+
def main():
|
|
261
|
+
# example usage: addfblob install
|
|
262
|
+
# example usage: addfblob mount -k <key> -a <api> -b <buffer> -m <mount_dir>
|
|
263
|
+
# example usage: addfblob token -k <key> -a <api>
|
|
264
|
+
parser = argparse.ArgumentParser(description="Addf's tool")
|
|
265
|
+
add_args(parser)
|
|
266
|
+
args = parser.parse_args()
|
|
267
|
+
blob_main(args)
|
|
268
|
+
|
|
269
|
+
|
|
240
270
|
if __name__ == "__main__":
|
|
241
271
|
main()
|
addftool/broadcast_folder.py
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import time
|
|
3
|
+
import fnmatch
|
|
3
4
|
import subprocess
|
|
4
5
|
import hashlib
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
5
7
|
|
|
6
8
|
from fabric import Connection, ThreadingGroup
|
|
7
9
|
|
|
@@ -33,6 +35,22 @@ def add_args(parser):
|
|
|
33
35
|
parser.add_argument("--transfer_ranks_per_node", type=int, default=8,
|
|
34
36
|
help="the number of ranks per node to transfer the files, default is 8.")
|
|
35
37
|
|
|
38
|
+
parser.add_argument("--contain_md5_files", action='store_true', default=False,
|
|
39
|
+
help="whether to contain the md5 files in the folder, default is False. " \
|
|
40
|
+
"If True, the md5 files will be transferred to the other nodes and verified. " \
|
|
41
|
+
"If False, the md5 files will be ignored.")
|
|
42
|
+
|
|
43
|
+
parser.add_argument("--include-string", type=str, default="",
|
|
44
|
+
help="the string to include the files, default is empty. " \
|
|
45
|
+
"Such as *.py, *.yaml, *.json, \"*.pt;*.pth\" etc. " \
|
|
46
|
+
"Only node-0 will include the files from the folder, " \
|
|
47
|
+
"If empty, will transfer all the files from the node-0's local folder.")
|
|
48
|
+
parser.add_argument("--exclude-string", type=str, default="",
|
|
49
|
+
help="the string to exclude the files, default is empty. " \
|
|
50
|
+
"Such as *.py, *.yaml, *.json, \"*.pt;*.pth\" etc. " \
|
|
51
|
+
"Only node-0 will exclude the files from the folder, " \
|
|
52
|
+
"If empty, will transfer all the files from the node-0's local folder.")
|
|
53
|
+
|
|
36
54
|
parser.add_argument("--from_blob_url", type=str, default="",
|
|
37
55
|
help="the blob url to download from, default is empty. " \
|
|
38
56
|
"Only node-0 will download the files from the blob url, " \
|
|
@@ -77,6 +95,52 @@ def get_ip_via_ssh(hostname):
|
|
|
77
95
|
return None
|
|
78
96
|
|
|
79
97
|
|
|
98
|
+
def parallel_check_md5(file_list, expected_md5s):
|
|
99
|
+
"""
|
|
100
|
+
Parallel check MD5 checksums for the given files.
|
|
101
|
+
|
|
102
|
+
Args:
|
|
103
|
+
file_list: List of file paths to check
|
|
104
|
+
expected_md5s: Mapping from each file path to its expected MD5 hash
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
True if all MD5 checksums match, False otherwise
|
|
108
|
+
"""
|
|
109
|
+
def calculate_md5(file_path):
|
|
110
|
+
# Call md5sum and capture output
|
|
111
|
+
result = subprocess.run(["md5sum", file_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
|
112
|
+
if result.returncode != 0:
|
|
113
|
+
print(f"Failed to calculate MD5 for {file_path}: {result.stderr}")
|
|
114
|
+
return file_path, None
|
|
115
|
+
|
|
116
|
+
# md5sum output format: "<md5_hash> <file_path>"
|
|
117
|
+
md5_hash = result.stdout.strip().split()[0]
|
|
118
|
+
return file_path, md5_hash
|
|
119
|
+
|
|
120
|
+
# Calculate MD5 checksums in parallel
|
|
121
|
+
with ThreadPoolExecutor(max_workers=8) as executor:
|
|
122
|
+
results = list(executor.map(calculate_md5, file_list))
|
|
123
|
+
|
|
124
|
+
# Check if all MD5s match
|
|
125
|
+
all_match = True
|
|
126
|
+
for file_path, actual_md5 in results:
|
|
127
|
+
if actual_md5 is None:
|
|
128
|
+
all_match = False
|
|
129
|
+
continue
|
|
130
|
+
|
|
131
|
+
if file_path not in expected_md5s:
|
|
132
|
+
print(f"No expected MD5 for {file_path}")
|
|
133
|
+
all_match = False
|
|
134
|
+
continue
|
|
135
|
+
|
|
136
|
+
expected_md5 = expected_md5s[file_path]
|
|
137
|
+
if actual_md5 != expected_md5:
|
|
138
|
+
print(f"MD5 mismatch for {file_path}: expected {expected_md5}, got {actual_md5}")
|
|
139
|
+
all_match = False
|
|
140
|
+
|
|
141
|
+
return all_match
|
|
142
|
+
|
|
143
|
+
|
|
80
144
|
def broadcast_folder_main(args):
|
|
81
145
|
with open(args.hostfile, "r") as f:
|
|
82
146
|
host_list = []
|
|
@@ -95,11 +159,17 @@ def broadcast_folder_main(args):
|
|
|
95
159
|
for i, host in enumerate(host_list):
|
|
96
160
|
put_commands = []
|
|
97
161
|
put_commands.append((__file__, os.path.join(remote_temp_config_dir, "broadcast.py")))
|
|
98
|
-
commnads = "NCCL_IB_DISABLE=0 OPENBLAS_NUM_THREADS=1 MKL_NUM_THREADS=1 "
|
|
162
|
+
commnads = "NCCL_IB_DISABLE=0 OPENBLAS_NUM_THREADS=1 MKL_NUM_THREADS=1 OMP_NUM_THREADS=1 "
|
|
99
163
|
if os.environ.get("SAS_TOKEN") is not None and i == 0:
|
|
100
164
|
commnads += f"SAS_TOKEN=\"{os.environ['SAS_TOKEN']}\" "
|
|
101
165
|
commnads += f"{args.torchrun_alias} --nproc_per_node={args.transfer_ranks_per_node} --nnodes={len(host_list)} --node_rank={i} --master_addr={master_addr} --master_port={args.port}"
|
|
102
166
|
commnads += f" {remote_temp_config_dir}/broadcast.py {args.folder} --tool {args.tool} --transfer_ranks_per_node {args.transfer_ranks_per_node} "
|
|
167
|
+
if args.contain_md5_files:
|
|
168
|
+
commnads += " --contain_md5_files"
|
|
169
|
+
if args.include_string:
|
|
170
|
+
commnads += f" --include-string \"{args.include_string}\""
|
|
171
|
+
if args.exclude_string:
|
|
172
|
+
commnads += f" --exclude-string \"{args.exclude_string}\""
|
|
103
173
|
if args.from_blob_url and i == 0:
|
|
104
174
|
commnads += f" --from_blob_url {args.from_blob_url}"
|
|
105
175
|
if args.md5_verify:
|
|
@@ -226,20 +296,53 @@ def broadcast_folder_worker(args):
|
|
|
226
296
|
file_size_dict = {}
|
|
227
297
|
|
|
228
298
|
if global_rank == 0:
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
299
|
+
# Parse include and exclude patterns
|
|
300
|
+
include_patterns = [p.strip() for p in args.include_string.split(";") if p.strip()]
|
|
301
|
+
exclude_patterns = [p.strip() for p in args.exclude_string.split(";") if p.strip()]
|
|
302
|
+
|
|
303
|
+
print(f"Include patterns: {include_patterns}")
|
|
304
|
+
print(f"Exclude patterns: {exclude_patterns}")
|
|
305
|
+
|
|
232
306
|
if not os.path.exists(args.folder):
|
|
233
307
|
raise ValueError(f"Folder {args.folder} does not exist.")
|
|
308
|
+
|
|
309
|
+
# Gather and filter files in a single pass
|
|
310
|
+
file_size_dict = {}
|
|
234
311
|
for root, dirs, files in os.walk(args.folder):
|
|
235
312
|
for file in files:
|
|
236
313
|
file_path = os.path.join(root, file)
|
|
237
|
-
|
|
238
|
-
|
|
314
|
+
file_name = os.path.basename(file_path)
|
|
315
|
+
|
|
316
|
+
# Skip md5 files if not containing them
|
|
317
|
+
if file_name.endswith(".md5") and not args.contain_md5_files:
|
|
318
|
+
continue
|
|
319
|
+
|
|
320
|
+
# Apply include filters first (if any)
|
|
321
|
+
included = not include_patterns # Include by default if no include patterns
|
|
322
|
+
if include_patterns:
|
|
323
|
+
for pattern in include_patterns:
|
|
324
|
+
if fnmatch.fnmatch(file_name, pattern):
|
|
325
|
+
included = True
|
|
326
|
+
break
|
|
327
|
+
|
|
328
|
+
# Then apply exclude filters
|
|
329
|
+
if included and exclude_patterns:
|
|
330
|
+
for pattern in exclude_patterns:
|
|
331
|
+
if fnmatch.fnmatch(file_name, pattern):
|
|
332
|
+
included = False
|
|
333
|
+
break
|
|
334
|
+
|
|
335
|
+
# Add to file dict if passes both filters
|
|
336
|
+
if included:
|
|
337
|
+
file_size_dict[file_path] = os.path.getsize(file_path)
|
|
338
|
+
|
|
339
|
+
print(f"After filtering: {len(file_size_dict)} files selected for transfer")
|
|
340
|
+
if len(include_patterns) > 0 or len(exclude_patterns) > 0:
|
|
341
|
+
print(f"Files selected for transfer: {file_size_dict.keys()}")
|
|
239
342
|
|
|
240
343
|
# sort the file list by size
|
|
241
344
|
file_list = sorted(file_size_dict.keys(), key=lambda x: file_size_dict[x], reverse=True)
|
|
242
|
-
file_size_list = [file_size_dict[file] for file in file_list]
|
|
345
|
+
file_size_list = [file_size_dict[file] for file in file_list]
|
|
243
346
|
obj_list = [file_list, file_size_list]
|
|
244
347
|
dist.broadcast_object_list(obj_list, src=0)
|
|
245
348
|
else:
|
|
@@ -251,6 +354,7 @@ def broadcast_folder_worker(args):
|
|
|
251
354
|
|
|
252
355
|
worker_g = workers_groups[worker_rank]
|
|
253
356
|
from_rank = global_rank % args.transfer_ranks_per_node
|
|
357
|
+
broadcast_file_list = []
|
|
254
358
|
for i in range(len(file_list)):
|
|
255
359
|
if i % args.transfer_ranks_per_node == worker_rank:
|
|
256
360
|
file_path = file_list[i]
|
|
@@ -261,6 +365,7 @@ def broadcast_folder_worker(args):
|
|
|
261
365
|
)
|
|
262
366
|
if global_rank == from_rank:
|
|
263
367
|
print(f"Group {global_rank} finished broadcasting {file_path}, size: {file_size / (1024 * 1024):.2f} MB, time taken: {time.time() - start_time:.2f}s")
|
|
368
|
+
broadcast_file_list.append(file_path)
|
|
264
369
|
|
|
265
370
|
dist.barrier()
|
|
266
371
|
for i in range(len(workers_groups)):
|
|
@@ -268,6 +373,27 @@ def broadcast_folder_worker(args):
|
|
|
268
373
|
dist.destroy_process_group(workers_groups[i])
|
|
269
374
|
destroy_process_group()
|
|
270
375
|
|
|
376
|
+
if args.contain_md5_files and global_rank % args.transfer_ranks_per_node == 0:
|
|
377
|
+
to_verify_files = []
|
|
378
|
+
excepted_md5s = {}
|
|
379
|
+
for file_path in file_list:
|
|
380
|
+
if not file_path.endswith(".md5"):
|
|
381
|
+
md5_file_path = file_path + ".md5"
|
|
382
|
+
if os.path.exists(md5_file_path):
|
|
383
|
+
with open(md5_file_path, "r") as f:
|
|
384
|
+
md5_hash = f.read().strip()
|
|
385
|
+
excepted_md5s[file_path] = md5_hash
|
|
386
|
+
to_verify_files.append(file_path)
|
|
387
|
+
else:
|
|
388
|
+
print(f"MD5 file {md5_file_path} not found, skipping verification.")
|
|
389
|
+
|
|
390
|
+
# Verify MD5 checksums
|
|
391
|
+
if not parallel_check_md5(to_verify_files, excepted_md5s):
|
|
392
|
+
print(f"MD5 verification failed for some files, please check the logs.")
|
|
393
|
+
raise ValueError("MD5 verification failed.")
|
|
394
|
+
else:
|
|
395
|
+
print(f"Rank-{global_rank}: MD5 verification passed for all files.")
|
|
396
|
+
|
|
271
397
|
print(f"Rank {global_rank} finished broadcasting all files, time taken: {time.time() - start_time:.2f}s")
|
|
272
398
|
|
|
273
399
|
|
addftool/deploy/ssh_server.py
CHANGED
|
@@ -67,7 +67,10 @@ def configure_ssh_on_ubuntu(port, username, ssh_public_key="", password=""):
|
|
|
67
67
|
execute_command(command_prefix + "apt-get install -y openssh-server")
|
|
68
68
|
|
|
69
69
|
print("Modifying SSH configuration...")
|
|
70
|
+
# if "#Port 22" in sshd_config
|
|
70
71
|
execute_command(command_prefix + f'sed -i "s/#Port 22/Port {port}/" /etc/ssh/sshd_config')
|
|
72
|
+
# if Port xxx in sshd_config
|
|
73
|
+
execute_command(command_prefix + f'sed -i "s/Port [0-9]*/Port {port}/" /etc/ssh/sshd_config')
|
|
71
74
|
execute_command(command_prefix + 'sed -i "s/#PermitRootLogin prohibit-password/PermitRootLogin yes/" /etc/ssh/sshd_config')
|
|
72
75
|
|
|
73
76
|
if not os.path.exists("/etc/ssh/ssh_host_rsa_key"):
|
addftool/tool.py
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
addftool/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
addftool/addf_portal.py,sha256=
|
|
3
|
-
addftool/blob.py,sha256=
|
|
4
|
-
addftool/broadcast_folder.py,sha256=
|
|
2
|
+
addftool/addf_portal.py,sha256=6XjwGs5m2mRVDWVvCPOiqn1NlxDcGBTQ9Kr_0g5RsJc,1130
|
|
3
|
+
addftool/blob.py,sha256=YT3eZrC9mdYtntDlwEI5MUHFw98wKtBC_pHEdtqvsv4,9206
|
|
4
|
+
addftool/broadcast_folder.py,sha256=X9tvvMT7cCDbfKqE7hUGaFbBOHIecIgD8xcT34Bqb_8,17708
|
|
5
5
|
addftool/sync.py,sha256=ZpYxbM8uiPFrV7ODmOaM7asVPCWaxBixA-arVc-1kfs,14045
|
|
6
|
-
addftool/tool.py,sha256=
|
|
6
|
+
addftool/tool.py,sha256=FmxRY3-pP0_Z0zCUAngjmEMmPUruMftg_iUlB1t2TnQ,2001
|
|
7
7
|
addftool/util.py,sha256=zlNLu8Be8cGIpNRqBw8_0q7nFxWlsJ9cToN62ohjdXE,2335
|
|
8
8
|
addftool/deploy/__init__.py,sha256=UL8b-Idt7lStlMiOm8oTZ65fdzYz99Fgzq2Gaw8WsZc,1544
|
|
9
9
|
addftool/deploy/azure.py,sha256=_o_9Eh8cVwLDAqvfyRYBtQRHs_Gul-nCs2ZXttwO1bk,1301
|
|
10
|
-
addftool/deploy/ssh_server.py,sha256=
|
|
10
|
+
addftool/deploy/ssh_server.py,sha256=7glpJJNROskpqPkeYrTc2MbVzRendUZLv-ZgPs6HCq8,5641
|
|
11
11
|
addftool/deploy/vscode_server.py,sha256=tLtSvlcK2fEOaw6udWt8dNELVhwv9F59hF5DJJ-1Nak,2666
|
|
12
12
|
addftool/process/__init__.py,sha256=Dze8OrcyjQlAbPrjE_h8bMi8W4b3OJyZOjTucPrkJvM,3721
|
|
13
13
|
addftool/process/utils.py,sha256=JldxnwanLJOgxaPgmCJh7SeBRaaj5rFxWWxh1hpsvbA,2609
|
|
14
14
|
addftool/ssh/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
addftool-0.2.
|
|
16
|
-
addftool-0.2.
|
|
17
|
-
addftool-0.2.
|
|
18
|
-
addftool-0.2.
|
|
19
|
-
addftool-0.2.
|
|
15
|
+
addftool-0.2.5.dist-info/METADATA,sha256=e0fYxOCuq9cps5H7TZS-5wxSN3sqrs9e6g2acvJcMSw,170
|
|
16
|
+
addftool-0.2.5.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
|
17
|
+
addftool-0.2.5.dist-info/entry_points.txt,sha256=9lkmuWMInwUAtev8w8poNkNd7iML9Bjd5CBCFVxg2b8,111
|
|
18
|
+
addftool-0.2.5.dist-info/top_level.txt,sha256=jqj56-plrBbyzY0tIxB6wPzjAA8kte4hUlajyyQygN4,9
|
|
19
|
+
addftool-0.2.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|