project-llm-trainer 0.8.1__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registries.
Potentially problematic release.
This version of project-llm-trainer might be problematic. Click here for more details.
- project_llm_trainer-0.8.2.data/scripts/ddp_train +21 -0
- project_llm_trainer-0.8.2.data/scripts/ds_train +17 -0
- {project_llm_trainer-0.8.1.data → project_llm_trainer-0.8.2.data}/scripts/smart_train +7 -16
- {project_llm_trainer-0.8.1.dist-info → project_llm_trainer-0.8.2.dist-info}/METADATA +1 -1
- {project_llm_trainer-0.8.1.dist-info → project_llm_trainer-0.8.2.dist-info}/RECORD +11 -11
- project_llm_trainer-0.8.1.data/scripts/ddp_train +0 -24
- project_llm_trainer-0.8.1.data/scripts/ds_train +0 -30
- {project_llm_trainer-0.8.1.data → project_llm_trainer-0.8.2.data}/scripts/calc_intermediate_size +0 -0
- {project_llm_trainer-0.8.1.data → project_llm_trainer-0.8.2.data}/scripts/plot_loss +0 -0
- {project_llm_trainer-0.8.1.data → project_llm_trainer-0.8.2.data}/scripts/plot_lr +0 -0
- {project_llm_trainer-0.8.1.data → project_llm_trainer-0.8.2.data}/scripts/py_train +0 -0
- {project_llm_trainer-0.8.1.dist-info → project_llm_trainer-0.8.2.dist-info}/WHEEL +0 -0
- {project_llm_trainer-0.8.1.dist-info → project_llm_trainer-0.8.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
#!python
|
|
2
|
+
|
|
3
|
+
if __name__ == '__main__':
|
|
4
|
+
import os, sys
|
|
5
|
+
arguments = sys.argv[1:]
|
|
6
|
+
# file_name
|
|
7
|
+
run_file_name = arguments[0]
|
|
8
|
+
|
|
9
|
+
extra_args = ''
|
|
10
|
+
if len(arguments) > 1:
|
|
11
|
+
extra_args = f"{' '.join(arguments[1:])} "
|
|
12
|
+
|
|
13
|
+
os.environ['PARALLEL_TYPE'] = 'ddp'
|
|
14
|
+
|
|
15
|
+
if len(extra_args) == 0:
|
|
16
|
+
extra_args = '--standalone --nproc_per_node=gpu '
|
|
17
|
+
|
|
18
|
+
command = f'torchrun {extra_args}{run_file_name}'
|
|
19
|
+
|
|
20
|
+
print(f'run command {command}')
|
|
21
|
+
os.system(command)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#!python
|
|
2
|
+
|
|
3
|
+
if __name__ == '__main__':
|
|
4
|
+
import os, sys
|
|
5
|
+
arguments = sys.argv[1:]
|
|
6
|
+
# file_name
|
|
7
|
+
run_file_name = arguments[0]
|
|
8
|
+
|
|
9
|
+
extra_args = ''
|
|
10
|
+
if len(arguments) > 1:
|
|
11
|
+
extra_args = f"{' '.join(arguments[1:])} "
|
|
12
|
+
|
|
13
|
+
os.environ['PARALLEL_TYPE'] = 'ds'
|
|
14
|
+
command = f'deepspeed {extra_args}{run_file_name}'
|
|
15
|
+
|
|
16
|
+
print(f'run command {command}')
|
|
17
|
+
os.system(command)
|
|
@@ -7,18 +7,9 @@ if __name__ == '__main__':
|
|
|
7
7
|
# file name
|
|
8
8
|
run_file_name = arguments[0]
|
|
9
9
|
|
|
10
|
-
|
|
10
|
+
extra_args = ''
|
|
11
11
|
if len(arguments) > 1:
|
|
12
|
-
|
|
13
|
-
cuda_visible_devive = arguments[1]
|
|
14
|
-
else:
|
|
15
|
-
cuda_visible_devive = None
|
|
16
|
-
|
|
17
|
-
# cuda location
|
|
18
|
-
if len(arguments) > 2:
|
|
19
|
-
cuda_loc = arguments[2]
|
|
20
|
-
else:
|
|
21
|
-
cuda_loc = 'localhost'
|
|
12
|
+
extra_args = f"{' '.join(arguments[1:])} "
|
|
22
13
|
|
|
23
14
|
try:
|
|
24
15
|
import deepspeed
|
|
@@ -33,12 +24,12 @@ if __name__ == '__main__':
|
|
|
33
24
|
os.environ['PARALLEL_TYPE'] = parallel_type
|
|
34
25
|
|
|
35
26
|
if parallel_type == 'ds':
|
|
36
|
-
|
|
37
|
-
command = f'deepspeed{cuda_ctrl} {run_file_name}'
|
|
27
|
+
command = f'deepspeed {extra_args}{run_file_name}'
|
|
38
28
|
elif parallel_type == 'ddp':
|
|
39
|
-
if
|
|
40
|
-
|
|
41
|
-
|
|
29
|
+
if len(extra_args) == 0:
|
|
30
|
+
extra_args = '--standalone --nproc_per_node=gpu '
|
|
31
|
+
|
|
32
|
+
command = f'torchrun {extra_args}{run_file_name}'
|
|
42
33
|
else:
|
|
43
34
|
command = f'python3 {run_file_name}'
|
|
44
35
|
|
|
@@ -20,14 +20,14 @@ llm_trainer/tools.py,sha256=5op5qrjjkK-Lr9oes5VxIVnOVYOYGoAdlIJq9mPUf64,2637
|
|
|
20
20
|
llm_trainer/train_configs.py,sha256=pPZkbliRdTnWSv3TUuTM23x9RDdMhGSPrxbNAyzDklY,7636
|
|
21
21
|
llm_trainer/trainer.py,sha256=diP-1suOf2U5dY_R8QH5arAx4MgBrKW-GBQ2_ScGNM8,28799
|
|
22
22
|
llm_trainer/utils.py,sha256=xC5plG-8-_Al5yIF5xIU5lroOcBBk98TEhtUJrazZPE,12305
|
|
23
|
-
project_llm_trainer-0.8.
|
|
24
|
-
project_llm_trainer-0.8.
|
|
25
|
-
project_llm_trainer-0.8.
|
|
26
|
-
project_llm_trainer-0.8.
|
|
27
|
-
project_llm_trainer-0.8.
|
|
28
|
-
project_llm_trainer-0.8.
|
|
29
|
-
project_llm_trainer-0.8.
|
|
30
|
-
project_llm_trainer-0.8.
|
|
31
|
-
project_llm_trainer-0.8.
|
|
32
|
-
project_llm_trainer-0.8.
|
|
33
|
-
project_llm_trainer-0.8.
|
|
23
|
+
project_llm_trainer-0.8.2.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
|
|
24
|
+
project_llm_trainer-0.8.2.data/scripts/ddp_train,sha256=eZSud6KYQAoKLsYB5QB-FI2zq5AZm6Apq1azKdupV3o,477
|
|
25
|
+
project_llm_trainer-0.8.2.data/scripts/ds_train,sha256=41q4rOxwbvZDUY0FDdAIpG13PEaUWBpthhvFvww8uOc,388
|
|
26
|
+
project_llm_trainer-0.8.2.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
|
|
27
|
+
project_llm_trainer-0.8.2.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
|
|
28
|
+
project_llm_trainer-0.8.2.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
|
|
29
|
+
project_llm_trainer-0.8.2.data/scripts/smart_train,sha256=N8dp2n7k6bghGczedBVwOdtf1O66oM_cNPh9QmZt0bM,914
|
|
30
|
+
project_llm_trainer-0.8.2.dist-info/METADATA,sha256=XlNe-d24OrjYkzrJMiQCjiZPT70QOFRd4K2XrVDWZiY,195
|
|
31
|
+
project_llm_trainer-0.8.2.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
32
|
+
project_llm_trainer-0.8.2.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
|
|
33
|
+
project_llm_trainer-0.8.2.dist-info/RECORD,,
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
#!python
|
|
2
|
-
|
|
3
|
-
if __name__ == '__main__':
|
|
4
|
-
import os, sys
|
|
5
|
-
arguments = sys.argv[1:]
|
|
6
|
-
# file_name
|
|
7
|
-
run_file_name = arguments[0]
|
|
8
|
-
|
|
9
|
-
# cuda_visible_devive
|
|
10
|
-
if len(arguments) > 1:
|
|
11
|
-
# 0,1,2,3
|
|
12
|
-
cuda_visible_devive = arguments[1]
|
|
13
|
-
else:
|
|
14
|
-
cuda_visible_devive = None
|
|
15
|
-
|
|
16
|
-
os.environ['PARALLEL_TYPE'] = 'ddp'
|
|
17
|
-
|
|
18
|
-
if cuda_visible_devive:
|
|
19
|
-
os.environ['CUDA_VISIBLE_DEVICES'] = cuda_visible_devive
|
|
20
|
-
|
|
21
|
-
command = f'torchrun --standalone --nproc_per_node=gpu {run_file_name}'
|
|
22
|
-
|
|
23
|
-
print(f'run command {command}')
|
|
24
|
-
os.system(command)
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
#!python
|
|
2
|
-
|
|
3
|
-
if __name__ == '__main__':
|
|
4
|
-
import os, sys
|
|
5
|
-
arguments = sys.argv[1:]
|
|
6
|
-
# file_name
|
|
7
|
-
run_file_name = arguments[0]
|
|
8
|
-
|
|
9
|
-
# cuda_visible_devive
|
|
10
|
-
if len(arguments) > 1:
|
|
11
|
-
# 0,1,2,3
|
|
12
|
-
cuda_visible_devive = arguments[1]
|
|
13
|
-
|
|
14
|
-
# cuda location
|
|
15
|
-
if len(arguments) > 2:
|
|
16
|
-
cuda_loc = arguments[2]
|
|
17
|
-
else:
|
|
18
|
-
cuda_loc = 'localhost'
|
|
19
|
-
else:
|
|
20
|
-
cuda_visible_devive = None
|
|
21
|
-
cuda_loc = None
|
|
22
|
-
|
|
23
|
-
os.environ['PARALLEL_TYPE'] = 'ds'
|
|
24
|
-
|
|
25
|
-
cuda_ctrl = f' --include {cuda_loc}:{cuda_visible_devive}' if cuda_visible_devive else ''
|
|
26
|
-
|
|
27
|
-
command = f'deepspeed{cuda_ctrl} {run_file_name}'
|
|
28
|
-
|
|
29
|
-
print(f'run command {command}')
|
|
30
|
-
os.system(command)
|
{project_llm_trainer-0.8.1.data → project_llm_trainer-0.8.2.data}/scripts/calc_intermediate_size
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|