project-llm-trainer 0.12.3__py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- llm_trainer/__init__.py +13 -0
- llm_trainer/base_trainer.py +683 -0
- llm_trainer/checkpoint.py +126 -0
- llm_trainer/dataset.py +335 -0
- llm_trainer/dpo_trainer.py +297 -0
- llm_trainer/ds_checkpoint.py +63 -0
- llm_trainer/eval.py +33 -0
- llm_trainer/generate_utils.py +450 -0
- llm_trainer/grpo_trainer.py +385 -0
- llm_trainer/log.py +65 -0
- llm_trainer/loss.py +268 -0
- llm_trainer/parallel.py +220 -0
- llm_trainer/partition_utils.py +219 -0
- llm_trainer/ppo_trainer.py +521 -0
- llm_trainer/scheduler.py +179 -0
- llm_trainer/sft_trainer.py +97 -0
- llm_trainer/tokenizer.py +162 -0
- llm_trainer/tools.py +116 -0
- llm_trainer/train_configs.py +324 -0
- llm_trainer/trainer.py +34 -0
- llm_trainer/utils.py +547 -0
- project_llm_trainer-0.12.3.data/scripts/calc_intermediate_size +15 -0
- project_llm_trainer-0.12.3.data/scripts/ddp_train +21 -0
- project_llm_trainer-0.12.3.data/scripts/ds_train +17 -0
- project_llm_trainer-0.12.3.data/scripts/plot_log +69 -0
- project_llm_trainer-0.12.3.data/scripts/plot_lr +45 -0
- project_llm_trainer-0.12.3.data/scripts/py_train +12 -0
- project_llm_trainer-0.12.3.data/scripts/smart_train +37 -0
- project_llm_trainer-0.12.3.dist-info/METADATA +9 -0
- project_llm_trainer-0.12.3.dist-info/RECORD +32 -0
- project_llm_trainer-0.12.3.dist-info/WHEEL +5 -0
- project_llm_trainer-0.12.3.dist-info/top_level.txt +1 -0
project_llm_trainer-0.12.3.data/scripts/plot_log
@@ -0,0 +1,69 @@
#!python
import math
import os, sys
import matplotlib.pyplot as plt
from numpy import ndarray
from matplotlib.ticker import MaxNLocator

if __name__ == '__main__':
    arguments = sys.argv[1:]
    loss_file = arguments[0]

    if not os.path.exists(loss_file):
        print(f'{loss_file} not found')
        exit(0)

    results = {}

    # Expected log format:
    #   ====epoch: {epoch}, start train {file_name}====
    #   [time] keys_key1: keys_value1, keys_key2: keys_value2 -> values_key1: values_value1, values_key2: values_value2
    with open(loss_file, 'r') as f:
        for line in f:
            # skip epoch markers and lines that do not match the metric format
            if '====' in line or ' -> ' not in line:
                continue

            # values_key1: values_value1, values_key2: values_value2
            values_kvs = line.split(' -> ')[1].split(', ')
            for values_kv in values_kvs:
                k, v = values_kv.split(': ')
                if k not in results:
                    results[k] = [float(v.strip())]
                else:
                    results[k].append(float(v.strip()))

    # Up to 4 subplots per row; add rows as the number of metrics grows.
    results_size = len(results.keys())
    if results_size <= 4:
        rows = 1
        cols = results_size
    else:
        rows = math.ceil(results_size / 4)
        cols = 4

    fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(4 * cols, 4 * rows))

    # plt.subplots returns a bare Axes (not an ndarray) when rows == cols == 1
    if isinstance(axes, ndarray):
        axes = axes.flatten()
    else:
        axes = [axes]

    for idx, title in enumerate(results.keys()):
        ax = axes[idx]
        y = results[title]
        x = list(range(len(y)))

        ax.plot(x, y)
        ax.set_title(title)
        ax.xaxis.set_major_locator(MaxNLocator(nbins=10))
        ax.tick_params(axis='x', rotation=30)
        ax.set_xlabel("Step")
        ax.set_ylabel(title)

    # Hide any unused subplots in the grid.
    total_plots = len(results.keys())
    for i in range(total_plots, len(axes)):
        axes[i].set_visible(False)

    plt.tight_layout()
    plt.show()
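For reference, plot_log takes the path of a training log as its only argument, collects every metric that appears after the ` -> ` separator, and draws one subplot per metric. A minimal sketch of a matching input, assuming a hypothetical file name (train.log) and metric names (loss, lr) that the script itself does not mandate:

    # build a sample log and plot it; names and values are illustrative only
    sample = (
        '====epoch: 0, start train data_000.bin====\n'
        '[2024-01-01 00:00:01] epoch: 0, step: 1 -> loss: 2.31, lr: 0.0001\n'
        '[2024-01-01 00:00:02] epoch: 0, step: 2 -> loss: 2.27, lr: 0.0002\n'
    )
    with open('train.log', 'w') as f:
        f.write(sample)
    # then run: plot_log train.log   (one subplot each for loss and lr)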
project_llm_trainer-0.12.3.data/scripts/plot_lr
@@ -0,0 +1,45 @@
#!python
import os, sys
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator

if __name__ == '__main__':
    arguments = sys.argv[1:]
    lr_file = arguments[0]

    if not os.path.exists(lr_file):
        print(f'{lr_file} not found')
        exit(0)

    lrs = {}
    # Expected log format: [time] step: {self.cur_steps}, lr: {lr}
    with open(lr_file, 'r') as f:
        for line in f:
            # skip blank lines (file iteration yields '\n', which is truthy)
            if not line.strip():
                continue

            data = line.split('step: ')[-1]
            data = data.split(', lr:')

            step = int(data[0].strip())
            lr = float(data[1].strip())

            lrs[step] = lr

    plt.title('lr')
    plt.xlabel("Step")
    plt.ylabel("Learning Rate")

    y = list(lrs.values())
    x = list(range(len(y)))

    ax = plt.gca()
    plt.plot(x, y)
    ax.xaxis.set_major_locator(MaxNLocator(nbins=20))

    plt.xticks(rotation=30)

    plt.tight_layout()
    plt.show()
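plot_lr follows the same pattern for learning-rate logs, expecting one `step: ..., lr: ...` entry per line; everything before 'step: ' (e.g. a timestamp) is discarded by the split. A minimal matching input, with a hypothetical file name and values:

    sample = (
        '[2024-01-01 00:00:01] step: 100, lr: 0.000125\n'
        '[2024-01-01 00:00:02] step: 200, lr: 0.000250\n'
    )
    with open('lr.log', 'w') as f:
        f.write(sample)
    # then run: plot_lr lr.log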
project_llm_trainer-0.12.3.data/scripts/py_train
@@ -0,0 +1,12 @@
#!python

if __name__ == '__main__':
    import os, sys
    arguments = sys.argv[1:]
    run_file_name = arguments[0]

    os.environ['PARALLEL_TYPE'] = 'none'
    command = f'python3 {run_file_name}'

    print(f'real command is {command}')
    os.system(command)
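py_train is the single-process launcher: it pins PARALLEL_TYPE to 'none' in the environment (which the shell spawned by os.system inherits) and delegates to python3. A usage sketch with a hypothetical training script:

    py_train train.py
    # prints: real command is python3 train.py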
project_llm_trainer-0.12.3.data/scripts/smart_train
@@ -0,0 +1,37 @@
#!python

if __name__ == '__main__':
    import os, sys, torch

    arguments = sys.argv[1:]
    # file name
    run_file_name = arguments[0]

    extra_args = ''
    if len(arguments) > 1:
        extra_args = f"{' '.join(arguments[1:])} "

    # Prefer deepspeed when it is installed; otherwise fall back to DDP
    # (multi-GPU) or a plain single process.
    try:
        import deepspeed
        parallel_type = 'ds'
    except ImportError:
        gpu_count = torch.cuda.device_count()
        if gpu_count <= 1:
            parallel_type = 'none'
        else:
            parallel_type = 'ddp'

    os.environ['PARALLEL_TYPE'] = parallel_type

    if parallel_type == 'ds':
        command = f'deepspeed {extra_args}{run_file_name}'
    elif parallel_type == 'ddp':
        if len(extra_args) == 0:
            extra_args = '--standalone --nproc_per_node=gpu '

        command = f'torchrun {extra_args}{run_file_name}'
    else:
        command = f'python3 {run_file_name}'

    print(f'run command {command}')
    os.system(command)
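smart_train picks the launcher by probing the environment: deepspeed when the deepspeed package imports, torchrun when more than one CUDA device is visible, and plain python3 otherwise; the chosen mode is also exported as PARALLEL_TYPE for the training script to read. A sketch of the possible dispatches, with a hypothetical script name:

    smart_train train.py
    # deepspeed importable -> deepspeed train.py                                  (PARALLEL_TYPE=ds)
    # else, >1 GPU         -> torchrun --standalone --nproc_per_node=gpu train.py (PARALLEL_TYPE=ddp)
    # else                 -> python3 train.py                                    (PARALLEL_TYPE=none)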
project_llm_trainer-0.12.3.dist-info/RECORD
@@ -0,0 +1,32 @@
llm_trainer/__init__.py,sha256=U_rFD6hqNJuNXjcKJ9QnxnAL3SXhyWdGZEcA5GbrU3s,385
llm_trainer/base_trainer.py,sha256=tAqUdSsrJBTBZFZKsinHAoBr7KmDD72qJHux2lMOMYg,29029
llm_trainer/checkpoint.py,sha256=Aal5D7pVPVRlLZU3WAJKC6-cXoDTIj2JdH_InOaP_1E,4466
llm_trainer/dataset.py,sha256=SuUedIU46yiHRIz-Fa5pgQr5h9UMQKQ6OSyvQ8xkMow,10917
llm_trainer/dpo_trainer.py,sha256=jSx2g9snX6sNounpU9gcUZzv4XVRyslxrM5msR5o6Ko,12687
llm_trainer/ds_checkpoint.py,sha256=I67co_LttpX7nIr5rW_qjtt_QJxKG_UiIvqKYI89rA0,2304
llm_trainer/eval.py,sha256=uuzWF40xfEx5nPntVEXdyb9UnWiG9cSWF0N3v5FFZDk,981
llm_trainer/generate_utils.py,sha256=wdOmU3PvMP0OzlsE8_zvoK_Kcq0saQm10_vTozfFxjA,15792
llm_trainer/grpo_trainer.py,sha256=EKwkmTZWAQrNPQQDSRV4ucAunj0_iEYBhJoog9yOQWE,14882
llm_trainer/log.py,sha256=BCb8qzs2TGltBFHNuDeEibT6FgBZZTZ-Ijuu1XNOSes,1746
llm_trainer/loss.py,sha256=56Q0sIO8J4uVOgyvbnHDBdls5m3iW3HrsQ2XWN4zC-I,10228
llm_trainer/parallel.py,sha256=eWRcqFkOfWM50Chv6gKpifAkaoxF3h8lr3592QXBmx8,6199
llm_trainer/partition_utils.py,sha256=EMXVGi-AN2piqbOCQei7WmddwQ07jwC5RWClaofIj9Q,8087
llm_trainer/ppo_trainer.py,sha256=8uY2cYfCYLb_hIN9u0VgP-IMY7D-c0lfIUY-a66Dy84,22445
llm_trainer/scheduler.py,sha256=7VTmv6slOSB03-KY9nCEzsOrqPW9Jw-jPDxVudmGPzw,5178
llm_trainer/sft_trainer.py,sha256=NWUkHJe3Ii54bwlnBKWs2pP7zIOUM47Sc7A5TWXG_AI,3682
llm_trainer/tokenizer.py,sha256=8Mccp4sCaYWiKVD78dEwBMHlA9uS0xf22FOiVxTVtK4,5875
llm_trainer/tools.py,sha256=7i5ZdCE-TOtoD8hz1Xzx9mIe3wANTd3la_T3vXp6LuM,3328
llm_trainer/train_configs.py,sha256=FjYuW2e9CuTGm07-wfjow_49R7mhAjdcHpdifFPcuRo,10384
llm_trainer/trainer.py,sha256=PsSDZvvNVrFun7B_sUYA0QsBaC-2C-CYb6ey3PlRWCw,1210
llm_trainer/utils.py,sha256=TumXZvN7EyxvTsXYdGwaKlPfup-VK3HsF3GJOM0zrf4,20380
project_llm_trainer-0.12.3.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
project_llm_trainer-0.12.3.data/scripts/ddp_train,sha256=eZSud6KYQAoKLsYB5QB-FI2zq5AZm6Apq1azKdupV3o,477
project_llm_trainer-0.12.3.data/scripts/ds_train,sha256=41q4rOxwbvZDUY0FDdAIpG13PEaUWBpthhvFvww8uOc,388
project_llm_trainer-0.12.3.data/scripts/plot_log,sha256=EuYQ2_xx98PEtuDr84B4dIji3QSPBHC6WefqyqX7GwI,1872
project_llm_trainer-0.12.3.data/scripts/plot_lr,sha256=TfLXzqHIFo3mVPy-v-WZlD8zK6Q8IEb1V-fZiwoOug0,922
project_llm_trainer-0.12.3.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
project_llm_trainer-0.12.3.data/scripts/smart_train,sha256=N8dp2n7k6bghGczedBVwOdtf1O66oM_cNPh9QmZt0bM,914
project_llm_trainer-0.12.3.dist-info/METADATA,sha256=QuIPMCqL2V4KoiJkdDF-8Zsb2PZU9tMgqXdYVH53j1g,196
project_llm_trainer-0.12.3.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
project_llm_trainer-0.12.3.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
project_llm_trainer-0.12.3.dist-info/RECORD,,
project_llm_trainer-0.12.3.dist-info/top_level.txt
@@ -0,0 +1 @@
llm_trainer