project-llm-trainer 0.7.3__py3-none-any.whl → 0.7.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of project-llm-trainer might be problematic.
- llm_trainer/grpo_trainer.py +18 -9
- llm_trainer/loss.py +5 -4
- llm_trainer/train_configs.py +4 -3
- {project_llm_trainer-0.7.3.dist-info → project_llm_trainer-0.7.5.dist-info}/METADATA +1 -1
- {project_llm_trainer-0.7.3.dist-info → project_llm_trainer-0.7.5.dist-info}/RECORD +14 -14
- {project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/calc_intermediate_size +0 -0
- {project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/ddp_train +0 -0
- {project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/ds_train +0 -0
- {project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/plot_loss +0 -0
- {project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/plot_lr +0 -0
- {project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/py_train +0 -0
- {project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/smart_train +0 -0
- {project_llm_trainer-0.7.3.dist-info → project_llm_trainer-0.7.5.dist-info}/WHEEL +0 -0
- {project_llm_trainer-0.7.3.dist-info → project_llm_trainer-0.7.5.dist-info}/top_level.txt +0 -0
llm_trainer/grpo_trainer.py
CHANGED

@@ -50,6 +50,10 @@ class GRPOTrainer(Trainer):
         self._use_origin_pad_sequence = True
 
     def _init_ref_model(self):
+        # beta == 0: no ref_model is needed
+        if self.train_config.grpo_config.loss_beta == 0.0:
+            return None
+
         ref_model = self._new_model(self.train_config)
 
         ref_model, _ = TrainerTools().parallel.process(
@@ -68,7 +72,8 @@ class GRPOTrainer(Trainer):
     def _init_loss(self):
         criterion = GRPOLoss(
             beta=self.train_config.grpo_config.loss_beta,
-            clip_eps=self.train_config.grpo_config.loss_clip_eps,
+            clip_eps_low=self.train_config.grpo_config.loss_clip_eps,
+            clip_eps_high=self.train_config.grpo_config.loss_clip_eps_high,
             delta=self.train_config.grpo_config.loss_delta,
             importance_sampling_level=self.train_config.grpo_config.loss_importance_sampling_level,
             loss_type=self.train_config.grpo_config.loss_type,
@@ -229,8 +234,11 @@ class GRPOTrainer(Trainer):
         # Compute old_log_probs from the current model, with gradients disabled.
         old_log_probs, _ = self._compute_log_probabilities(generate_model, input_ids, attention_mask, logits_to_keep)
 
-        # Compute ref_log_probs from the reference model, which remains static.
-        ref_log_probs, _ = self._compute_log_probabilities(self.ref_model, input_ids, attention_mask, logits_to_keep)
+        if self.ref_model:
+            # Compute ref_log_probs from the reference model, which remains static.
+            ref_log_probs, _ = self._compute_log_probabilities(self.ref_model, input_ids, attention_mask, logits_to_keep)
+        else:
+            ref_log_probs = None
 
         repeated_prompts = [p for p in prompts for _ in range(group_size)]
         repeated_answers = [a for a in answers for _ in range(group_size)]
@@ -293,11 +301,12 @@ class GRPOTrainer(Trainer):
         aux_loss_coef = self.train_config.loss_config.aux_loss_coef
 
         for epoch in range(self.train_config.n_epochs):
-            sync_model_params(
-                _from=self.train_model,
-                _to=self.ref_model,
-                mixup_alpha=self.train_config.grpo_config.mixup_alpha
-            )
+            if self.ref_model:
+                sync_model_params(
+                    _from=self.train_model,
+                    _to=self.ref_model,
+                    mixup_alpha=self.train_config.grpo_config.mixup_alpha
+                )
 
             file_count = len(self.train_config.file_dataset)
 
@@ -365,7 +374,7 @@ class GRPOTrainer(Trainer):
                     self._log_loss(
                         epoch_tag=f'epoch: {epoch}',
                         file_tag=f'file: {file_idx + 1}/{file_count}',
-                        batch_tag=f'batch: {batch}/{batch_count_per_file}',
+                        batch_tag=f'batch: {batch}/{batch_count_per_file}, grpo_step={grpo_step}',
                         loss=current_loss
                     )
                 except Exception as e:
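In 0.7.5 the reference model becomes optional: _init_ref_model returns None when loss_beta == 0.0 (no KL penalty), and every use of self.ref_model is guarded, so ref_log_probs is simply None in that mode. The per-epoch sync_model_params call only appears at its call site in this diff; below is a minimal sketch of what it could be doing, assuming mixup_alpha blends policy weights into the reference model EMA-style (with mixup_alpha=1.0, the GRPOConfig default, reducing to a plain copy). The function body is an assumption, not the package's code:

import torch

@torch.no_grad()
def sync_model_params(_from: torch.nn.Module, _to: torch.nn.Module, mixup_alpha: float = 1.0) -> None:
    # Assumed semantics: ref <- mixup_alpha * policy + (1 - mixup_alpha) * ref
    # (requires both modules to have identical architectures)
    for src, dst in zip(_from.parameters(), _to.parameters()):
        dst.mul_(1.0 - mixup_alpha).add_(src, alpha=mixup_alpha)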
llm_trainer/loss.py
CHANGED

@@ -2,7 +2,6 @@ from typing import List, Optional
 import torch
 from torch import nn
 import torch.nn.functional as F
-from .tools import TrainerTools
 
 
 class LMLoss(nn.Module):
@@ -127,7 +126,8 @@ class GRPOLoss(nn.Module):
     def __init__(
         self,
         beta: float,
-        clip_eps: float,
+        clip_eps_low: float,
+        clip_eps_high: Optional[float] = None,
         delta: Optional[float] = None,
         importance_sampling_level: str = 'token',
         loss_type: str = 'grpo',
@@ -136,7 +136,8 @@ class GRPOLoss(nn.Module):
         super().__init__()
 
         self.beta = beta
-        self.clip_eps = clip_eps
+        self.clip_eps_low = clip_eps_low
+        self.clip_eps_high = clip_eps_high if clip_eps_high else clip_eps_low
         self.delta = delta
         self.importance_sampling_level = importance_sampling_level
         self.loss_type = loss_type
@@ -166,7 +167,7 @@ class GRPOLoss(nn.Module):
             log_importance_weights = log_ratio
 
         coef_1 = torch.exp(log_importance_weights)
-        coef_2 = torch.clamp(coef_1, 1 - self.clip_eps, 1 + self.clip_eps)
+        coef_2 = torch.clamp(coef_1, 1 - self.clip_eps_low, 1 + self.clip_eps_high)
 
         # Two-sided clipping
        if self.delta is not None:
llm_trainer/train_configs.py
CHANGED

@@ -138,10 +138,11 @@ class GRPOConfig:
     grpo_steps: int = 1
     group_size: int = 12
     mixup_alpha: float = 1.0
-    loss_beta: float = 0.04
-    loss_clip_eps: float =
+    loss_beta: float = 0.0 # or 0.04 for grpo
+    loss_clip_eps: float = 3e-4
+    loss_clip_eps_high: Optional[float] = 4e-4
     loss_delta: Optional[float] = None
-    loss_importance_sampling_level: str = '
+    loss_importance_sampling_level: str = 'seq' # token or seq
     loss_type: str = 'grpo' # grpo or bnpo or dr_grpo
     gen_max_new_tokens: Optional[int] = None
     gen_temperature: Optional[float] = None
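Taken together, the new defaults switch the trainer to a KL-free, sequence-level setup. A hypothetical sketch of how these fields might be set, assuming GRPOConfig is constructed like a plain dataclass with defaults for the fields omitted here:

from llm_trainer.train_configs import GRPOConfig

config = GRPOConfig(
    loss_beta=0.0,                          # 0.0 disables the KL term and the ref model
    loss_clip_eps=3e-4,                     # lower clip bound: ratio >= 1 - 3e-4
    loss_clip_eps_high=4e-4,                # upper clip bound: ratio <= 1 + 4e-4
    loss_importance_sampling_level='seq',   # 'token' or 'seq'
    loss_type='grpo',                       # 'grpo', 'bnpo' or 'dr_grpo'
)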
{project_llm_trainer-0.7.3.dist-info → project_llm_trainer-0.7.5.dist-info}/RECORD
CHANGED

@@ -5,9 +5,9 @@ llm_trainer/dpo_trainer.py,sha256=RMfbTsl3eav4yTJ2PK59mi6a0ECVOg8WwYVsHvMbNUE,12
 llm_trainer/ds_checkpoint.py,sha256=X2IWgpgi0yOtogph7n6DEwvK_0Ceb7juu1WMutv3HSk,2270
 llm_trainer/eval.py,sha256=ZyUfSo2Q8P-lrCdPEnGkoo5pGubd0AabREK5eMISRII,1109
 llm_trainer/generate_utils.py,sha256=8K3YFbp7IF_lCkmkzjHhqTW26EBFb2AilQmarVcfMvs,15001
-llm_trainer/grpo_trainer.py,sha256=
+llm_trainer/grpo_trainer.py,sha256=MXnP8Kc9CQJw0CB3uMbHxIYwvpuujai4hgbbpUut_K4,16808
 llm_trainer/log.py,sha256=XwychwKF6gvFPhthCIZCAEUZ0G3DY3fiQrOHqPWsxz0,463
-llm_trainer/loss.py,sha256=
+llm_trainer/loss.py,sha256=glf4IeDWHvA2cJo-QKLRL8P6OxK4QjRJGrYJWOZiTPQ,6929
 llm_trainer/parallel.py,sha256=yjStV21DJ26yM8-0O6GTMxdFAcyShY5GsQWSZmbI7HU,4543
 llm_trainer/parallel_ddp.py,sha256=Pob9vUlBZnkL4oP1Re11kFob7nufMSE96pn7m7fuOEM,1345
 llm_trainer/parallel_ds.py,sha256=oy8RRxHud3rACWubFlJqqd0pjPEQhKeAPGPQUSdJX2c,1145
@@ -17,17 +17,17 @@ llm_trainer/scheduler.py,sha256=LAI_0VxClsIQkix0bRoduRD4vPfVuIZDhZgTAT_KK8k,4901
 llm_trainer/sft_trainer.py,sha256=LudTRIaqLQYy6ym6jjMX7v9xtFBJelrR3nnPCwb48nM,1821
 llm_trainer/tokenizer.py,sha256=SSpgXtb0e1NtQqRW0gCq09TTZi47umggy-Fh5EMHKJg,6708
 llm_trainer/tools.py,sha256=5op5qrjjkK-Lr9oes5VxIVnOVYOYGoAdlIJq9mPUf64,2637
-llm_trainer/train_configs.py,sha256=
+llm_trainer/train_configs.py,sha256=N3ykM1uaLHcSNRC8ErYIxp9VYhSP7voJyAP-2D4ZJe0,7574
 llm_trainer/trainer.py,sha256=jS31zEXIIj9BoPTPlmaGYq61x72HGCjKfS2u3_gOkDk,27924
 llm_trainer/utils.py,sha256=xcdzpvPvXRKqsOK2yB7PZ9GmOvZMDFcglDPUZY2hJTY,11484
-project_llm_trainer-0.7.3.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
-project_llm_trainer-0.7.3.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
-project_llm_trainer-0.7.3.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
-project_llm_trainer-0.7.3.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
-project_llm_trainer-0.7.3.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
-project_llm_trainer-0.7.3.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
-project_llm_trainer-0.7.3.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
-project_llm_trainer-0.7.3.dist-info/METADATA,sha256=
-project_llm_trainer-0.7.3.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-project_llm_trainer-0.7.3.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
-project_llm_trainer-0.7.3.dist-info/RECORD,,
+project_llm_trainer-0.7.5.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
+project_llm_trainer-0.7.5.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
+project_llm_trainer-0.7.5.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
+project_llm_trainer-0.7.5.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
+project_llm_trainer-0.7.5.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
+project_llm_trainer-0.7.5.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
+project_llm_trainer-0.7.5.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
+project_llm_trainer-0.7.5.dist-info/METADATA,sha256=9DcoFVuXDrhxZOVWF1Ouzk7NF6NTEnpBTkg1n6bMCYQ,195
+project_llm_trainer-0.7.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+project_llm_trainer-0.7.5.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
+project_llm_trainer-0.7.5.dist-info/RECORD,,
{project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/calc_intermediate_size
RENAMED
File without changes
{project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/ddp_train
RENAMED
File without changes
{project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/ds_train
RENAMED
File without changes
{project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/plot_loss
RENAMED
File without changes
{project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/plot_lr
RENAMED
File without changes
{project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/py_train
RENAMED
File without changes
{project_llm_trainer-0.7.3.data → project_llm_trainer-0.7.5.data}/scripts/smart_train
RENAMED
File without changes
{project_llm_trainer-0.7.3.dist-info → project_llm_trainer-0.7.5.dist-info}/WHEEL
RENAMED
File without changes
{project_llm_trainer-0.7.3.dist-info → project_llm_trainer-0.7.5.dist-info}/top_level.txt
RENAMED
File without changes