project-llm-trainer 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of project-llm-trainer might be problematic. Click here for more details.

@@ -69,19 +69,29 @@ class DPOTrainer(Trainer):
69
69
  zero_optimization = {'stage': 0}
70
70
  parallel_kwargs['zero_optimization'] = zero_optimization
71
71
 
72
- if self.train_config.ds_config.fp16_config:
72
+
73
+ if (self.train_config.ds_config.bf16_config is not None
74
+ and self.train_config.ds_config.bf16_config.enabled):
75
+ bf16_config = self.train_config.ds_config.bf16_config
76
+ bf16 = {
77
+ 'enabled': bf16_config.enabled
78
+ }
79
+ parallel_kwargs['bf16'] = bf16
80
+ elif self.train_config.ds_config.fp16_config:
73
81
  fb16_config = self.train_config.ds_config.fp16_config
74
- fp16 = { 'enabled': fb16_config.enabled }
82
+ fp16 = {
83
+ 'enabled': fb16_config.enabled,
84
+ 'loss_scale': fb16_config.loss_scale,
85
+ 'loss_scale_window': fb16_config.loss_scale_window,
86
+ 'initial_scale_power': fb16_config.initial_scale_power,
87
+ 'hysteresis': fb16_config.hysteresis,
88
+ 'min_loss_scale': fb16_config.min_loss_scale
89
+ }
75
90
 
76
91
  if fb16_config.fp16_opt_level is not None:
77
92
  fp16['fp16_opt_level'] = fb16_config.fp16_opt_level
78
93
 
79
94
  parallel_kwargs['fp16'] = fp16
80
-
81
- if self.train_config.ds_config.bf16_config:
82
- bf16_config = self.train_config.ds_config.bf16_config
83
- bf16 = { 'enabled': bf16_config.enabled }
84
- parallel_kwargs['bf16'] = bf16
85
95
  elif isinstance(TrainerTools().parallel, FsdpParallel) and self.train_config.fsdp_config:
86
96
  parallel_kwargs = {
87
97
  'transformer_layer_cls': self.train_config.fsdp_config.transformer_layer_cls,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: project_llm_trainer
3
- Version: 0.3.4
3
+ Version: 0.3.5
4
4
  Summary: LLM and VLM trainer
5
5
  Author: qibin
6
6
  Author-email: qibin0506@gmail.com
@@ -2,7 +2,7 @@ llm_trainer/__init__.py,sha256=HWgtTEVeQSnZmEyYQm2K6eFEG4X2QAoigMlB5Z2tcXE,260
2
2
  llm_trainer/checkpoint.py,sha256=Dlkcit0o7Gx6S9QUrIrVp2pTurP9X0zVA7w7ImSuVQU,6049
3
3
  llm_trainer/dataset.py,sha256=4QlOo0SFB5816BUYegQjgobUqTUMQvdmZMM_OEAMSjE,4347
4
4
  llm_trainer/dcp.py,sha256=PkD97DyrOtoTKn4FJsfL3VqAy4dxufgjdzJEz8-Cnoc,3635
5
- llm_trainer/dpo_trainer.py,sha256=7Bf6snWcu2fT8QRDI1CSzmrc7Cog6JauIeK2KoW_f8I,13135
5
+ llm_trainer/dpo_trainer.py,sha256=kDpzHrxP3qWbBxDGi9Rkus1kw8P3-bAtw9IyuYINgk0,13625
6
6
  llm_trainer/ds_checkpoint.py,sha256=_svpzqRaa43--DKPputoXAelc6X9vPM0gNQu-hlh6NI,2153
7
7
  llm_trainer/eval.py,sha256=sCvdYnqWWf5_nuDQN5BHb_YivXLOQW-V0ET9mPu0tPU,2389
8
8
  llm_trainer/generate_utils.py,sha256=4iM0vyc_1C_iTL31GlS9PR4eZtYaELPRZ02KDSPZA9U,15158
@@ -21,14 +21,14 @@ llm_trainer/tools.py,sha256=AhfjN9oln5Pyif1SgCWwgQg-Q5acTCd9xpz4L26QUjA,3039
21
21
  llm_trainer/train_configs.py,sha256=cadfo8RgxNUR-L3ZLyjiRXTQvhjUl4A1qENaq-ol8h4,15878
22
22
  llm_trainer/trainer.py,sha256=5DgDzg0TReZrXsIaM6A4DzeJnzePNybGdfoVSDybQ2U,24308
23
23
  llm_trainer/utils.py,sha256=-ivhMF0d999va13S1wt2uBvtVw8Nvr3uBzhaUFKL04Q,6826
24
- project_llm_trainer-0.3.4.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
25
- project_llm_trainer-0.3.4.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
26
- project_llm_trainer-0.3.4.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
27
- project_llm_trainer-0.3.4.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
28
- project_llm_trainer-0.3.4.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
29
- project_llm_trainer-0.3.4.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
30
- project_llm_trainer-0.3.4.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
31
- project_llm_trainer-0.3.4.dist-info/METADATA,sha256=Y8XjOGdQb7VxN5QKHyKICkkOzjGcXJuI6hPziULJNfc,195
32
- project_llm_trainer-0.3.4.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
33
- project_llm_trainer-0.3.4.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
34
- project_llm_trainer-0.3.4.dist-info/RECORD,,
24
+ project_llm_trainer-0.3.5.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
25
+ project_llm_trainer-0.3.5.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
26
+ project_llm_trainer-0.3.5.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
27
+ project_llm_trainer-0.3.5.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
28
+ project_llm_trainer-0.3.5.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
29
+ project_llm_trainer-0.3.5.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
30
+ project_llm_trainer-0.3.5.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
31
+ project_llm_trainer-0.3.5.dist-info/METADATA,sha256=jfnJI_XqE7U89-8tLEGPmLpuzwp-3qw-aERIgV8GJpk,195
32
+ project_llm_trainer-0.3.5.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
33
+ project_llm_trainer-0.3.5.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
34
+ project_llm_trainer-0.3.5.dist-info/RECORD,,