project-llm-trainer 0.4.11__py3-none-any.whl → 0.4.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of project-llm-trainer might be problematic. See the registry's advisory page for this release for more details.

@@ -67,7 +67,7 @@ class DsFp16Config:
67
67
  initial_scale_power: int = 16
68
68
  hysteresis: int = 2
69
69
  min_loss_scale: int = 1
70
- fp16_opt_level: Optional[str] = '02'
70
+ fp16_opt_level: Optional[str] = 'O2'
71
71
 
72
72
 
73
73
  @dataclass(kw_only=True)
@@ -77,9 +77,9 @@ class DsBf16Config:
77
77
 
78
78
  @dataclass(kw_only=True)
79
79
  class DsConfig:
80
- zero_config: Optional[DsZeROConfig] = DsZero3Config()
81
- fp16_config: Optional[DsFp16Config] = DsFp16Config()
82
- bf16_config: Optional[DsBf16Config] = DsBf16Config()
80
+ zero_config: Optional[DsZeROConfig] = field(default_factory=DsZero3Config)
81
+ fp16_config: Optional[DsFp16Config] = field(default_factory=DsFp16Config)
82
+ bf16_config: Optional[DsBf16Config] = field(default_factory=DsBf16Config)
83
83
  gradient_clipping: Optional[float] = 1.0
84
84
  activation_checkpointing: Optional[DsActivationCheckpointingConfig] = None
85
85
 
@@ -224,14 +224,14 @@ class TrainConfig:
224
224
  model_config: Union[ModelConfig, VLMConfig]
225
225
 
226
226
  file_dataset: FileDataset
227
- data_loader_config: DataLoaderConfig = DataLoaderConfig()
227
+ data_loader_config: DataLoaderConfig = field(default_factory=DataLoaderConfig)
228
228
  image_tags_file_dataset: Optional[FileDataset] = None
229
229
 
230
- loss_config: LossConfig = LossConfig()
231
- lr_config: LrConfig = LrConfig()
230
+ loss_config: LossConfig = field(default_factory=LossConfig)
231
+ lr_config: LrConfig = field(default_factory=LrConfig)
232
232
 
233
- ds_config: DsConfig = DsConfig()
234
- fsdp_config: FsdpConfig = FsdpConfig()
233
+ ds_config: DsConfig = field(default_factory=DsConfig)
234
+ fsdp_config: FsdpConfig = field(default_factory=FsdpConfig)
235
235
 
236
236
  kd_config: Optional[KDConfig] = None
237
237
  dpo_config: Optional[DPOConfig] = None
@@ -241,7 +241,7 @@ class TrainConfig:
241
241
  gradient_accumulation_steps: int = 0
242
242
  eval_batch_interval: int = 100
243
243
 
244
- eval_config: EvalConfig = EvalConfig()
244
+ eval_config: EvalConfig = field(default_factory=EvalConfig)
245
245
  pixel_values_provider: Optional[Callable[[list[str]], torch.Tensor]] = None
246
246
 
247
247
  init_state_dict: Optional[Mapping[str, Any]] = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: project_llm_trainer
3
- Version: 0.4.11
3
+ Version: 0.4.12
4
4
  Summary: LLM and VLM trainer
5
5
  Author: qibin
6
6
  Author-email: qibin0506@gmail.com
@@ -19,17 +19,17 @@ llm_trainer/scheduler.py,sha256=Xz8HhwoRMjRe41sf_NHhpZfkTlEs0I2MYusvMY6hCVw,3531
19
19
  llm_trainer/sft_trainer.py,sha256=gxQA7T1o1QGUsHp2CX1Qb_fO5LppBJuNbc0H4ixCYUA,1783
20
20
  llm_trainer/tokenizer.py,sha256=A7TYYUbtPf75kjCvWP7yBui4xZBObMk2aPem62YpwpY,6776
21
21
  llm_trainer/tools.py,sha256=O45-20wRmh-nyTfU-U-XtjbKAoe7boEIsUvWT_NaKx4,3041
22
- llm_trainer/train_configs.py,sha256=4sM96SOgwcn6jBGtbG5-qDZbJjiHVB6l7FWqdq7hbj0,7979
22
+ llm_trainer/train_configs.py,sha256=HKzH3nfMT1-SW4Htwa0KqYtMd6FAJcthR5IEo6di8us,8168
23
23
  llm_trainer/trainer.py,sha256=pUtJVRosn54j1hn76CFAptJcAsrDo59H6p8NMkg2zt4,25521
24
24
  llm_trainer/utils.py,sha256=-ivhMF0d999va13S1wt2uBvtVw8Nvr3uBzhaUFKL04Q,6826
25
- project_llm_trainer-0.4.11.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
26
- project_llm_trainer-0.4.11.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
27
- project_llm_trainer-0.4.11.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
28
- project_llm_trainer-0.4.11.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
29
- project_llm_trainer-0.4.11.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
30
- project_llm_trainer-0.4.11.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
31
- project_llm_trainer-0.4.11.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
32
- project_llm_trainer-0.4.11.dist-info/METADATA,sha256=JEZo2-np0t_K-J6yapyAXsArpvYTmrSNGDsdy32kWas,196
33
- project_llm_trainer-0.4.11.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
34
- project_llm_trainer-0.4.11.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
35
- project_llm_trainer-0.4.11.dist-info/RECORD,,
25
+ project_llm_trainer-0.4.12.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
26
+ project_llm_trainer-0.4.12.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
27
+ project_llm_trainer-0.4.12.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
28
+ project_llm_trainer-0.4.12.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
29
+ project_llm_trainer-0.4.12.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
30
+ project_llm_trainer-0.4.12.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
31
+ project_llm_trainer-0.4.12.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
32
+ project_llm_trainer-0.4.12.dist-info/METADATA,sha256=W-HeRGlXi3bFsKIVE1FyQAh4Lcvo0SOXMNu-9YnACKQ,196
33
+ project_llm_trainer-0.4.12.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
34
+ project_llm_trainer-0.4.12.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
35
+ project_llm_trainer-0.4.12.dist-info/RECORD,,