project-llm-trainer 0.3__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of project-llm-trainer might be problematic. Click here for more details.

llm_trainer/checkpoint.py CHANGED
@@ -144,7 +144,7 @@ def load_checkpoint_for_eval(
144
144
  def save_steps(global_steps: int, lr_scheduler: Optional[LRScheduler] = None):
145
145
  # 暂时只保存主进程的
146
146
  if TrainerTools().parallel.is_main_process:
147
- steps_checkpoint_name = f"{os.environ.get('CHECKPOINT_NAME', DEFAULT_CHECKPOINT_NAME)}.steps"
147
+ steps_checkpoint_name = f"{os.environ.get('LOG_DIR', './')}steps.pt"
148
148
  ckpt = {'global_steps': global_steps, 'lr_steps': lr_scheduler.cur_steps}
149
149
  torch.save(ckpt, steps_checkpoint_name)
150
150
 
@@ -153,7 +153,7 @@ def load_steps(
153
153
  default_global_steps: int = 0,
154
154
  default_lr_steps: int = 0
155
155
  ) -> Tuple[Optional[int], Optional[int]]:
156
- steps_checkpoint_name = f"{os.environ.get('CHECKPOINT_NAME', DEFAULT_CHECKPOINT_NAME)}.steps"
156
+ steps_checkpoint_name = f"{os.environ.get('LOG_DIR', './')}steps.pt"
157
157
  if os.path.exists(steps_checkpoint_name):
158
158
  ckpt = torch.load(steps_checkpoint_name, weights_only=True)
159
159
  return ckpt['global_steps'], ckpt['lr_steps']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: project_llm_trainer
3
- Version: 0.3
3
+ Version: 0.3.1
4
4
  Summary: LLM and VLM trainer
5
5
  Author: qibin
6
6
  Author-email: qibin0506@gmail.com
@@ -1,5 +1,5 @@
1
1
  llm_trainer/__init__.py,sha256=HWgtTEVeQSnZmEyYQm2K6eFEG4X2QAoigMlB5Z2tcXE,260
2
- llm_trainer/checkpoint.py,sha256=iTbnmVrT0Ql4DpD178UI95zCmfBUdYtoJS5wIvf8_4k,6099
2
+ llm_trainer/checkpoint.py,sha256=Dlkcit0o7Gx6S9QUrIrVp2pTurP9X0zVA7w7ImSuVQU,6049
3
3
  llm_trainer/dataset.py,sha256=uz1TTd87ikf7CZPdGxmR95TSQTFWPPTilgWLBWO46_I,3916
4
4
  llm_trainer/dcp.py,sha256=PkD97DyrOtoTKn4FJsfL3VqAy4dxufgjdzJEz8-Cnoc,3635
5
5
  llm_trainer/dpo_trainer.py,sha256=6rm8Jq0rI0xazcl_bCOun8rnd34Tb_PKgezowhwoiCM,13150
@@ -21,14 +21,14 @@ llm_trainer/tools.py,sha256=AhfjN9oln5Pyif1SgCWwgQg-Q5acTCd9xpz4L26QUjA,3039
21
21
  llm_trainer/train_configs.py,sha256=FAlylSYVeh_oJGTy2fcMNUV8JLD6B70hMuk-iKx14iI,15748
22
22
  llm_trainer/trainer.py,sha256=mq51d-2ADUpcWCArszhYnOSTveatt3_x43hcC7IZgYk,24330
23
23
  llm_trainer/utils.py,sha256=04XiMENVotNgbNRBn9wadHu-cJHPxj0Xq-zzLJmNgZQ,8062
24
- project_llm_trainer-0.3.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
25
- project_llm_trainer-0.3.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
26
- project_llm_trainer-0.3.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
27
- project_llm_trainer-0.3.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
28
- project_llm_trainer-0.3.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
29
- project_llm_trainer-0.3.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
30
- project_llm_trainer-0.3.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
31
- project_llm_trainer-0.3.dist-info/METADATA,sha256=P64NiFbJzSd4QkFJ5udQ4qMyHUorPp3ex4F3eIdtVdU,193
32
- project_llm_trainer-0.3.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
33
- project_llm_trainer-0.3.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
34
- project_llm_trainer-0.3.dist-info/RECORD,,
24
+ project_llm_trainer-0.3.1.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
25
+ project_llm_trainer-0.3.1.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
26
+ project_llm_trainer-0.3.1.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
27
+ project_llm_trainer-0.3.1.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
28
+ project_llm_trainer-0.3.1.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
29
+ project_llm_trainer-0.3.1.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
30
+ project_llm_trainer-0.3.1.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
31
+ project_llm_trainer-0.3.1.dist-info/METADATA,sha256=LJl2lNqTIIQZpTt7iVqzQJ2NhAvTUOwS9w44_XxIn0Y,195
32
+ project_llm_trainer-0.3.1.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
33
+ project_llm_trainer-0.3.1.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
34
+ project_llm_trainer-0.3.1.dist-info/RECORD,,