project-llm-trainer 0.5.1__py3-none-any.whl → 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of project-llm-trainer might be problematic. Click here for more details.
- llm_trainer/checkpoint.py +37 -0
- llm_trainer/dpo_trainer.py +12 -1
- llm_trainer/grpo_trainer.py +13 -1
- llm_trainer/scheduler.py +12 -7
- llm_trainer/train_configs.py +4 -4
- llm_trainer/trainer.py +20 -9
- {project_llm_trainer-0.5.1.dist-info → project_llm_trainer-0.5.2.dist-info}/METADATA +1 -1
- {project_llm_trainer-0.5.1.dist-info → project_llm_trainer-0.5.2.dist-info}/RECORD +17 -17
- {project_llm_trainer-0.5.1.data → project_llm_trainer-0.5.2.data}/scripts/calc_intermediate_size +0 -0
- {project_llm_trainer-0.5.1.data → project_llm_trainer-0.5.2.data}/scripts/ddp_train +0 -0
- {project_llm_trainer-0.5.1.data → project_llm_trainer-0.5.2.data}/scripts/ds_train +0 -0
- {project_llm_trainer-0.5.1.data → project_llm_trainer-0.5.2.data}/scripts/plot_loss +0 -0
- {project_llm_trainer-0.5.1.data → project_llm_trainer-0.5.2.data}/scripts/plot_lr +0 -0
- {project_llm_trainer-0.5.1.data → project_llm_trainer-0.5.2.data}/scripts/py_train +0 -0
- {project_llm_trainer-0.5.1.data → project_llm_trainer-0.5.2.data}/scripts/smart_train +0 -0
- {project_llm_trainer-0.5.1.dist-info → project_llm_trainer-0.5.2.dist-info}/WHEEL +0 -0
- {project_llm_trainer-0.5.1.dist-info → project_llm_trainer-0.5.2.dist-info}/top_level.txt +0 -0
llm_trainer/checkpoint.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import Optional, Union, Tuple
|
|
3
|
+
import shutil
|
|
3
4
|
import torch
|
|
4
5
|
from torch import nn
|
|
5
6
|
from torch.optim import Optimizer
|
|
@@ -34,6 +35,42 @@ def save_checkpoint(
|
|
|
34
35
|
torch.save(ckpt, checkpoint_name)
|
|
35
36
|
|
|
36
37
|
|
|
38
|
+
def save_best_checkpoint(
|
|
39
|
+
current_loss: float,
|
|
40
|
+
last_best_checkpoint_loss: float,
|
|
41
|
+
suffix: Optional[str] = None
|
|
42
|
+
) -> bool:
|
|
43
|
+
need_replace = current_loss <= last_best_checkpoint_loss
|
|
44
|
+
if need_replace and TrainerTools().parallel.is_main_process:
|
|
45
|
+
if isinstance(TrainerTools().parallel, DsParallel):
|
|
46
|
+
checkpoint_name = os.environ.get('DIST_CHECKPOINT_DIR', 'checkpoint')
|
|
47
|
+
if suffix:
|
|
48
|
+
checkpoint_name = f"{checkpoint_name}_{suffix}"
|
|
49
|
+
|
|
50
|
+
best_checkpoint_name = f'best_{checkpoint_name}'
|
|
51
|
+
if not os.path.exists(best_checkpoint_name):
|
|
52
|
+
os.makedirs(best_checkpoint_name)
|
|
53
|
+
|
|
54
|
+
if os.path.exists(checkpoint_name):
|
|
55
|
+
shutil.rmtree(best_checkpoint_name)
|
|
56
|
+
shutil.copytree(checkpoint_name, best_checkpoint_name)
|
|
57
|
+
else:
|
|
58
|
+
checkpoint_name = os.environ.get('CHECKPOINT_NAME', DEFAULT_CHECKPOINT_NAME)
|
|
59
|
+
if suffix:
|
|
60
|
+
checkpoint_name = f"{checkpoint_name}_{suffix}"
|
|
61
|
+
|
|
62
|
+
best_checkpoint_name = f'best_{checkpoint_name}'
|
|
63
|
+
|
|
64
|
+
if os.path.exists(checkpoint_name):
|
|
65
|
+
if os.path.exists(best_checkpoint_name):
|
|
66
|
+
os.remove(best_checkpoint_name)
|
|
67
|
+
|
|
68
|
+
shutil.copy2(checkpoint_name, best_checkpoint_name)
|
|
69
|
+
|
|
70
|
+
TrainerTools().parallel.wait()
|
|
71
|
+
return need_replace
|
|
72
|
+
|
|
73
|
+
|
|
37
74
|
def load_checkpoint(
|
|
38
75
|
model: nn.Module,
|
|
39
76
|
optimizer: Optional[Optimizer] = None,
|
llm_trainer/dpo_trainer.py
CHANGED
|
@@ -16,6 +16,7 @@ from .partition_utils import sync_model_params
|
|
|
16
16
|
|
|
17
17
|
from .checkpoint import (
|
|
18
18
|
save_checkpoint,
|
|
19
|
+
save_best_checkpoint,
|
|
19
20
|
save_steps,
|
|
20
21
|
)
|
|
21
22
|
|
|
@@ -139,6 +140,9 @@ class DPOTrainer(Trainer):
|
|
|
139
140
|
loss_accumulation = 0.0
|
|
140
141
|
skipping_train = False
|
|
141
142
|
|
|
143
|
+
current_loss: float = 0.0
|
|
144
|
+
last_best_checkpoint_loss: float = 0.0
|
|
145
|
+
|
|
142
146
|
aux_loss_coef = self.train_config.loss_config.aux_loss_coef
|
|
143
147
|
|
|
144
148
|
for epoch in range(self.train_config.n_epochs):
|
|
@@ -243,6 +247,9 @@ class DPOTrainer(Trainer):
|
|
|
243
247
|
|
|
244
248
|
if (batch - last_ckpt_batch) >= self.train_config.eval_batch_interval:
|
|
245
249
|
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
250
|
+
if save_best_checkpoint(current_loss, last_best_checkpoint_loss):
|
|
251
|
+
last_best_checkpoint_loss = current_loss
|
|
252
|
+
|
|
246
253
|
last_ckpt_batch = batch
|
|
247
254
|
self._on_batch_end(tag=f'epoch:{epoch}/batch:{batch}')
|
|
248
255
|
|
|
@@ -252,8 +259,12 @@ class DPOTrainer(Trainer):
|
|
|
252
259
|
|
|
253
260
|
# end epoch
|
|
254
261
|
if not skipping_train:
|
|
255
|
-
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
256
262
|
save_steps(global_steps=global_steps, lr_scheduler=self.lr_scheduler)
|
|
263
|
+
|
|
264
|
+
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
265
|
+
if save_best_checkpoint(current_loss, last_best_checkpoint_loss):
|
|
266
|
+
last_best_checkpoint_loss = current_loss
|
|
267
|
+
|
|
257
268
|
TrainerTools().parallel.on_epoch_end(epoch)
|
|
258
269
|
self._on_epoch_end(tag=f'epoch:{epoch}')
|
|
259
270
|
|
llm_trainer/grpo_trainer.py
CHANGED
|
@@ -22,6 +22,7 @@ from .partition_utils import (
|
|
|
22
22
|
|
|
23
23
|
from .checkpoint import (
|
|
24
24
|
save_checkpoint,
|
|
25
|
+
save_best_checkpoint,
|
|
25
26
|
save_steps,
|
|
26
27
|
)
|
|
27
28
|
|
|
@@ -280,6 +281,10 @@ class GRPOTrainer(Trainer):
|
|
|
280
281
|
def train(self):
|
|
281
282
|
global_steps = 0
|
|
282
283
|
skipping_train = False
|
|
284
|
+
|
|
285
|
+
current_loss: float = 0.0
|
|
286
|
+
last_best_checkpoint_loss: float = 0.0
|
|
287
|
+
|
|
283
288
|
aux_loss_coef = self.train_config.loss_config.aux_loss_coef
|
|
284
289
|
|
|
285
290
|
for epoch in range(self.train_config.n_epochs):
|
|
@@ -361,6 +366,9 @@ class GRPOTrainer(Trainer):
|
|
|
361
366
|
|
|
362
367
|
if (batch - last_ckpt_batch) >= self.train_config.eval_batch_interval:
|
|
363
368
|
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
369
|
+
if save_best_checkpoint(current_loss, last_best_checkpoint_loss):
|
|
370
|
+
last_best_checkpoint_loss = current_loss
|
|
371
|
+
|
|
364
372
|
last_ckpt_batch = batch
|
|
365
373
|
self._on_batch_end(tag=f'epoch:{epoch}/batch:{batch}')
|
|
366
374
|
|
|
@@ -370,8 +378,12 @@ class GRPOTrainer(Trainer):
|
|
|
370
378
|
|
|
371
379
|
# end epoch
|
|
372
380
|
if not skipping_train:
|
|
373
|
-
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
374
381
|
save_steps(global_steps=global_steps, lr_scheduler=self.lr_scheduler)
|
|
382
|
+
|
|
383
|
+
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
384
|
+
if save_best_checkpoint(current_loss, last_best_checkpoint_loss):
|
|
385
|
+
last_best_checkpoint_loss = current_loss
|
|
386
|
+
|
|
375
387
|
TrainerTools().parallel.on_epoch_end(epoch)
|
|
376
388
|
self._on_epoch_end(tag=f'epoch:{epoch}')
|
|
377
389
|
|
llm_trainer/scheduler.py
CHANGED
|
@@ -30,12 +30,12 @@ class WarmupCosineAnnealingLRScheduler(LRScheduler):
|
|
|
30
30
|
self,
|
|
31
31
|
*,
|
|
32
32
|
optimizer: torch.optim.Optimizer,
|
|
33
|
+
warmup_iters: int,
|
|
33
34
|
initial_lr: float,
|
|
34
35
|
min_lr: float,
|
|
35
36
|
max_lr: float,
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
period_mul: int = 1, # 周期长度的倍数
|
|
37
|
+
cosine_annealing_period: int, # 每个周期的步数
|
|
38
|
+
cosine_annealing_period_mul: int = 0, # 周期长度的倍数
|
|
39
39
|
need_log: bool = False
|
|
40
40
|
):
|
|
41
41
|
super().__init__()
|
|
@@ -46,8 +46,8 @@ class WarmupCosineAnnealingLRScheduler(LRScheduler):
|
|
|
46
46
|
self._max_lr = max_lr
|
|
47
47
|
self._warmup_iters = warmup_iters
|
|
48
48
|
|
|
49
|
-
self.
|
|
50
|
-
self.
|
|
49
|
+
self._cosine_annealing_period = cosine_annealing_period
|
|
50
|
+
self._cosine_annealing_period_mul = cosine_annealing_period_mul
|
|
51
51
|
|
|
52
52
|
self.T_cur = 0 # 当前周期内已走过的步数
|
|
53
53
|
self.cycle = 0 # 当前周期编号
|
|
@@ -85,7 +85,12 @@ class WarmupCosineAnnealingLRScheduler(LRScheduler):
|
|
|
85
85
|
return self._steps > self._warmup_iters
|
|
86
86
|
|
|
87
87
|
def _update_lr(self):
|
|
88
|
-
|
|
88
|
+
# 如果period_mul是0,则认为没有周期,超过余弦退火总步数,则一直保持最小lr
|
|
89
|
+
if self._cosine_annealing_period_mul == 0 and self._steps >= self._cosine_annealing_period + self._warmup_iters:
|
|
90
|
+
lr = self._min_lr
|
|
91
|
+
for param_group in self._optimizer.param_groups:
|
|
92
|
+
param_group['lr'] = lr
|
|
93
|
+
elif self._steps <= self._warmup_iters:
|
|
89
94
|
# Warmup: adjust learning rate linearly
|
|
90
95
|
# (max_lr - initial_lr) / warmup_iters
|
|
91
96
|
lr = self._initial_lr + self._steps * self._lr_increment
|
|
@@ -97,7 +102,7 @@ class WarmupCosineAnnealingLRScheduler(LRScheduler):
|
|
|
97
102
|
|
|
98
103
|
"""每步更新学习率"""
|
|
99
104
|
# 计算当前周期的最大步数
|
|
100
|
-
T_max = self.
|
|
105
|
+
T_max = self._cosine_annealing_period * (max(self._cosine_annealing_period_mul, 1) ** self.cycle)
|
|
101
106
|
|
|
102
107
|
# 更新周期状态
|
|
103
108
|
self.T_cur += 1
|
llm_trainer/train_configs.py
CHANGED
|
@@ -109,13 +109,13 @@ class DataLoaderConfig:
|
|
|
109
109
|
@dataclass(kw_only=True)
|
|
110
110
|
class LrConfig:
|
|
111
111
|
enable_lr_scheduler: bool = False
|
|
112
|
-
initial_lr:
|
|
112
|
+
initial_lr: float
|
|
113
113
|
weight_decay: float = 0.1
|
|
114
|
+
warmup_iters: Optional[int] = None
|
|
114
115
|
max_lr: Optional[float] = None
|
|
115
116
|
min_lr: Optional[float] = None
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
warmup_iters: Optional[int] = None
|
|
117
|
+
cosine_annealing_period: Optional[int] = None
|
|
118
|
+
cosine_annealing_period_mul: int = 0
|
|
119
119
|
|
|
120
120
|
|
|
121
121
|
@dataclass(kw_only=True)
|
llm_trainer/trainer.py
CHANGED
|
@@ -30,6 +30,7 @@ from .scheduler import (
|
|
|
30
30
|
from .checkpoint import (
|
|
31
31
|
load_checkpoint,
|
|
32
32
|
save_checkpoint,
|
|
33
|
+
save_best_checkpoint,
|
|
33
34
|
load_steps,
|
|
34
35
|
save_steps,
|
|
35
36
|
)
|
|
@@ -172,20 +173,20 @@ class Trainer:
|
|
|
172
173
|
|
|
173
174
|
def _init_lr_scheduler(self, initial_lr: float) -> LRScheduler:
|
|
174
175
|
if self.train_config.lr_config.enable_lr_scheduler:
|
|
176
|
+
warmup_iters = self.train_config.lr_config.warmup_iters
|
|
175
177
|
min_lr = self.train_config.lr_config.min_lr
|
|
176
178
|
max_lr = self.train_config.lr_config.max_lr
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
period_mul = self.train_config.lr_config.period_mul
|
|
179
|
+
cosine_annealing_period = self.train_config.lr_config.cosine_annealing_period
|
|
180
|
+
cosine_annealing_period_mul = self.train_config.lr_config.cosine_annealing_period_mul
|
|
180
181
|
|
|
181
182
|
return WarmupCosineAnnealingLRScheduler(
|
|
182
183
|
optimizer=self.optimizer,
|
|
184
|
+
warmup_iters=warmup_iters,
|
|
183
185
|
initial_lr=initial_lr,
|
|
184
186
|
min_lr=min_lr,
|
|
185
187
|
max_lr=max_lr,
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
period_mul=period_mul,
|
|
188
|
+
cosine_annealing_period=cosine_annealing_period,
|
|
189
|
+
cosine_annealing_period_mul=cosine_annealing_period_mul,
|
|
189
190
|
need_log=TrainerTools().parallel.is_main_process
|
|
190
191
|
)
|
|
191
192
|
|
|
@@ -467,6 +468,9 @@ class Trainer:
|
|
|
467
468
|
loss_accumulation = 0.0
|
|
468
469
|
skipping_train = False
|
|
469
470
|
|
|
471
|
+
current_loss: float = 0.0
|
|
472
|
+
last_best_checkpoint_loss: float = 0.0
|
|
473
|
+
|
|
470
474
|
for epoch in range(self.train_config.n_epochs):
|
|
471
475
|
self.train_model.train()
|
|
472
476
|
file_count = len(self.train_config.file_dataset)
|
|
@@ -539,7 +543,7 @@ class Trainer:
|
|
|
539
543
|
if TrainerTools().parallel.parallel_train:
|
|
540
544
|
dist.all_reduce(loss_tensor, dist.ReduceOp.AVG)
|
|
541
545
|
|
|
542
|
-
|
|
546
|
+
current_loss = loss_tensor.item()
|
|
543
547
|
|
|
544
548
|
# ds模式已经集成gradient_clipping
|
|
545
549
|
if not isinstance(TrainerTools().parallel, DsParallel) and self.lr_scheduler.can_clip_grad():
|
|
@@ -553,7 +557,7 @@ class Trainer:
|
|
|
553
557
|
epoch_tag=f'epoch: {epoch}',
|
|
554
558
|
file_tag=f'file: {file_idx + 1}/{file_count}',
|
|
555
559
|
batch_tag=f'batch: {batch}/{batch_count_per_file}',
|
|
556
|
-
loss=
|
|
560
|
+
loss=current_loss
|
|
557
561
|
)
|
|
558
562
|
# reset to default
|
|
559
563
|
loss_accumulation = 0.0
|
|
@@ -565,6 +569,9 @@ class Trainer:
|
|
|
565
569
|
|
|
566
570
|
if (batch - last_ckpt_batch) >= self.train_config.eval_batch_interval:
|
|
567
571
|
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
572
|
+
if save_best_checkpoint(current_loss, last_best_checkpoint_loss):
|
|
573
|
+
last_best_checkpoint_loss = current_loss
|
|
574
|
+
|
|
568
575
|
last_ckpt_batch = batch
|
|
569
576
|
self._on_batch_end(tag=f'epoch:{epoch}/batch:{batch}')
|
|
570
577
|
|
|
@@ -574,8 +581,12 @@ class Trainer:
|
|
|
574
581
|
|
|
575
582
|
# end epoch
|
|
576
583
|
if not skipping_train:
|
|
577
|
-
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
578
584
|
save_steps(global_steps=global_steps, lr_scheduler=self.lr_scheduler)
|
|
585
|
+
|
|
586
|
+
save_checkpoint(model=self.train_model, optimizer=self.optimizer)
|
|
587
|
+
if save_best_checkpoint(current_loss, last_best_checkpoint_loss):
|
|
588
|
+
last_best_checkpoint_loss = current_loss
|
|
589
|
+
|
|
579
590
|
TrainerTools().parallel.on_epoch_end(epoch)
|
|
580
591
|
self._on_epoch_end(tag=f'epoch:{epoch}')
|
|
581
592
|
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
llm_trainer/__init__.py,sha256=HWgtTEVeQSnZmEyYQm2K6eFEG4X2QAoigMlB5Z2tcXE,260
|
|
2
|
-
llm_trainer/checkpoint.py,sha256=
|
|
2
|
+
llm_trainer/checkpoint.py,sha256=wC4GdIY2HAnxGHzUND5Yq-J_ynhDPT7A_2sXlWRElBc,4647
|
|
3
3
|
llm_trainer/dataset.py,sha256=4QlOo0SFB5816BUYegQjgobUqTUMQvdmZMM_OEAMSjE,4347
|
|
4
|
-
llm_trainer/dpo_trainer.py,sha256=
|
|
4
|
+
llm_trainer/dpo_trainer.py,sha256=xfYXlLA5TbqPKCUbk5_V79TreEh-dnLMaN72a3-Tdzg,11860
|
|
5
5
|
llm_trainer/ds_checkpoint.py,sha256=wz48HoLBBt8QGO1tXfvJwrXoiGtPG_gjwHfEqARllso,2175
|
|
6
6
|
llm_trainer/eval.py,sha256=fjASCILU3fSPJxo9cP3rIXEEnkc5ZlUyHqXlZtUiHrw,888
|
|
7
7
|
llm_trainer/generate_utils.py,sha256=CbJ3mfAD6DkQ0GUHcJQ1AK02m-ocwmd-BPXEpiwvNNQ,14933
|
|
8
|
-
llm_trainer/grpo_trainer.py,sha256=
|
|
8
|
+
llm_trainer/grpo_trainer.py,sha256=vTNi3n6R4NbwFh_s8LYN1TWEJm8AW2F5NVJlT5MHxKk,15990
|
|
9
9
|
llm_trainer/log.py,sha256=LxqTGRNZUGMTSQCePRpk-rYyxSnSIbT4kOdP8Fbzr0M,462
|
|
10
10
|
llm_trainer/loss.py,sha256=NZCQeUXnLSj__mmDflE8g89KgE0emAJXIab0IERCLno,6023
|
|
11
11
|
llm_trainer/parallel.py,sha256=DQu8GqEFxD99HQ6hKuIxxyKi-05dMO33eMhImYlPuOI,4468
|
|
@@ -13,21 +13,21 @@ llm_trainer/parallel_ddp.py,sha256=Pob9vUlBZnkL4oP1Re11kFob7nufMSE96pn7m7fuOEM,1
|
|
|
13
13
|
llm_trainer/parallel_ds.py,sha256=oy8RRxHud3rACWubFlJqqd0pjPEQhKeAPGPQUSdJX2c,1145
|
|
14
14
|
llm_trainer/parallel_none.py,sha256=TG6Pm829Dg-yQu-97O-EHV3FCARBlNcP47KkGFAs16E,676
|
|
15
15
|
llm_trainer/partition_utils.py,sha256=xzv8kwlbKp3dai2pBwX89gN5ymeHk1bGbTkGru5H-UM,5167
|
|
16
|
-
llm_trainer/scheduler.py,sha256=
|
|
16
|
+
llm_trainer/scheduler.py,sha256=lyC9TFuF_y8EXYq9d-WAqN4CSaq_w9kSKeh_BOo3EpI,4039
|
|
17
17
|
llm_trainer/sft_trainer.py,sha256=gxQA7T1o1QGUsHp2CX1Qb_fO5LppBJuNbc0H4ixCYUA,1783
|
|
18
18
|
llm_trainer/tokenizer.py,sha256=SSpgXtb0e1NtQqRW0gCq09TTZi47umggy-Fh5EMHKJg,6708
|
|
19
19
|
llm_trainer/tools.py,sha256=yF17lp6oOfLe2XJeKDQ1juZcbv-6vFamJSLwEeArduA,2975
|
|
20
|
-
llm_trainer/train_configs.py,sha256=
|
|
21
|
-
llm_trainer/trainer.py,sha256=
|
|
20
|
+
llm_trainer/train_configs.py,sha256=c6bgivkkWRYcPD3NzI5uRItAUhZiIBgKVMuMgVFRnFo,7336
|
|
21
|
+
llm_trainer/trainer.py,sha256=g8YUP0FmBP3MGwewyoyOW35p9CY98rS62pzjnOMiWvE,25875
|
|
22
22
|
llm_trainer/utils.py,sha256=LWNhyQ0NDEZ9mZtk2Ryvh6EulvHIaUGIflugSpqmeFI,6791
|
|
23
|
-
project_llm_trainer-0.5.
|
|
24
|
-
project_llm_trainer-0.5.
|
|
25
|
-
project_llm_trainer-0.5.
|
|
26
|
-
project_llm_trainer-0.5.
|
|
27
|
-
project_llm_trainer-0.5.
|
|
28
|
-
project_llm_trainer-0.5.
|
|
29
|
-
project_llm_trainer-0.5.
|
|
30
|
-
project_llm_trainer-0.5.
|
|
31
|
-
project_llm_trainer-0.5.
|
|
32
|
-
project_llm_trainer-0.5.
|
|
33
|
-
project_llm_trainer-0.5.
|
|
23
|
+
project_llm_trainer-0.5.2.data/scripts/calc_intermediate_size,sha256=AggpgNHokJiJMbEtVdOnolqr_4bH3i1UYuZNEAzC2Gc,460
|
|
24
|
+
project_llm_trainer-0.5.2.data/scripts/ddp_train,sha256=x81AasaN2-9TwARFFF1l7iV1LmfMQ0bLw0i_CGbOwSw,299
|
|
25
|
+
project_llm_trainer-0.5.2.data/scripts/ds_train,sha256=qL3qc3TcedBCw98UZUjW07ONcErRawLE1HymW2AmscA,265
|
|
26
|
+
project_llm_trainer-0.5.2.data/scripts/plot_loss,sha256=MzFcdJESlVr1srj4Td6-AxPGUKkfB_QEcJwm0Bd-5fU,910
|
|
27
|
+
project_llm_trainer-0.5.2.data/scripts/plot_lr,sha256=w_7XR_x3KYYyboeOVAeu_I4fveLFI-C0wBmRrNlmWUI,894
|
|
28
|
+
project_llm_trainer-0.5.2.data/scripts/py_train,sha256=tOp9TquORQeU8XN5H7OVIk5O0Ypwi34p_GENxTwgwdk,265
|
|
29
|
+
project_llm_trainer-0.5.2.data/scripts/smart_train,sha256=Pmt4Q0to4Hoz82iB9uFPZuz7uahNUbfE7FR1940EBy8,716
|
|
30
|
+
project_llm_trainer-0.5.2.dist-info/METADATA,sha256=AP3lS957E984t8klMW_Z6VmrIU7-sBtrzAszA6V-KcQ,195
|
|
31
|
+
project_llm_trainer-0.5.2.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
|
|
32
|
+
project_llm_trainer-0.5.2.dist-info/top_level.txt,sha256=LtRFg28i0QIG7iBCD2t095oSco99LCtkijibS9cMGik,12
|
|
33
|
+
project_llm_trainer-0.5.2.dist-info/RECORD,,
|
{project_llm_trainer-0.5.1.data → project_llm_trainer-0.5.2.data}/scripts/calc_intermediate_size
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|