liger-kernel-nightly 0.5.5.dev20250327235249__py3-none-any.whl → 0.5.5.dev20250328142430__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- liger_kernel/chunked_loss/fused_linear_distillation.py +4 -4
- {liger_kernel_nightly-0.5.5.dev20250327235249.dist-info → liger_kernel_nightly-0.5.5.dev20250328142430.dist-info}/METADATA +1 -1
- {liger_kernel_nightly-0.5.5.dev20250327235249.dist-info → liger_kernel_nightly-0.5.5.dev20250328142430.dist-info}/RECORD +7 -7
- {liger_kernel_nightly-0.5.5.dev20250327235249.dist-info → liger_kernel_nightly-0.5.5.dev20250328142430.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.5.dev20250327235249.dist-info → liger_kernel_nightly-0.5.5.dev20250328142430.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.5.dev20250327235249.dist-info → liger_kernel_nightly-0.5.5.dev20250328142430.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.5.5.dev20250327235249.dist-info → liger_kernel_nightly-0.5.5.dev20250328142430.dist-info}/top_level.txt +0 -0
|
@@ -112,6 +112,9 @@ class LigerFusedLinearDistillationBase(torch.autograd.Function):
|
|
|
112
112
|
compute_ce_loss=compute_ce_loss,
|
|
113
113
|
)
|
|
114
114
|
|
|
115
|
+
student_logits_chunk /= temperature
|
|
116
|
+
teacher_logits_chunk /= temperature
|
|
117
|
+
|
|
115
118
|
# If the teacher and student token size is different, pad student logits to match the teacher's.
|
|
116
119
|
# This only applies to cases where they share exactly the same vocab and tokenizer just
|
|
117
120
|
# that teacher logit is padded for some training efficiency such as
|
|
@@ -123,13 +126,10 @@ class LigerFusedLinearDistillationBase(torch.autograd.Function):
|
|
|
123
126
|
pad_tensor = torch.zeros(
|
|
124
127
|
(*student_logits_chunk.shape[:-1], pad_size),
|
|
125
128
|
dtype=student_logits_chunk.dtype,
|
|
126
|
-
device=student_logits_chunk.device
|
|
129
|
+
device=student_logits_chunk.device,
|
|
127
130
|
)
|
|
128
131
|
student_logits_chunk = torch.cat([student_logits_chunk, pad_tensor], dim=-1)
|
|
129
132
|
|
|
130
|
-
student_logits_chunk /= temperature
|
|
131
|
-
teacher_logits_chunk /= temperature
|
|
132
|
-
|
|
133
133
|
hard_loss /= full_target.shape[0]
|
|
134
134
|
|
|
135
135
|
soft_loss = distillation_loss_fn(student_logits_chunk, teacher_logits_chunk, **loss_kwargs)
|
|
@@ -6,7 +6,7 @@ liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIu
|
|
|
6
6
|
liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
|
|
7
7
|
liger_kernel/chunked_loss/dpo_loss.py,sha256=xZwGqS04si9zXyob95SAdalC-hajZg8fWINqiqffN8k,5855
|
|
8
8
|
liger_kernel/chunked_loss/functional.py,sha256=THWWpCnRVhTVfnPnyvQjdBvo1JDtxhwLmtZE_yiBBqM,817
|
|
9
|
-
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=
|
|
9
|
+
liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
|
|
10
10
|
liger_kernel/chunked_loss/fused_linear_preference.py,sha256=ojB42jYPu0c4ki96Ft-hy7Sf6fh_WikG-aWNrlZzSio,18362
|
|
11
11
|
liger_kernel/chunked_loss/fused_linear_rlhf.py,sha256=wGujqwLz91mOE9MmdenhBIKvbmswhwtINMCpcP7D74c,9050
|
|
12
12
|
liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
|
|
@@ -71,9 +71,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
|
|
|
71
71
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
|
|
72
72
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
|
73
73
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
|
74
|
-
liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
75
|
-
liger_kernel_nightly-0.5.5.
|
|
76
|
-
liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
77
|
-
liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
78
|
-
liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
79
|
-
liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/RECORD,,
|
|
74
|
+
liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
|
75
|
+
liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/METADATA,sha256=ZCqBj_JT8pEfnanXMGvgjo_IGq-mLd3Ii199ohgolVc,22959
|
|
76
|
+
liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
|
77
|
+
liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
|
78
|
+
liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
|
79
|
+
liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|