liger-kernel-nightly 0.5.5.dev20250327235249__py3-none-any.whl → 0.5.5.dev20250328142430__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of liger-kernel-nightly might be problematic. Click here for more details.

@@ -112,6 +112,9 @@ class LigerFusedLinearDistillationBase(torch.autograd.Function):
112
112
  compute_ce_loss=compute_ce_loss,
113
113
  )
114
114
 
115
+ student_logits_chunk /= temperature
116
+ teacher_logits_chunk /= temperature
117
+
115
118
  # If the teacher and student token size is different, pad student logits to match the teacher's.
116
119
  # This only applies to cases where they share exactly the same vocab and tokenizer just
117
120
  # that teacher logit is padded for some training efficiency such as
@@ -123,13 +126,10 @@ class LigerFusedLinearDistillationBase(torch.autograd.Function):
123
126
  pad_tensor = torch.zeros(
124
127
  (*student_logits_chunk.shape[:-1], pad_size),
125
128
  dtype=student_logits_chunk.dtype,
126
- device=student_logits_chunk.device
129
+ device=student_logits_chunk.device,
127
130
  )
128
131
  student_logits_chunk = torch.cat([student_logits_chunk, pad_tensor], dim=-1)
129
132
 
130
- student_logits_chunk /= temperature
131
- teacher_logits_chunk /= temperature
132
-
133
133
  hard_loss /= full_target.shape[0]
134
134
 
135
135
  soft_loss = distillation_loss_fn(student_logits_chunk, teacher_logits_chunk, **loss_kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.5.dev20250327235249
3
+ Version: 0.5.5.dev20250328142430
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -6,7 +6,7 @@ liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIu
6
6
  liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
7
7
  liger_kernel/chunked_loss/dpo_loss.py,sha256=xZwGqS04si9zXyob95SAdalC-hajZg8fWINqiqffN8k,5855
8
8
  liger_kernel/chunked_loss/functional.py,sha256=THWWpCnRVhTVfnPnyvQjdBvo1JDtxhwLmtZE_yiBBqM,817
9
- liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=y7e2mF_6HGMNNuoWAmJ8Y5bK-hRUe2q4-R6r7lf-Mw8,11934
9
+ liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
10
10
  liger_kernel/chunked_loss/fused_linear_preference.py,sha256=ojB42jYPu0c4ki96Ft-hy7Sf6fh_WikG-aWNrlZzSio,18362
11
11
  liger_kernel/chunked_loss/fused_linear_rlhf.py,sha256=wGujqwLz91mOE9MmdenhBIKvbmswhwtINMCpcP7D74c,9050
12
12
  liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
@@ -71,9 +71,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
71
71
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
72
72
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
73
73
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
74
- liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
75
- liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/METADATA,sha256=4_bQ76AZvAHUe6dzZt_JTtxjAX7_UV6O5zLmi7RNmK4,22959
76
- liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
77
- liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
78
- liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
79
- liger_kernel_nightly-0.5.5.dev20250327235249.dist-info/RECORD,,
74
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
75
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/METADATA,sha256=ZCqBj_JT8pEfnanXMGvgjo_IGq-mLd3Ii199ohgolVc,22959
76
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
77
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
78
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
79
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/RECORD,,