liger-kernel-nightly 0.5.5.dev20250327235657__py3-none-any.whl → 0.5.5.dev20250328142430__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of liger-kernel-nightly might be problematic. Click here for more details.

@@ -115,6 +115,21 @@ class LigerFusedLinearDistillationBase(torch.autograd.Function):
115
115
  student_logits_chunk /= temperature
116
116
  teacher_logits_chunk /= temperature
117
117
 
118
+ # If the teacher and student vocab sizes differ, pad the student logits to match the teacher's.
119
+ # This only applies when they share exactly the same vocab and tokenizer, and the
120
+ # teacher's logits are merely padded for training efficiency, as in
121
+ # https://huggingface.co/Qwen/Qwen1.5-72B-Chat/discussions/1#662883f568adf59b07b176d2
122
+ teacher_vocab_size = teacher_weight.shape[0]
123
+ student_vocab_size = student_weight.shape[0]
124
+ if teacher_vocab_size > student_vocab_size:
125
+ pad_size = teacher_vocab_size - student_vocab_size
126
+ pad_tensor = torch.zeros(
127
+ (*student_logits_chunk.shape[:-1], pad_size),
128
+ dtype=student_logits_chunk.dtype,
129
+ device=student_logits_chunk.device,
130
+ )
131
+ student_logits_chunk = torch.cat([student_logits_chunk, pad_tensor], dim=-1)
132
+
118
133
  hard_loss /= full_target.shape[0]
119
134
 
120
135
  soft_loss = distillation_loss_fn(student_logits_chunk, teacher_logits_chunk, **loss_kwargs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.5.dev20250327235657
3
+ Version: 0.5.5.dev20250328142430
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -6,7 +6,7 @@ liger_kernel/chunked_loss/__init__.py,sha256=ATu-xX5Fc49Cr6yBOGBRNTo593ZrU5ZCsIu
6
6
  liger_kernel/chunked_loss/cpo_loss.py,sha256=Gzz1eU4kgcbdubFVRy55e8A1Cr-r45UgNicXwZIjmBU,5454
7
7
  liger_kernel/chunked_loss/dpo_loss.py,sha256=xZwGqS04si9zXyob95SAdalC-hajZg8fWINqiqffN8k,5855
8
8
  liger_kernel/chunked_loss/functional.py,sha256=THWWpCnRVhTVfnPnyvQjdBvo1JDtxhwLmtZE_yiBBqM,817
9
- liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=oeZhRw87UUo01UotfaMxDhWa7Xr6IERmK3zzF1CQqEc,11037
9
+ liger_kernel/chunked_loss/fused_linear_distillation.py,sha256=ooR-qnZCyWJN935oHCSWLaKKKyaYERyhNczRGi1VOiw,11935
10
10
  liger_kernel/chunked_loss/fused_linear_preference.py,sha256=ojB42jYPu0c4ki96Ft-hy7Sf6fh_WikG-aWNrlZzSio,18362
11
11
  liger_kernel/chunked_loss/fused_linear_rlhf.py,sha256=wGujqwLz91mOE9MmdenhBIKvbmswhwtINMCpcP7D74c,9050
12
12
  liger_kernel/chunked_loss/fused_linear_unpaired_preference.py,sha256=RiuK3UtRwH9T6jZ36sA8Urj-TVuOLOO2syLg_JOQapY,13437
@@ -71,9 +71,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
71
71
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=pdekW7l6Qg_aqa5SYKYlSWUF8m3lkOFvFLcIMEHrz9s,8338
72
72
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
73
73
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
74
- liger_kernel_nightly-0.5.5.dev20250327235657.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
75
- liger_kernel_nightly-0.5.5.dev20250327235657.dist-info/METADATA,sha256=0paW1IiVPZaCFS8SvM36NiGYNM8zPHKq77UHVowl4ts,22959
76
- liger_kernel_nightly-0.5.5.dev20250327235657.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
77
- liger_kernel_nightly-0.5.5.dev20250327235657.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
78
- liger_kernel_nightly-0.5.5.dev20250327235657.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
79
- liger_kernel_nightly-0.5.5.dev20250327235657.dist-info/RECORD,,
74
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
75
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/METADATA,sha256=ZCqBj_JT8pEfnanXMGvgjo_IGq-mLd3Ii199ohgolVc,22959
76
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
77
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
78
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
79
+ liger_kernel_nightly-0.5.5.dev20250328142430.dist-info/RECORD,,