liger-kernel-nightly 0.6.0.dev20250709042125__py3-none-any.whl → 0.6.0.dev20250714030807__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel/transformers/model/gemma3.py +1 -1
- {liger_kernel_nightly-0.6.0.dev20250709042125.dist-info → liger_kernel_nightly-0.6.0.dev20250714030807.dist-info}/METADATA +4 -4
- {liger_kernel_nightly-0.6.0.dev20250709042125.dist-info → liger_kernel_nightly-0.6.0.dev20250714030807.dist-info}/RECORD +7 -7
- {liger_kernel_nightly-0.6.0.dev20250709042125.dist-info → liger_kernel_nightly-0.6.0.dev20250714030807.dist-info}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709042125.dist-info → liger_kernel_nightly-0.6.0.dev20250714030807.dist-info}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709042125.dist-info → liger_kernel_nightly-0.6.0.dev20250714030807.dist-info}/WHEEL +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709042125.dist-info → liger_kernel_nightly-0.6.0.dev20250714030807.dist-info}/top_level.txt +0 -0
@@ -255,7 +255,7 @@ def multimodal_forward(
|
|
255
255
|
shift_labels = shift_labels.view(-1).to(hidden_device)
|
256
256
|
|
257
257
|
lce = LigerFusedLinearCrossEntropyLoss()
|
258
|
-
loss = lce(self.
|
258
|
+
loss = lce(self.lm_head.weight, shift_hidden_states, shift_labels)
|
259
259
|
else:
|
260
260
|
logits = self.lm_head(kept_hidden_states)
|
261
261
|
if labels is not None:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: liger_kernel_nightly
|
3
|
-
Version: 0.6.0.dev20250709042125
|
3
|
+
Version: 0.6.0.dev20250714030807
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
@@ -411,7 +411,7 @@ loss.backward()
|
|
411
411
|
|
412
412
|
- For issues, create a Github ticket in this repository
|
413
413
|
- For open discussion, join [our discord channel on GPUMode](https://discord.com/channels/1189498204333543425/1275130785933951039)
|
414
|
-
- For formal collaboration, send an email to yannchen@linkedin.com and
|
414
|
+
- For formal collaboration, send an email to yannchen@linkedin.com and zhipwang@linkedin.com
|
415
415
|
|
416
416
|
## Cite this work
|
417
417
|
|
@@ -419,13 +419,13 @@ Biblatex entry:
|
|
419
419
|
```bib
|
420
420
|
@article{hsu2024ligerkernelefficienttriton,
|
421
421
|
title={Liger Kernel: Efficient Triton Kernels for LLM Training},
|
422
|
-
author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen},
|
422
|
+
author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen and Zhipeng Wang},
|
423
423
|
year={2024},
|
424
424
|
eprint={2410.10989},
|
425
425
|
archivePrefix={arXiv},
|
426
426
|
primaryClass={cs.LG},
|
427
427
|
url={https://arxiv.org/abs/2410.10989},
|
428
|
-
journal={
|
428
|
+
journal={Proceedings of the ICML 2025 Workshop on Championing Opensource Development in Machine Learning (CODEML ’25)},
|
429
429
|
}
|
430
430
|
```
|
431
431
|
|
@@ -68,7 +68,7 @@ liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1w
|
|
68
68
|
liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
69
69
|
liger_kernel/transformers/model/gemma.py,sha256=mNX-mIwV6jI4zfbrUHp0C468pOmjzsL7mjXipGt-eS0,10007
|
70
70
|
liger_kernel/transformers/model/gemma2.py,sha256=R_JFPyWTk7RyA7D05ZiIaNO5pX8gWcvfWf-6rdCRMxs,11296
|
71
|
-
liger_kernel/transformers/model/gemma3.py,sha256=
|
71
|
+
liger_kernel/transformers/model/gemma3.py,sha256=FKO4j3t4W_5uECRA1lhVnXC-It2GhirHm4tpCf9ApAc,12785
|
72
72
|
liger_kernel/transformers/model/glm4.py,sha256=GlnEhdGJuDIqp2R9qC54biY3HwV1tWmfpJm6ijoAsrM,5257
|
73
73
|
liger_kernel/transformers/model/llama.py,sha256=i8jJgyZsMKWQ-zKloETLugtwFpUOdaWxLDceciFXKd4,12832
|
74
74
|
liger_kernel/transformers/model/llama4.py,sha256=IgbB8sTh3dlETQnaNNy1bZLuXy-Nt7qmeAjF27ydGpg,4210
|
@@ -89,9 +89,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
|
|
89
89
|
liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
|
90
90
|
liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
|
91
91
|
liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
|
92
|
-
liger_kernel_nightly-0.6.0.
|
93
|
-
liger_kernel_nightly-0.6.0.
|
94
|
-
liger_kernel_nightly-0.6.0.
|
95
|
-
liger_kernel_nightly-0.6.0.
|
96
|
-
liger_kernel_nightly-0.6.0.
|
97
|
-
liger_kernel_nightly-0.6.0.
|
92
|
+
liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
|
93
|
+
liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/METADATA,sha256=OSGl46Gr7NnD-8gSkMhIczRaOmpJFM_W6pc28IbIHm0,24634
|
94
|
+
liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
|
95
|
+
liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
|
96
|
+
liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
|
97
|
+
liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|