liger-kernel-nightly 0.6.0.dev20250709042125__py3-none-any.whl → 0.6.0.dev20250714030807__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -255,7 +255,7 @@ def multimodal_forward(
255
255
  shift_labels = shift_labels.view(-1).to(hidden_device)
256
256
 
257
257
  lce = LigerFusedLinearCrossEntropyLoss()
258
- loss = lce(self.language_model.lm_head.weight, shift_hidden_states, shift_labels)
258
+ loss = lce(self.lm_head.weight, shift_hidden_states, shift_labels)
259
259
  else:
260
260
  logits = self.lm_head(kept_hidden_states)
261
261
  if labels is not None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.0.dev20250709042125
3
+ Version: 0.6.0.dev20250714030807
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -411,7 +411,7 @@ loss.backward()
411
411
 
412
412
  - For issues, create a GitHub ticket in this repository
413
413
  - For open discussion, join [our discord channel on GPUMode](https://discord.com/channels/1189498204333543425/1275130785933951039)
414
- - For formal collaboration, send an email to yannchen@linkedin.com and hning@linkedin.com
414
+ - For formal collaboration, send an email to yannchen@linkedin.com and zhipwang@linkedin.com
415
415
 
416
416
  ## Cite this work
417
417
 
@@ -419,13 +419,13 @@ Biblatex entry:
419
419
  ```bib
420
420
  @article{hsu2024ligerkernelefficienttriton,
421
421
  title={Liger Kernel: Efficient Triton Kernels for LLM Training},
422
- author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen},
422
+ author={Pin-Lun Hsu and Yun Dai and Vignesh Kothapalli and Qingquan Song and Shao Tang and Siyu Zhu and Steven Shimizu and Shivam Sahni and Haowen Ning and Yanning Chen and Zhipeng Wang},
423
423
  year={2024},
424
424
  eprint={2410.10989},
425
425
  archivePrefix={arXiv},
426
426
  primaryClass={cs.LG},
427
427
  url={https://arxiv.org/abs/2410.10989},
428
- journal={arXiv preprint arXiv:2410.10989},
428
+ journal={Proceedings of the ICML 2025 Workshop on Championing Opensource Development in Machine Learning (CODEML ’25)},
429
429
  }
430
430
  ```
431
431
 
@@ -68,7 +68,7 @@ liger_kernel/transformers/experimental/embedding.py,sha256=2P0QYdlFyFrG5OqTzTa1w
68
68
  liger_kernel/transformers/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
69
  liger_kernel/transformers/model/gemma.py,sha256=mNX-mIwV6jI4zfbrUHp0C468pOmjzsL7mjXipGt-eS0,10007
70
70
  liger_kernel/transformers/model/gemma2.py,sha256=R_JFPyWTk7RyA7D05ZiIaNO5pX8gWcvfWf-6rdCRMxs,11296
71
- liger_kernel/transformers/model/gemma3.py,sha256=XbwoqOSPmtS0BPHgT8jZftTzplmiAicgBa6ocNcet8o,12800
71
+ liger_kernel/transformers/model/gemma3.py,sha256=FKO4j3t4W_5uECRA1lhVnXC-It2GhirHm4tpCf9ApAc,12785
72
72
  liger_kernel/transformers/model/glm4.py,sha256=GlnEhdGJuDIqp2R9qC54biY3HwV1tWmfpJm6ijoAsrM,5257
73
73
  liger_kernel/transformers/model/llama.py,sha256=i8jJgyZsMKWQ-zKloETLugtwFpUOdaWxLDceciFXKd4,12832
74
74
  liger_kernel/transformers/model/llama4.py,sha256=IgbB8sTh3dlETQnaNNy1bZLuXy-Nt7qmeAjF27ydGpg,4210
@@ -89,9 +89,9 @@ liger_kernel/transformers/trainer/__init__.py,sha256=p7yQfklV8-467qSz_ZMimkbDF7H
89
89
  liger_kernel/transformers/trainer/orpo_trainer.py,sha256=tX0h63aOFe3rNqTmk6JpMf75UPo981yzEa6TghnjS0Q,5370
90
90
  liger_kernel/triton/__init__.py,sha256=qCiCamzCRv6lpV8IqpAc9YMdNKC7GKurClWceQPnlis,92
91
91
  liger_kernel/triton/monkey_patch.py,sha256=Rd0hUHAzDkFfHvnX7-PBaNK5EKnZhtfM_h-fgQH9HPY,1568
92
- liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
93
- liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/METADATA,sha256=wPo1-0a5kmFfk7NvN28x3Sk_NV0g4AEzrOnbB4fJHMk,24535
94
- liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
95
- liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
96
- liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
97
- liger_kernel_nightly-0.6.0.dev20250709042125.dist-info/RECORD,,
92
+ liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/LICENSE,sha256=OhzLDHJ0to4a8sodVLELZiCFylZ1NAAYLs-HrjPy0ag,1312
93
+ liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/METADATA,sha256=OSGl46Gr7NnD-8gSkMhIczRaOmpJFM_W6pc28IbIHm0,24634
94
+ liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/NOTICE,sha256=njwnoPZLh9AN8SJQzxvCGLHi-8X__AvWRze6joNXIY8,2066
95
+ liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
96
+ liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/top_level.txt,sha256=2eghu4hA3LnkM7ElW92tQ8zegWKgSbeo-k-aGe1YnvY,13
97
+ liger_kernel_nightly-0.6.0.dev20250714030807.dist-info/RECORD,,