liger-kernel-nightly 0.5.10.dev20250630171450__tar.gz → 0.5.10.dev20250702150221__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (274)
  1. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/PKG-INFO +1 -1
  2. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/data/all_benchmark_data.csv +24 -0
  3. liger_kernel_nightly-0.5.10.dev20250702150221/benchmark/scripts/benchmark_distill_cosine_loss.py +266 -0
  4. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/pyproject.toml +1 -1
  5. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/__init__.py +1 -0
  6. liger_kernel_nightly-0.5.10.dev20250702150221/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +127 -0
  7. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/functional.py +2 -0
  8. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/monkey_patch.py +113 -31
  9. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
  10. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel_nightly.egg-info/SOURCES.txt +3 -0
  11. liger_kernel_nightly-0.5.10.dev20250702150221/test/chunked_loss/test_cosine_loss.py +320 -0
  12. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_monkey_patch.py +89 -1
  13. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  14. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  15. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/pull_request_template.md +0 -0
  16. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/workflows/amd-ci.yml +0 -0
  17. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/workflows/benchmark.yml +0 -0
  18. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/workflows/docs.yml +0 -0
  19. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/workflows/intel-ci.yml +0 -0
  20. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/workflows/nvi-ci.yml +0 -0
  21. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/workflows/publish-nightly.yml +0 -0
  22. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.github/workflows/publish-release.yml +0 -0
  23. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.gitignore +0 -0
  24. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/.idea/workspace.xml +0 -0
  25. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/LICENSE +0 -0
  26. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/Makefile +0 -0
  27. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/NOTICE +0 -0
  28. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/README.md +0 -0
  29. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/README.md +0 -0
  30. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/__init__.py +0 -0
  31. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/benchmarks_visualizer.py +0 -0
  32. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/__init__.py +0 -0
  33. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  34. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  35. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  36. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  37. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_dyt.py +0 -0
  38. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_embedding.py +0 -0
  39. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  40. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  41. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  42. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_geglu.py +0 -0
  43. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_group_norm.py +0 -0
  44. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_jsd.py +0 -0
  45. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_kl_div.py +0 -0
  46. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  47. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  48. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  49. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  50. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  51. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  52. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_rope.py +0 -0
  53. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  54. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_softmax.py +0 -0
  55. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  56. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  57. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_swiglu.py +0 -0
  58. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/benchmark_tvd.py +0 -0
  59. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/benchmark/scripts/utils.py +0 -0
  60. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/dev/fmt-requirements.txt +0 -0
  61. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/dev/modal/benchmarks.py +0 -0
  62. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/dev/modal/tests.py +0 -0
  63. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/dev/modal/tests_bwd.py +0 -0
  64. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/Examples.md +0 -0
  65. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/Getting-Started.md +0 -0
  66. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/High-Level-APIs.md +0 -0
  67. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/Low-Level-APIs.md +0 -0
  68. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/acknowledgement.md +0 -0
  69. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/contributing.md +0 -0
  70. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/images/banner.GIF +0 -0
  71. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/images/compose.gif +0 -0
  72. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/images/e2e-memory.png +0 -0
  73. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/images/e2e-tps.png +0 -0
  74. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/images/logo-banner.png +0 -0
  75. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/images/patch.gif +0 -0
  76. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/images/post-training.png +0 -0
  77. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/index.md +0 -0
  78. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/docs/license.md +0 -0
  79. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/alignment/accelerate_config.yaml +0 -0
  80. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/alignment/run_orpo.py +0 -0
  81. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/README.md +0 -0
  82. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/callback.py +0 -0
  83. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/config/fsdp_config.json +0 -0
  84. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  85. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  86. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  87. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/img/llama_tps.png +0 -0
  88. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  89. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/img/qwen_tps.png +0 -0
  90. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/launch_on_modal.py +0 -0
  91. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/requirements.txt +0 -0
  92. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/run_benchmarks.sh +0 -0
  93. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/run_gemma.sh +0 -0
  94. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/run_llama.sh +0 -0
  95. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/run_qwen.sh +0 -0
  96. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/run_qwen2_vl.sh +0 -0
  97. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/training.py +0 -0
  98. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/huggingface/training_multimodal.py +0 -0
  99. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/lightning/README.md +0 -0
  100. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/lightning/requirements.txt +0 -0
  101. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/lightning/training.py +0 -0
  102. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/README.md +0 -0
  103. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/callback.py +0 -0
  104. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  105. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  106. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  107. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  108. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  109. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  110. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  111. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  112. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  113. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/medusa_util.py +0 -0
  114. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/requirements.txt +0 -0
  115. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  116. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/examples/medusa/train.py +0 -0
  117. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/licenses/LICENSE-Apache-2.0 +0 -0
  118. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  119. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  120. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/licenses/LICENSE-MIT-llmc +0 -0
  121. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/licenses/LICENSE-MIT-triton +0 -0
  122. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/mkdocs.yml +0 -0
  123. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/setup.cfg +0 -0
  124. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/setup.py +0 -0
  125. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/__init__.py +0 -0
  126. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/README.md +0 -0
  127. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  128. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  129. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  130. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  131. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  132. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  133. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  134. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  135. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  136. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  137. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  138. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/env_report.py +0 -0
  139. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/__init__.py +0 -0
  140. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/cross_entropy.py +0 -0
  141. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/dyt.py +0 -0
  142. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  143. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  144. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  145. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  146. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  147. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/geglu.py +0 -0
  148. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/group_norm.py +0 -0
  149. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/grpo_loss.py +0 -0
  150. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/jsd.py +0 -0
  151. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/kl_div.py +0 -0
  152. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/layer_norm.py +0 -0
  153. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  154. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  155. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/rms_norm.py +0 -0
  156. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/rope.py +0 -0
  157. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/softmax.py +0 -0
  158. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/sparsemax.py +0 -0
  159. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/swiglu.py +0 -0
  160. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/tvd.py +0 -0
  161. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/ops/utils.py +0 -0
  162. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/__init__.py +0 -0
  163. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/auto_model.py +0 -0
  164. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  165. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/dyt.py +0 -0
  166. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  167. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/fsdp.py +0 -0
  168. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/functional.py +0 -0
  169. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  170. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  171. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  172. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/geglu.py +0 -0
  173. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/group_norm.py +0 -0
  174. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  175. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/jsd.py +0 -0
  176. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/kl_div.py +0 -0
  177. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/layer_norm.py +0 -0
  178. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/__init__.py +0 -0
  179. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/gemma.py +0 -0
  180. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  181. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  182. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/glm4.py +0 -0
  183. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/llama.py +0 -0
  184. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/llama4.py +0 -0
  185. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/llava.py +0 -0
  186. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  187. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/mistral.py +0 -0
  188. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  189. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/mllama.py +0 -0
  190. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  191. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  192. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/phi3.py +0 -0
  193. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  194. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  195. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  196. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  197. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  198. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  199. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  200. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/rms_norm.py +0 -0
  201. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/rope.py +0 -0
  202. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/softmax.py +0 -0
  203. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/sparsemax.py +0 -0
  204. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/swiglu.py +0 -0
  205. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  206. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  207. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  208. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/transformers/tvd.py +0 -0
  209. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/triton/__init__.py +0 -0
  210. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/triton/monkey_patch.py +0 -0
  211. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel/utils.py +0 -0
  212. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  213. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  214. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  215. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/__init__.py +0 -0
  216. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/chunked_loss/__init__.py +0 -0
  217. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/chunked_loss/test_cpo_loss.py +0 -0
  218. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/chunked_loss/test_dpo_loss.py +0 -0
  219. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/chunked_loss/test_grpo_loss.py +0 -0
  220. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/chunked_loss/test_jsd_loss.py +0 -0
  221. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/chunked_loss/test_kto_loss.py +0 -0
  222. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/chunked_loss/test_orpo_loss.py +0 -0
  223. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/chunked_loss/test_simpo_loss.py +0 -0
  224. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/conftest.py +0 -0
  225. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/__init__.py +0 -0
  226. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/bf16/__init__.py +0 -0
  227. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/bf16/test_mini_models.py +0 -0
  228. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  229. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  230. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/fp32/__init__.py +0 -0
  231. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/fp32/test_mini_models.py +0 -0
  232. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
  233. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  234. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  235. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  236. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  237. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  238. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  239. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  240. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  241. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  242. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  243. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  244. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/tiny_shakespeare.txt +0 -0
  245. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  246. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  247. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  248. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_auto_model.py +0 -0
  249. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_cross_entropy.py +0 -0
  250. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_dyt.py +0 -0
  251. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_embedding.py +0 -0
  252. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_flex_attention.py +0 -0
  253. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  254. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_fused_linear_jsd.py +0 -0
  255. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  256. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_geglu.py +0 -0
  257. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_group_norm.py +0 -0
  258. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_grpo_loss.py +0 -0
  259. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_jsd.py +0 -0
  260. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_kl_div.py +0 -0
  261. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_layer_norm.py +0 -0
  262. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_mm_int8int2.py +0 -0
  263. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_multi_token_attention.py +0 -0
  264. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_qwen2vl_mrope.py +0 -0
  265. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_rms_norm.py +0 -0
  266. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_rope.py +0 -0
  267. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_softmax.py +0 -0
  268. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_sparsemax.py +0 -0
  269. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_swiglu.py +0 -0
  270. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_trainer_integration.py +0 -0
  271. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_transformers.py +0 -0
  272. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/transformers/test_tvd.py +0 -0
  273. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/triton/test_triton_monkey_patch.py +0 -0
  274. {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250702150221}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.10.dev20250630171450
3
+ Version: 0.5.10.dev20250702150221
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -1469,3 +1469,27 @@ fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,512,15
1469
1469
  fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,1024,369.0234375,369.0234375,369.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10
1470
1470
  fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,2048,1176.0234375,1176.0234375,1176.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10
1471
1471
  fused_neighborhood_attention,torch,full,memory,MB,seq_len,sequence length,4096,4332.0234375,4332.0234375,4332.0234375,"{""batch_size"": 2, ""hidden_size"": 512, ""num_heads"": 8, ""kernel_size"": 7, ""dilation"": 2, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA H100 80GB HBM3,2025-05-27 19:27:39,0.5.10
1472
+ distill_cosine_loss,liger,forward,speed,ms,BT,B x T,1024,13.828096389770508,13.821133041381836,13.885849952697754,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:19:52,0.5.10
1473
+ distill_cosine_loss,liger,forward,speed,ms,BT,B x T,2048,27.57427215576172,27.52573432922363,27.579801940917967,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:19:52,0.5.10
1474
+ distill_cosine_loss,liger,forward,speed,ms,BT,B x T,4096,54.79423904418945,54.79423904418945,54.79423904418945,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:19:52,0.5.10
1475
+ distill_cosine_loss,liger,forward,speed,ms,BT,B x T,8192,109.73490905761719,109.73490905761719,109.73490905761719,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:19:52,0.5.10
1476
+ distill_cosine_loss,torch,forward,speed,ms,BT,B x T,1024,16.456703186035156,15.045836448669434,16.761650466918944,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:20:34,0.5.10
1477
+ distill_cosine_loss,torch,forward,speed,ms,BT,B x T,2048,29.703168869018555,29.69333839416504,29.71177024841309,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:20:34,0.5.10
1478
+ distill_cosine_loss,torch,forward,speed,ms,BT,B x T,4096,59.177982330322266,59.177982330322266,59.177982330322266,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:20:34,0.5.10
1479
+ distill_cosine_loss,torch,forward,speed,ms,BT,B x T,8192,118.3815689086914,118.3815689086914,118.3815689086914,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:20:34,0.5.10
1480
+ distill_cosine_loss,liger,full,speed,ms,BT,B x T,1024,14.654463768005371,14.63398380279541,14.68006420135498,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:21:16,0.5.10
1481
+ distill_cosine_loss,liger,full,speed,ms,BT,B x T,2048,28.274688720703125,28.27284507751465,28.279603958129883,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:21:16,0.5.10
1482
+ distill_cosine_loss,liger,full,speed,ms,BT,B x T,4096,55.96672058105469,55.96672058105469,55.96672058105469,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:21:16,0.5.10
1483
+ distill_cosine_loss,liger,full,speed,ms,BT,B x T,8192,111.38764953613281,111.38764953613281,111.38764953613281,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:21:16,0.5.10
1484
+ distill_cosine_loss,torch,full,speed,ms,BT,B x T,1024,37.45382308959961,37.42556076049805,37.482085418701175,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:01,0.5.10
1485
+ distill_cosine_loss,torch,full,speed,ms,BT,B x T,2048,73.56620788574219,73.56620788574219,73.56620788574219,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:01,0.5.10
1486
+ distill_cosine_loss,torch,full,speed,ms,BT,B x T,4096,145.73056030273438,145.73056030273438,145.73056030273438,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:01,0.5.10
1487
+ distill_cosine_loss,torch,full,speed,ms,BT,B x T,8192,291.5000305175781,291.5000305175781,291.5000305175781,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:01,0.5.10
1488
+ distill_cosine_loss,liger,full,memory,MB,BT,B x T,1024,5059.26806640625,5059.26806640625,5059.26806640625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:43,0.5.10
1489
+ distill_cosine_loss,liger,full,memory,MB,BT,B x T,2048,5087.27587890625,5087.27587890625,5087.27587890625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:43,0.5.10
1490
+ distill_cosine_loss,liger,full,memory,MB,BT,B x T,4096,5143.29150390625,5143.29150390625,5143.29150390625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:43,0.5.10
1491
+ distill_cosine_loss,liger,full,memory,MB,BT,B x T,8192,5255.32275390625,5255.32275390625,5255.32275390625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:22:43,0.5.10
1492
+ distill_cosine_loss,torch,full,memory,MB,BT,B x T,1024,7566.2822265625,7566.2822265625,7566.2822265625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
1493
+ distill_cosine_loss,torch,full,memory,MB,BT,B x T,2048,11590.3134765625,11590.3134765625,11590.3134765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
1494
+ distill_cosine_loss,torch,full,memory,MB,BT,B x T,4096,19654.375,19654.375,19654.375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
1495
+ distill_cosine_loss,torch,full,memory,MB,BT,B x T,8192,35782.5,35782.5,35782.5,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
@@ -0,0 +1,266 @@
1
+ import os
2
+ import sys
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import triton
7
+
8
+ from utils import QUANTILES
9
+ from utils import SingleBenchmarkRunInput
10
+ from utils import SingleBenchmarkRunOutput
11
+ from utils import _test_memory
12
+ from utils import parse_benchmark_script_args
13
+ from utils import run_benchmarks
14
+
15
+ from liger_kernel.chunked_loss.cosine_similarity_loss import LigerFusedLinearCosineSimilarityFunction
16
+ from liger_kernel.utils import infer_device
17
+
18
+ device = infer_device()
19
+
20
+ # Ensure the project root is in the path
21
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
22
+
23
+
24
class TorchCosineSimilarityLoss(nn.Module):
    """Baseline ("torch" provider) cosine-similarity distillation loss.

    Holds a student projection (``H // 2`` -> ``V``) and a teacher projection
    (``H`` -> ``V``) and delegates the loss computation to
    ``HFCosineLoss.get_batch_loss_metrics`` from the test suite.

    Args:
        H: Teacher hidden size; the student projection uses ``H // 2``.
        V: Output features of both projections.
        dtype: Dtype both projections are cast to.
        weight_hard_loss: Forwarded to ``HFCosineLoss``.
        weight_soft_loss: Forwarded to ``HFCosineLoss``.
        ignore_index: Forwarded to ``HFCosineLoss``.
        temperature: Forwarded to ``HFCosineLoss``.
        bias: Whether the two ``nn.Linear`` layers are created with a bias.
    """

    def __init__(
        self,
        H: int,
        V: int,
        dtype: torch.dtype,
        weight_hard_loss: float = 0.5,
        weight_soft_loss: float = 0.5,
        ignore_index: int = -100,
        temperature: float = 1.0,
        bias: bool = False,
    ):
        # Resolved at construction time rather than at module import;
        # presumably so the module-level sys.path tweak is in effect first.
        from test.chunked_loss.test_cosine_loss import HFCosineLoss

        super().__init__()

        # Student layer is created before the teacher layer, as in the
        # original, so parameter initialisation consumes the RNG in the same order.
        student_projection = nn.Linear(in_features=H // 2, out_features=V, bias=bias)
        self.student_lin = student_projection.to(dtype=dtype)

        teacher_projection = nn.Linear(in_features=H, out_features=V, bias=bias)
        self.teacher_lin = teacher_projection.to(dtype=dtype)

        reference_loss = HFCosineLoss(
            ignore_index=ignore_index,
            weight_hard_loss=weight_hard_loss,
            weight_soft_loss=weight_soft_loss,
            temperature=temperature,
        )
        # Keep only the bound method; forward() calls it directly.
        self.cosine_loss = reference_loss.get_batch_loss_metrics

    def forward(self, student: torch.Tensor, teacher: torch.Tensor, target: torch.Tensor):
        """Run the reference loss on the given inputs using both projection weights."""
        student_weight = self.student_lin.weight
        teacher_weight = self.teacher_lin.weight
        return self.cosine_loss(student, student_weight, teacher, teacher_weight, target)
50
+
51
+
52
class LigerCosineSimilarityLoss(nn.Module):
    """"liger" provider: fused-linear cosine-similarity distillation loss.

    Holds a student projection (``H // 2`` -> ``V``) and a teacher projection
    (``H`` -> ``V``) and feeds their weights and biases to
    ``LigerFusedLinearCosineSimilarityFunction.apply``.

    Args:
        H: Teacher hidden size; the student projection uses ``H // 2``.
        V: Output features of both projections.
        dtype: Dtype both projections are cast to.
        weight_hard_loss: Forwarded to the fused function.
        weight_soft_loss: Forwarded to the fused function.
        ignore_index: Forwarded to the fused function.
        temperature: Forwarded to the fused function.
        bias: Whether the two ``nn.Linear`` layers are created with a bias.
        beta: Forwarded to the fused function. Defaults to ``0.5``, the
            fused function's own default, so existing callers see no change.
    """

    def __init__(
        self,
        H: int,
        V: int,
        dtype: torch.dtype,
        weight_hard_loss: float = 0.5,
        weight_soft_loss: float = 0.5,
        ignore_index: int = -100,
        temperature: float = 1.0,
        bias: bool = False,
        beta: float = 0.5,
    ):
        super().__init__()
        self.student_lin = nn.Linear(in_features=H // 2, out_features=V, bias=bias).to(dtype=dtype)
        self.teacher_lin = nn.Linear(in_features=H, out_features=V, bias=bias).to(dtype=dtype)
        self.weight_hard_loss = weight_hard_loss
        self.weight_soft_loss = weight_soft_loss
        self.beta = beta
        self.ignore_index = ignore_index
        self.temperature = temperature
        self.cosine_loss = LigerFusedLinearCosineSimilarityFunction.apply

    def forward(self, student: torch.Tensor, teacher: torch.Tensor, target: torch.Tensor):
        """Compute the fused loss for the given student/teacher inputs and targets."""
        # ``Function.apply`` is positional-only, so every argument up to
        # ``temperature`` must be spelled out. Previously ``ignore_index`` and
        # ``temperature`` were stored on the module but never forwarded, so
        # non-default values were silently ignored.
        return self.cosine_loss(
            student,
            self.student_lin.weight,
            teacher,
            self.teacher_lin.weight,
            target,
            self.student_lin.bias,
            self.teacher_lin.bias,
            self.weight_hard_loss,
            self.weight_soft_loss,
            self.beta,
            self.ignore_index,
            self.temperature,
        )
85
+
86
+
87
def bench_memory_cosine_similarity_loss(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput:
    """Measure memory for one forward+backward pass of the cosine distillation loss.

    Args:
        input: Benchmark run description. ``input.x`` is the number of rows
            (``BT``), ``input.kernel_provider`` selects ``"liger"`` or
            ``"torch"``, and ``input.extra_benchmark_config`` supplies ``H``,
            ``V``, ``dtype``, ``bias``, ``weight_hard_loss``,
            ``weight_soft_loss`` and ``ignore_index``.

    Returns:
        A ``SingleBenchmarkRunOutput`` carrying the three values returned by
        ``_test_memory`` for ``QUANTILES``.
    """
    config = input.extra_benchmark_config
    BT = input.x
    H = config["H"]
    V = config["V"]
    dtype = config["dtype"]
    bias = config["bias"]
    weight_hard_loss = config["weight_hard_loss"]
    weight_soft_loss = config["weight_soft_loss"]
    ignore_index = config["ignore_index"]
    provider = input.kernel_provider

    # Both providers are built from the same settings. ``ignore_index`` used
    # to be omitted for the torch baseline here, unlike in the speed benchmark.
    torch_cosine_loss = TorchCosineSimilarityLoss(
        H=H,
        V=V,
        dtype=dtype,
        ignore_index=ignore_index,
        bias=bias,
        weight_hard_loss=weight_hard_loss,
        weight_soft_loss=weight_soft_loss,
    ).to(device)
    liger_cosine_loss = LigerCosineSimilarityLoss(
        H=H,
        V=V,
        dtype=dtype,
        ignore_index=ignore_index,
        bias=bias,
        weight_hard_loss=weight_hard_loss,
        weight_soft_loss=weight_soft_loss,
    ).to(device)

    # Each provider gets its own leaf copy of the same student activations so
    # gradients accumulated by one do not touch the other.
    _tensor = torch.rand(BT, H // 2, device=device, dtype=dtype)
    student_input1 = _tensor.detach().clone().requires_grad_(True)
    student_input2 = _tensor.detach().clone().requires_grad_(True)

    teacher_input = torch.rand(BT, H, device=device, dtype=dtype)

    target = torch.randint(0, V, (BT,), device=device, dtype=torch.long)

    def fwd():
        if provider == "liger":
            return liger_cosine_loss(student_input1, teacher_input, target)
        elif provider == "torch":
            return torch_cosine_loss(student_input2, teacher_input, target)

    def full():
        y = fwd()
        y.backward()

    # _test_memory returns the values in (50, 20, 80) order here.
    mem_50, mem_20, mem_80 = _test_memory(full, _iter=10, quantiles=QUANTILES)
    return SingleBenchmarkRunOutput(
        y_20=mem_20,
        y_50=mem_50,
        y_80=mem_80,
    )
140
+
141
+
142
def bench_speed_cosine_similarity_loss(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput:
    """Time the cosine distillation loss for one provider and operation mode.

    Args:
        input: Benchmark run description. ``input.x`` is the number of rows
            (``BT``), ``input.kernel_provider`` selects ``"liger"`` or
            ``"torch"``, ``input.kernel_operation_mode`` is one of
            ``"forward"``, ``"backward"`` or ``"full"``, and
            ``input.extra_benchmark_config`` supplies ``H``, ``V``, ``dtype``,
            ``bias``, ``weight_hard_loss``, ``weight_soft_loss`` and
            ``ignore_index``.

    Returns:
        A ``SingleBenchmarkRunOutput`` carrying the three timings returned by
        ``triton.testing.do_bench`` for ``QUANTILES``.
    """
    config = input.extra_benchmark_config
    BT = input.x
    H = config["H"]
    V = config["V"]
    dtype = config["dtype"]
    bias = config["bias"]
    weight_hard_loss = config["weight_hard_loss"]
    weight_soft_loss = config["weight_soft_loss"]
    ignore_index = config["ignore_index"]
    provider = input.kernel_provider
    mode = input.kernel_operation_mode

    torch_cosine_loss = TorchCosineSimilarityLoss(
        H=H,
        V=V,
        dtype=dtype,
        ignore_index=ignore_index,
        bias=bias,
        weight_hard_loss=weight_hard_loss,
        weight_soft_loss=weight_soft_loss,
    ).to(device)

    liger_cosine_loss = LigerCosineSimilarityLoss(
        H=H,
        V=V,
        dtype=dtype,
        ignore_index=ignore_index,
        bias=bias,
        weight_hard_loss=weight_hard_loss,
        weight_soft_loss=weight_soft_loss,
    ).to(device)

    # Each provider gets its own leaf copy of the same student activations so
    # gradients accumulated by one do not touch the other.
    _tensor = torch.rand(BT, H // 2, device=device, dtype=dtype)
    student_input1 = _tensor.detach().clone().requires_grad_(True)
    student_input2 = _tensor.detach().clone().requires_grad_(True)

    teacher_input = torch.rand(BT, H, device=device, dtype=dtype)

    target = torch.randint(0, V, (BT,), device=device, dtype=torch.long)

    def fwd():
        if provider == "liger":
            return liger_cosine_loss(student_input1, teacher_input, target)
        elif provider == "torch":
            return torch_cosine_loss(student_input2, teacher_input, target)

    if mode == "forward":
        ms_50, ms_20, ms_80 = triton.testing.do_bench(
            fwd,
            rep=100,
            quantiles=QUANTILES,
        )
    elif mode == "backward":
        # A duplicated ``elif mode == "backward"`` branch used to sit in front
        # of this one; it timed ``fwd`` and made this branch unreachable.
        # Build the graph once, then time only the backward pass;
        # retain_graph lets do_bench replay it repeatedly.
        y = fwd()
        ms_50, ms_20, ms_80 = triton.testing.do_bench(
            lambda: y.backward(retain_graph=True),
            grad_to_none=[student_input1, student_input2],
            rep=100,
            quantiles=QUANTILES,
        )
    elif mode == "full":

        def full():
            y = fwd()
            y.backward()

        ms_50, ms_20, ms_80 = triton.testing.do_bench(
            full,
            rep=100,
            quantiles=QUANTILES,
        )

    return SingleBenchmarkRunOutput(
        y_20=ms_20,
        y_50=ms_50,
        y_80=ms_80,
    )
226
+
227
+
228
if __name__ == "__main__":
    # Script entry point: run the speed benchmark ("forward" and "full"),
    # then the memory benchmark ("full"), over the same sweep.
    args = parse_benchmark_script_args()

    # Single model/loss configuration handed to every benchmark run.
    loss_config = {
        "H": 4096,
        "V": 128256,
        "mode": "forward",
        "dtype": torch.bfloat16,
        "bias": False,
        "weight_hard_loss": 0.5,
        "weight_soft_loss": 0.5,
        "ignore_index": -100,
    }

    # Sweep BT over 1024, 2048, 4096 and 8192.
    bt_values = [1 << exponent for exponent in range(10, 14)]

    common_configs = {
        "kernel_name": "distill_cosine_loss",
        "x_name": "BT",
        "x_label": "B x T",
        "x_values": bt_values,
        "kernel_providers": ["liger", "torch"],
        "extra_benchmark_configs": [loss_config],
        "overwrite": args.overwrite,
    }

    run_benchmarks(
        bench_test_fn=bench_speed_cosine_similarity_loss,
        kernel_operation_modes=["forward", "full"],
        metric_name="speed",
        metric_unit="ms",
        **common_configs,
    )

    run_benchmarks(
        bench_test_fn=bench_memory_cosine_similarity_loss,
        kernel_operation_modes=["full"],
        metric_name="memory",
        metric_unit="MB",
        **common_configs,
    )
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.5.10.dev20250630171450"
7
+ version = "0.5.10.dev20250702150221"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -1,3 +1,4 @@
1
+ from liger_kernel.chunked_loss.cosine_similarity_loss import LigerFusedLinearCosineSimilarityLoss # noqa:F401
1
2
  from liger_kernel.chunked_loss.cpo_loss import LigerFusedLinearCPOLoss # noqa: F401
2
3
  from liger_kernel.chunked_loss.dpo_loss import LigerFusedLinearDPOLoss # noqa: F401
3
4
  from liger_kernel.chunked_loss.grpo_loss import LigerFusedLinearGRPOLoss # noqa: F401
@@ -0,0 +1,127 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ from liger_kernel.chunked_loss.fused_linear_distillation import LigerFusedLinearDistillationBase
5
+
6
+
7
class LigerFusedLinearCosineSimilarityFunction(LigerFusedLinearDistillationBase):
    """Cosine-similarity distillation loss on top of ``LigerFusedLinearDistillationBase``.

    Supplies the soft (distillation) loss term and adapts ``forward`` /
    ``backward`` to this loss's argument list; everything else is delegated
    to the base class.
    """

    @staticmethod
    def distillation_loss_fn(student_logits, teacher_logits, beta=1.0):
        """
        Compute the cosine-similarity loss between student and teacher logits.
        Args:
            student_logits (torch.Tensor): Student logits; compared along the last dimension.
            teacher_logits (torch.Tensor): Teacher logits; compared along the last dimension.
            beta (float): Multiplier applied to ``1 - cosine_similarity``. Default: `1.0`.
        Returns:
            torch.Tensor: ``beta * (1 - cosine_similarity)`` summed over all remaining positions.
        """
        # L2-normalise both sides along the last dimension before comparing them.
        unit_student = F.normalize(student_logits, p=2, dim=-1)
        unit_teacher = F.normalize(teacher_logits, p=2, dim=-1)

        similarity = F.cosine_similarity(unit_student, unit_teacher, dim=-1)
        return (beta * (1 - similarity)).sum()

    @classmethod
    def forward(
        cls,
        ctx,
        student_input: torch.Tensor,
        student_weight: torch.Tensor,
        teacher_input: torch.Tensor,
        teacher_weight: torch.Tensor,
        true_labels: torch.LongTensor,
        student_bias: torch.Tensor,
        teacher_bias: torch.Tensor,
        weight_hard_loss: float = 0.5,
        weight_soft_loss: float = 0.5,
        beta: float = 0.5,
        ignore_index: int = -100,
        temperature: float = 1.0,
        compiled: bool = True,
        chunk_size: int = 1024,
    ):
        """Forward every argument to the base class's ``forward``.

        ``true_labels`` is passed to the base under the name ``target``; all
        other arguments keep their names. Returns whatever the base returns.
        """
        base_kwargs = dict(
            cls=cls,
            ctx=ctx,
            # Tensors taking part in the fused projection and loss.
            student_input=student_input,
            student_weight=student_weight,
            teacher_input=teacher_input,
            teacher_weight=teacher_weight,
            target=true_labels,
            student_bias=student_bias,
            teacher_bias=teacher_bias,
            # Scalar configuration.
            chunk_size=chunk_size,
            weight_hard_loss=weight_hard_loss,
            weight_soft_loss=weight_soft_loss,
            beta=beta,
            ignore_index=ignore_index,
            temperature=temperature,
            compiled=compiled,
        )
        return super().forward(**base_kwargs)

    @staticmethod
    def backward(ctx, grad_output):
        """Return one gradient slot per ``forward`` input (14 in total).

        The first six entries come from the base class's ``backward``;
        positionally they line up with ``forward``'s first six inputs. The
        remaining eight inputs receive ``None``.
        """
        base_grads = LigerFusedLinearDistillationBase.backward(ctx, grad_output)

        # None for, in order: teacher_bias, weight_hard_loss, weight_soft_loss,
        # beta, ignore_index, temperature, compiled, chunk_size.
        no_grad_slots = (None,) * 8

        return (*base_grads[:6], *no_grad_slots)
79
+
80
+
81
class LigerFusedLinearCosineSimilarityLoss(torch.nn.Module):
    """``nn.Module`` wrapper around ``LigerFusedLinearCosineSimilarityFunction``.

    Stores the scalar loss configuration at construction time and supplies it,
    together with the per-call tensors, to the function's ``apply``.
    """

    def __init__(
        self,
        weight_hard_loss: float = 0.5,
        weight_soft_loss: float = 0.5,
        beta: float = 0.5,
        ignore_index: int = -100,
        temperature: float = 1.0,
        compiled: bool = True,
        chunk_size: int = 1024,
    ):
        """Record the loss configuration.

        Raises:
            AssertionError: If ``temperature`` is 0.
        """
        super().__init__()
        assert temperature != 0, "Temperature cannot be 0."

        # Loss weighting.
        self.weight_hard_loss = weight_hard_loss
        self.weight_soft_loss = weight_soft_loss
        self.beta = beta

        # Remaining settings handed through to the fused function.
        self.ignore_index = ignore_index
        self.temperature = temperature
        self.compiled = compiled
        self.chunk_size = chunk_size

    def forward(
        self,
        student_input: torch.Tensor,
        student_weight: torch.Tensor,
        teacher_input: torch.Tensor,
        teacher_weight: torch.Tensor,
        true_labels: torch.LongTensor,
        student_bias: torch.Tensor = None,
        teacher_bias: torch.Tensor = None,
    ) -> torch.Tensor:
        """Apply the fused function to the given tensors with the stored configuration.

        Returns the result of ``LigerFusedLinearCosineSimilarityFunction.apply``.
        """
        # ``apply`` takes positional arguments only; the order below mirrors
        # the function's ``forward`` signature.
        per_call_tensors = (
            student_input,
            student_weight,
            teacher_input,
            teacher_weight,
            true_labels,
            student_bias,
            teacher_bias,
        )
        stored_settings = (
            self.weight_hard_loss,
            self.weight_soft_loss,
            self.beta,
            self.ignore_index,
            self.temperature,
            self.compiled,
            self.chunk_size,
        )
        return LigerFusedLinearCosineSimilarityFunction.apply(*per_call_tensors, *stored_settings)
@@ -1,3 +1,4 @@
1
+ from liger_kernel.chunked_loss.cosine_similarity_loss import LigerFusedLinearCosineSimilarityFunction
1
2
  from liger_kernel.chunked_loss.cpo_loss import LigerFusedLinearCPOFunction
2
3
  from liger_kernel.chunked_loss.dpo_loss import LigerFusedLinearDPOFunction
3
4
  from liger_kernel.chunked_loss.grpo_loss import LigerFusedLinearGRPOFunction
@@ -9,6 +10,7 @@ from liger_kernel.chunked_loss.simpo_loss import LigerFusedLinearSimPOFunction
9
10
  liger_fused_linear_orpo = LigerFusedLinearORPOFunction.apply
10
11
  liger_fused_linear_dpo = LigerFusedLinearDPOFunction.apply
11
12
  liger_fused_linear_jsd = LigerFusedLinearJSDFunction.apply
13
+ liger_fused_linear_cosine = LigerFusedLinearCosineSimilarityFunction.apply
12
14
  liger_fused_linear_cpo = LigerFusedLinearCPOFunction.apply
13
15
  liger_fused_linear_simpo = LigerFusedLinearSimPOFunction.apply
14
16
  liger_fused_linear_kto = LigerFusedLinearKTOFunction.apply