liger-kernel-nightly 0.6.0.dev20250718050840__tar.gz → 0.6.0.dev20250719041120__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (278)
  1. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/PKG-INFO +1 -1
  2. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/data/all_benchmark_data.csv +72 -0
  3. liger_kernel_nightly-0.6.0.dev20250719041120/benchmark/scripts/benchmark_fused_add_rms_norm.py +201 -0
  4. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/pyproject.toml +1 -1
  5. liger_kernel_nightly-0.6.0.dev20250719041120/src/liger_kernel/ops/fused_add_rms_norm.py +412 -0
  6. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/__init__.py +2 -0
  7. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/functional.py +5 -0
  8. liger_kernel_nightly-0.6.0.dev20250719041120/src/liger_kernel/transformers/fused_add_rms_norm.py +39 -0
  9. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
  10. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel_nightly.egg-info/SOURCES.txt +4 -0
  11. liger_kernel_nightly-0.6.0.dev20250719041120/test/transformers/test_fused_add_rms_norm.py +219 -0
  12. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_monkey_patch.py +2 -1
  13. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  14. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  15. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/pull_request_template.md +0 -0
  16. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/workflows/amd-ci.yml +0 -0
  17. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/workflows/benchmark.yml +0 -0
  18. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/workflows/docs.yml +0 -0
  19. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/workflows/intel-ci.yml +0 -0
  20. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/workflows/nvi-ci.yml +0 -0
  21. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/workflows/publish-nightly.yml +0 -0
  22. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.github/workflows/publish-release.yml +0 -0
  23. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/.gitignore +0 -0
  24. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/LICENSE +0 -0
  25. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/Makefile +0 -0
  26. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/NOTICE +0 -0
  27. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/README.md +0 -0
  28. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/README.md +0 -0
  29. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/__init__.py +0 -0
  30. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/benchmarks_visualizer.py +0 -0
  31. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/__init__.py +0 -0
  32. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  33. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  34. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  35. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  36. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  37. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_dyt.py +0 -0
  38. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_embedding.py +0 -0
  39. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  40. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  41. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  42. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_geglu.py +0 -0
  43. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_group_norm.py +0 -0
  44. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_jsd.py +0 -0
  45. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_kl_div.py +0 -0
  46. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  47. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  48. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  49. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  50. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  51. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  52. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_rope.py +0 -0
  53. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  54. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_softmax.py +0 -0
  55. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  56. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  57. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_swiglu.py +0 -0
  58. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/benchmark_tvd.py +0 -0
  59. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/benchmark/scripts/utils.py +0 -0
  60. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/dev/fmt-requirements.txt +0 -0
  61. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/dev/modal/benchmarks.py +0 -0
  62. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/dev/modal/tests.py +0 -0
  63. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/dev/modal/tests_bwd.py +0 -0
  64. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/Examples.md +0 -0
  65. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/Getting-Started.md +0 -0
  66. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/High-Level-APIs.md +0 -0
  67. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/Low-Level-APIs.md +0 -0
  68. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/acknowledgement.md +0 -0
  69. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/contributing.md +0 -0
  70. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/images/banner.GIF +0 -0
  71. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/images/compose.gif +0 -0
  72. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/images/e2e-memory.png +0 -0
  73. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/images/e2e-tps.png +0 -0
  74. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/images/logo-banner.png +0 -0
  75. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/images/patch.gif +0 -0
  76. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/images/post-training.png +0 -0
  77. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/index.md +0 -0
  78. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/docs/license.md +0 -0
  79. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/alignment/accelerate_config.yaml +0 -0
  80. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/alignment/run_orpo.py +0 -0
  81. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/README.md +0 -0
  82. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/callback.py +0 -0
  83. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/config/fsdp_config.json +0 -0
  84. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  85. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  86. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  87. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/img/llama_tps.png +0 -0
  88. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  89. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/img/qwen_tps.png +0 -0
  90. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/launch_on_modal.py +0 -0
  91. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/requirements.txt +0 -0
  92. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/run_benchmarks.sh +0 -0
  93. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/run_gemma.sh +0 -0
  94. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/run_llama.sh +0 -0
  95. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/run_qwen.sh +0 -0
  96. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/run_qwen2_vl.sh +0 -0
  97. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/training.py +0 -0
  98. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/huggingface/training_multimodal.py +0 -0
  99. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/lightning/README.md +0 -0
  100. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/lightning/requirements.txt +0 -0
  101. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/lightning/training.py +0 -0
  102. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/README.md +0 -0
  103. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/callback.py +0 -0
  104. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  105. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  106. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  107. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  108. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  109. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  110. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  111. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  112. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  113. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/medusa_util.py +0 -0
  114. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/requirements.txt +0 -0
  115. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  116. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/examples/medusa/train.py +0 -0
  117. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/licenses/LICENSE-Apache-2.0 +0 -0
  118. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  119. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  120. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/licenses/LICENSE-MIT-llmc +0 -0
  121. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/licenses/LICENSE-MIT-triton +0 -0
  122. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/mkdocs.yml +0 -0
  123. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/setup.cfg +0 -0
  124. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/setup.py +0 -0
  125. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/__init__.py +0 -0
  126. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/README.md +0 -0
  127. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  128. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  129. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  130. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  131. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/functional.py +0 -0
  132. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  133. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  134. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  135. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  136. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  137. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  138. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  139. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  140. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  141. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/env_report.py +0 -0
  142. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/__init__.py +0 -0
  143. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/cross_entropy.py +0 -0
  144. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/dyt.py +0 -0
  145. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  146. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  147. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  148. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  149. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  150. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/geglu.py +0 -0
  151. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/group_norm.py +0 -0
  152. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/grpo_loss.py +0 -0
  153. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/jsd.py +0 -0
  154. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/kl_div.py +0 -0
  155. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/layer_norm.py +0 -0
  156. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  157. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  158. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/rms_norm.py +0 -0
  159. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/rope.py +0 -0
  160. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/softmax.py +0 -0
  161. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/sparsemax.py +0 -0
  162. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/swiglu.py +0 -0
  163. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/tvd.py +0 -0
  164. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/ops/utils.py +0 -0
  165. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/auto_model.py +0 -0
  166. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  167. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/dyt.py +0 -0
  168. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  169. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/fsdp.py +0 -0
  170. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  171. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  172. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  173. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/geglu.py +0 -0
  174. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/group_norm.py +0 -0
  175. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  176. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/jsd.py +0 -0
  177. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/kl_div.py +0 -0
  178. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/layer_norm.py +0 -0
  179. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/__init__.py +0 -0
  180. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/gemma.py +0 -0
  181. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  182. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  183. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/glm4.py +0 -0
  184. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/llama.py +0 -0
  185. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/llama4.py +0 -0
  186. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/llava.py +0 -0
  187. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  188. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/mistral.py +0 -0
  189. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  190. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/mllama.py +0 -0
  191. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  192. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  193. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/phi3.py +0 -0
  194. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  195. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  196. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  197. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  198. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  199. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/model/smollm3.py +0 -0
  200. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/monkey_patch.py +0 -0
  201. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  202. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  203. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/rms_norm.py +0 -0
  204. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/rope.py +0 -0
  205. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/softmax.py +0 -0
  206. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/sparsemax.py +0 -0
  207. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/swiglu.py +0 -0
  208. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  209. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  210. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  211. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/transformers/tvd.py +0 -0
  212. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/triton/__init__.py +0 -0
  213. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/triton/monkey_patch.py +0 -0
  214. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel/utils.py +0 -0
  215. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  216. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  217. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  218. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/__init__.py +0 -0
  219. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/__init__.py +0 -0
  220. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/test_cosine_loss.py +0 -0
  221. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/test_cpo_loss.py +0 -0
  222. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/test_dpo_loss.py +0 -0
  223. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/test_grpo_loss.py +0 -0
  224. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/test_jsd_loss.py +0 -0
  225. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/test_kto_loss.py +0 -0
  226. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/test_orpo_loss.py +0 -0
  227. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/chunked_loss/test_simpo_loss.py +0 -0
  228. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/conftest.py +0 -0
  229. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/__init__.py +0 -0
  230. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/bf16/__init__.py +0 -0
  231. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/bf16/test_mini_models.py +0 -0
  232. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  233. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  234. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/fp32/__init__.py +0 -0
  235. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/fp32/test_mini_models.py +0 -0
  236. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
  237. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  238. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  239. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  240. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  241. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  242. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  243. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  244. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  245. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  246. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  247. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  248. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/tiny_shakespeare.txt +0 -0
  249. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  250. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  251. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  252. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_auto_model.py +0 -0
  253. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_cross_entropy.py +0 -0
  254. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_dyt.py +0 -0
  255. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_embedding.py +0 -0
  256. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_flex_attention.py +0 -0
  257. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  258. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_fused_linear_jsd.py +0 -0
  259. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  260. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_geglu.py +0 -0
  261. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_group_norm.py +0 -0
  262. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_grpo_loss.py +0 -0
  263. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_jsd.py +0 -0
  264. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_kl_div.py +0 -0
  265. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_layer_norm.py +0 -0
  266. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_mm_int8int2.py +0 -0
  267. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_multi_token_attention.py +0 -0
  268. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_qwen2vl_mrope.py +0 -0
  269. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_rms_norm.py +0 -0
  270. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_rope.py +0 -0
  271. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_softmax.py +0 -0
  272. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_sparsemax.py +0 -0
  273. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_swiglu.py +0 -0
  274. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_trainer_integration.py +0 -0
  275. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_transformers.py +0 -0
  276. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/transformers/test_tvd.py +0 -0
  277. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/triton/test_triton_monkey_patch.py +0 -0
  278. {liger_kernel_nightly-0.6.0.dev20250718050840 → liger_kernel_nightly-0.6.0.dev20250719041120}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.0.dev20250718050840
3
+ Version: 0.6.0.dev20250719041120
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -1493,3 +1493,75 @@ distill_cosine_loss,torch,full,memory,MB,BT,B x T,1024,7566.2822265625,7566.2822
1493
1493
  distill_cosine_loss,torch,full,memory,MB,BT,B x T,2048,11590.3134765625,11590.3134765625,11590.3134765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
1494
1494
  distill_cosine_loss,torch,full,memory,MB,BT,B x T,4096,19654.375,19654.375,19654.375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
1495
1495
  distill_cosine_loss,torch,full,memory,MB,BT,B x T,8192,35782.5,35782.5,35782.5,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
1496
+ fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,1024,0.01759999990463257,0.017311999574303627,0.017920000478625298,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
1497
+ fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,2048,0.02924799919128418,0.028863999992609024,0.029983999207615852,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
1498
+ fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,4096,0.05129599943757057,0.050624001771211624,0.05209600180387497,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
1499
+ fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,8192,0.09344000369310379,0.09296000003814697,0.09382399916648865,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
1500
+ fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,16384,0.1791680008172989,0.17814399302005768,0.1796800047159195,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
1501
+ fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,32768,0.43830400705337524,0.43744000792503357,0.43929600715637207,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
1502
+ fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,1024,0.060095999389886856,0.059808000922203064,0.06054399907588959,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
1503
+ fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,2048,0.09084799885749817,0.09027200192213058,0.09161599725484848,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
1504
+ fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,4096,0.17820799350738525,0.17744000256061554,0.17897599935531616,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
1505
+ fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,8192,0.312608003616333,0.3118720054626465,0.31324800848960876,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
1506
+ fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,16384,0.574944019317627,0.5740479826927185,0.5756288051605225,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
1507
+ fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,32768,1.0943039655685425,1.0934272289276123,1.0951999425888062,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
1508
+ fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,1024,0.0352960005402565,0.03481600061058998,0.03811199963092804,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
1509
+ fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,2048,0.05430399999022484,0.05392000079154968,0.05503999814391136,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
1510
+ fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,4096,0.10592000186443329,0.1054655984044075,0.10630399733781815,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
1511
+ fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,8192,0.19679999351501465,0.19631999731063843,0.19724799692630768,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
1512
+ fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,16384,0.37436801195144653,0.3733760118484497,0.3752320110797882,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
1513
+ fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,32768,0.7376000285148621,0.7361343741416931,0.7391359806060791,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
1514
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,1024,0.3147200047969818,0.30796160697937014,0.32764801383018494,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
1515
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,2048,0.3089919984340668,0.30374398827552795,0.3226880133152008,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
1516
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,4096,0.30691200494766235,0.3023296058177948,0.3205504059791565,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
1517
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,8192,0.3246079981327057,0.3185984075069428,0.33656961321830753,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
1518
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,16384,0.6010559797286987,0.5996800065040588,0.6026239991188049,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
1519
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,32768,1.8402559757232666,1.8322880268096924,1.8461120128631592,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
1520
+ fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,1024,0.23878400027751923,0.23545600473880768,0.2507520020008087,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
1521
+ fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,2048,0.34513600170612335,0.34377598762512207,0.34678399562835693,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
1522
+ fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,4096,0.6330879926681519,0.631712019443512,0.6345599889755249,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
1523
+ fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,8192,1.1185599565505981,1.1172800064086914,1.1196800470352173,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
1524
+ fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,16384,2.0697600841522217,2.0678528785705566,2.0713536739349365,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
1525
+ fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,32768,3.9561920166015625,3.953824043273926,3.9581120014190674,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
1526
+ fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,1024,0.38916800916194916,0.3824320137500763,0.4037184059619903,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
1527
+ fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,2048,0.3890720009803772,0.38193280100822447,0.4032831907272339,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
1528
+ fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,4096,0.39715200662612915,0.3928639888763428,0.41097599267959595,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
1529
+ fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,8192,0.6275200247764587,0.6259520053863525,0.6287999749183655,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
1530
+ fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,16384,1.202239990234375,1.199679970741272,1.2048959732055664,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
1531
+ fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,32768,2.7738559246063232,2.7705343723297116,2.777868890762329,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
1532
+ fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,1024,0.15619200468063354,0.15376000106334686,0.1661248028278351,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
1533
+ fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,2048,0.15825600177049637,0.15600000321865082,0.16911999881267548,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
1534
+ fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,4096,0.16700799763202667,0.16502399742603302,0.1709440052509308,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
1535
+ fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,8192,0.1712000072002411,0.1700800061225891,0.17215999960899353,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
1536
+ fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,16384,0.42505601048469543,0.4233280122280121,0.42691200971603394,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
1537
+ fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,32768,1.4057759642601013,1.3944000005722046,1.4099839925765991,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
1538
+ fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,1024,0.1520960032939911,0.15136000514030457,0.1528960019350052,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
1539
+ fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,2048,0.2533760070800781,0.2524160146713257,0.25436800718307495,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
1540
+ fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,4096,0.4551039934158325,0.4540799856185913,0.45612800121307373,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
1541
+ fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,8192,0.8053439855575562,0.8038079738616943,0.806656002998352,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
1542
+ fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,16384,1.4933120012283325,1.492095947265625,1.49452805519104,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
1543
+ fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,32768,2.8600640296936035,2.8583295822143557,2.8612607955932616,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
1544
+ fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,1024,0.20175999402999878,0.199072003364563,0.2154303938150406,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1545
+ fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,2048,0.20263999700546265,0.20000000298023224,0.21675519943237304,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1546
+ fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,4096,0.25276800990104675,0.2515519857406616,0.2539199888706207,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1547
+ fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,8192,0.4322720021009445,0.43088001012802124,0.4336000084877014,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1548
+ fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,16384,0.8288000226020813,0.8266303777694701,0.8311295866966247,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1549
+ fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,32768,2.03987193107605,2.0360767364501955,2.0436416149139403,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1550
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,1024,72.546875,72.546875,72.546875,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1551
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,2048,145.0859375,145.0859375,145.0859375,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1552
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,4096,290.1640625,290.1640625,290.1640625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1553
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,8192,580.3203125,580.3203125,580.3203125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1554
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,16384,1160.6328125,1160.6328125,1160.6328125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1555
+ fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,32768,2321.2578125,2321.2578125,2321.2578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1556
+ fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,1024,104.03173828125,104.03173828125,104.03173828125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1557
+ fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,2048,208.05517578125,208.05517578125,208.05517578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1558
+ fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,4096,416.10205078125,416.10205078125,416.10205078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1559
+ fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,8192,832.19580078125,832.19580078125,832.19580078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1560
+ fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,16384,1664.3125,1664.3125,1664.3125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1561
+ fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,32768,3328.625,3328.625,3328.625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1562
+ fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,1024,104.03564453125,104.03564453125,104.03564453125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1563
+ fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,2048,208.06298828125,208.06298828125,208.06298828125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1564
+ fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,4096,416.11767578125,416.11767578125,416.11767578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1565
+ fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,8192,832.22705078125,832.22705078125,832.22705078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1566
+ fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,16384,1544.44580078125,1544.44580078125,1544.44580078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
1567
+ fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,32768,2960.8837890625,2960.8837890625,2960.8837890625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
@@ -0,0 +1,201 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import triton
4
+
5
+ from utils import QUANTILES
6
+ from utils import SingleBenchmarkRunInput
7
+ from utils import SingleBenchmarkRunOutput
8
+ from utils import _test_memory
9
+ from utils import parse_benchmark_script_args
10
+ from utils import run_benchmarks
11
+
12
+ from liger_kernel.transformers.fused_add_rms_norm import LigerFusedAddRMSNorm
13
+ from liger_kernel.transformers.rms_norm import LigerRMSNorm
14
+ from liger_kernel.utils import infer_device
15
+
16
+ device = infer_device()
17
+
18
+
19
class NaiveAddRMSNorm(nn.Module):
    """Eager PyTorch baseline: residual addition followed by RMS normalization."""

    def __init__(self, hidden_size, eps=1e-6):
        """
        Naive implementation of the add residual rms norm.

        Creates a learnable per-feature scale of length ``hidden_size``
        (initialized to ones) and stores ``eps`` for the variance term.
        """
        super().__init__()
        self.variance_epsilon = eps
        self.weight = nn.Parameter(torch.ones(hidden_size))

    def forward(self, hidden_states, residual):
        """Return ``(normalized, summed)``, both cast back to the dtype of ``hidden_states``."""
        out_dtype = hidden_states.dtype
        # The sum and the statistics are computed in float32 regardless of input dtype.
        summed = hidden_states.to(torch.float32) + residual.to(torch.float32)
        mean_square = summed.pow(2).mean(-1, keepdim=True)
        normed = summed * torch.rsqrt(mean_square + self.variance_epsilon)
        return self.weight * normed.to(out_dtype), summed.to(out_dtype)
37
+
38
+
39
class AddLigerRMSNorm(nn.Module):
    """Unfused baseline: eager residual addition followed by the LigerRMSNorm kernel.

    The learnable scale is owned by the wrapped ``LigerRMSNorm`` so that it is
    applied exactly once, matching ``NaiveAddRMSNorm``. The previous version
    kept a second ``weight`` parameter and multiplied by it after the kernel,
    which added an extra elementwise multiply (forward and backward) to this
    baseline only.
    NOTE(review): relies on ``LigerRMSNorm`` exposing its scale as ``.weight``
    and applying it inside the kernel - confirm against the library.
    """

    def __init__(self, hidden_size, eps=1e-6):
        """
        AddLigerRMSNorm is equivalent to NaiveAddRMSNorm class above, but uses the LigerRMSNorm kernel.
        """
        super().__init__()
        self.variance_epsilon = eps
        # in_place=False is kept exactly as in the original baseline.
        self.rms_norm = LigerRMSNorm(hidden_size, eps, in_place=False)

    @property
    def weight(self):
        """Scale parameter of the wrapped norm, kept so ``module.weight`` still resolves."""
        return self.rms_norm.weight

    def forward(self, hidden_states, residual):
        """Return ``(normalized, summed)``, both cast back to the dtype of ``hidden_states``."""
        input_dtype = hidden_states.dtype
        # Residual addition is done eagerly in float32, as in NaiveAddRMSNorm.
        hidden_states = hidden_states.to(torch.float32) + residual.to(torch.float32)
        residual = hidden_states
        hidden_states = self.rms_norm(hidden_states)
        return hidden_states.to(input_dtype), residual.to(input_dtype)
57
+
58
+
59
def bench_speed_fused_residual_rms_norm(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput:
    """Time one add + RMS-norm implementation for a single hidden size.

    Args:
        input: Benchmark descriptor. ``input.x`` is the hidden size,
            ``input.kernel_provider`` selects the implementation
            ("liger_fused_add_rms_norm", "huggingface" or "liger_rms_norm"),
            ``input.kernel_operation_mode`` is "forward", "backward" or "full",
            and ``input.extra_benchmark_config`` supplies "M" (number of rows),
            "eps" and "dtype".

    Returns:
        SingleBenchmarkRunOutput holding the three timings reported by
        ``triton.testing.do_bench`` for ``QUANTILES``.

    Raises:
        ValueError: If the provider or the operation mode is not recognised.
    """
    N = input.x
    provider = input.kernel_provider
    mode = input.kernel_operation_mode

    extra_benchmark_config = input.extra_benchmark_config
    M = extra_benchmark_config["M"]
    eps = extra_benchmark_config["eps"]
    dtype = extra_benchmark_config["dtype"]

    module_classes = {
        # Fused Add RMS Norm
        "liger_fused_add_rms_norm": LigerFusedAddRMSNorm,
        # Naive implementation
        "huggingface": NaiveAddRMSNorm,
        # LigerRMSNorm without fused residual addition
        "liger_rms_norm": AddLigerRMSNorm,
    }
    if provider not in module_classes:
        raise ValueError(f"Unknown kernel provider {provider!r}; expected one of {sorted(module_classes)}")
    if mode not in ("forward", "backward", "full"):
        raise ValueError(f"Unknown kernel operation mode {mode!r}; expected 'forward', 'backward' or 'full'")

    # Only the implementation under test is instantiated.
    module = module_classes[provider](hidden_size=N, eps=eps).to(device)

    x_shape = (M, N)
    x = torch.randn(x_shape, dtype=dtype, device=device)
    r = torch.randn(x_shape, dtype=dtype, device=device)
    dy = torch.randn_like(x)
    ds = torch.randn_like(r)
    x.requires_grad_(True)
    r.requires_grad_(True)

    def y_fwd():
        return module(x, r)

    if mode == "forward":
        bench_fn = y_fwd
    elif mode == "backward":
        # Build the graph once outside the timed region; retain_graph lets
        # every timed repetition run backward through the same graph.
        y, s = y_fwd()

        def bench_fn():
            torch.autograd.backward((y, s), (dy, ds), retain_graph=True)

    else:  # mode == "full"

        def bench_fn():
            y, s = y_fwd()
            torch.autograd.backward((y, s), (dy, ds))

    # Unpack order follows QUANTILES (presumably [0.5, 0.2, 0.8] - confirm in utils).
    ms_50, ms_20, ms_80 = triton.testing.do_bench(
        bench_fn,
        grad_to_none=[x, r],
        rep=500,
        quantiles=QUANTILES,
    )

    return SingleBenchmarkRunOutput(
        y_20=ms_20,
        y_50=ms_50,
        y_80=ms_80,
    )
129
+
130
+
131
def bench_memory_fused_residual_rms_norm(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput:
    """Measure memory of a full forward + backward pass for one provider and hidden size.

    Args:
        input: Benchmark descriptor. ``input.x`` is the hidden size,
            ``input.kernel_provider`` selects the implementation
            ("liger_fused_add_rms_norm", "huggingface" or "liger_rms_norm"),
            and ``input.extra_benchmark_config`` supplies "M" (number of rows),
            "eps" and "dtype".

    Returns:
        SingleBenchmarkRunOutput holding the three values reported by
        ``_test_memory`` for ``QUANTILES``.

    Raises:
        ValueError: If the provider is not recognised.
    """
    N = input.x
    provider = input.kernel_provider

    extra_benchmark_config = input.extra_benchmark_config
    M = extra_benchmark_config["M"]
    eps = extra_benchmark_config["eps"]
    dtype = extra_benchmark_config["dtype"]

    module_classes = {
        "liger_fused_add_rms_norm": LigerFusedAddRMSNorm,
        "huggingface": NaiveAddRMSNorm,
        "liger_rms_norm": AddLigerRMSNorm,
    }
    if provider not in module_classes:
        raise ValueError(f"Unknown kernel provider {provider!r}; expected one of {sorted(module_classes)}")

    # Only the implementation under test is placed on the device, so the
    # parameters of the other providers are not resident while measuring.
    module = module_classes[provider](hidden_size=N, eps=eps).to(device)

    x_shape = (M, N)
    x = torch.randn(x_shape, dtype=dtype, device=device)
    r = torch.randn(x_shape, dtype=dtype, device=device)
    dy = torch.randn_like(x)
    ds = torch.randn_like(r)
    x.requires_grad_(True)
    r.requires_grad_(True)

    def full():
        y, s = module(x, r)
        torch.autograd.backward((y, s), (dy, ds))

    # Unpack order follows QUANTILES (presumably [0.5, 0.2, 0.8] - confirm in utils).
    mem_50, mem_20, mem_80 = _test_memory(full, quantiles=QUANTILES)

    return SingleBenchmarkRunOutput(
        y_20=mem_20,
        y_50=mem_50,
        y_80=mem_80,
    )
173
+
174
+
175
if __name__ == "__main__":
    # Script entry point: run the speed benchmark, then the memory benchmark,
    # over the same sweep of hidden sizes and kernel providers.
    args = parse_benchmark_script_args()

    hidden_sizes = [1 << exponent for exponent in range(10, 16)]

    common_configs = dict(
        kernel_name="fused_add_rms_norm",
        x_name="H",
        x_label="hidden size",
        x_values=hidden_sizes,
        kernel_providers=["liger_fused_add_rms_norm", "huggingface", "liger_rms_norm"],
        extra_benchmark_configs=[{"M": 2048, "dtype": torch.float32, "eps": 1e-6}],
        overwrite=args.overwrite,
    )

    # (benchmark function, operation modes, metric name, metric unit),
    # executed in this order.
    benchmark_plan = (
        (bench_speed_fused_residual_rms_norm, ["forward", "full", "backward"], "speed", "ms"),
        (bench_memory_fused_residual_rms_norm, ["full"], "memory", "MB"),
    )

    for bench_fn, operation_modes, metric, unit in benchmark_plan:
        run_benchmarks(
            bench_test_fn=bench_fn,
            kernel_operation_modes=operation_modes,
            metric_name=metric,
            metric_unit=unit,
            **common_configs,
        )
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.0.dev20250718050840"
7
+ version = "0.6.0.dev20250719041120"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }