liger-kernel-nightly 0.5.10.dev20250611215839__tar.gz → 0.5.10.dev20250613212111__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/PKG-INFO +1 -1
  2. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_cpo_loss.py +1 -1
  3. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_cross_entropy.py +1 -1
  4. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_distill_jsd_loss.py +1 -1
  5. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_dpo_loss.py +1 -1
  6. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_embedding.py +1 -1
  7. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +1 -1
  8. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_linear_jsd.py +1 -1
  9. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_jsd.py +1 -1
  10. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_kl_div.py +1 -1
  11. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_kto_loss.py +1 -1
  12. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_layer_norm.py +1 -1
  13. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_orpo_loss.py +1 -1
  14. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_simpo_loss.py +1 -1
  15. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_softmax.py +1 -1
  16. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_swiglu.py +1 -1
  17. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_tvd.py +1 -1
  18. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/utils.py +1 -1
  19. liger_kernel_nightly-0.5.10.dev20250613212111/dev/modal/benchmarks.py +73 -0
  20. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/pyproject.toml +1 -1
  21. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
  22. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models.py +20 -18
  23. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models_multimodal.py +16 -10
  24. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models_with_logits.py +18 -11
  25. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models.py +14 -12
  26. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models_multimodal.py +19 -12
  27. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models_with_logits.py +15 -9
  28. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/utils.py +11 -0
  29. liger_kernel_nightly-0.5.10.dev20250611215839/dev/modal/benchmarks.py +0 -28
  30. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  31. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  32. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/pull_request_template.md +0 -0
  33. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/amd-ci.yml +0 -0
  34. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/benchmark.yml +2 -2
  35. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/docs.yml +0 -0
  36. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/intel-ci.yml +0 -0
  37. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/nvi-ci.yml +0 -0
  38. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/publish-nightly.yml +0 -0
  39. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/publish-release.yml +0 -0
  40. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.gitignore +0 -0
  41. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.idea/workspace.xml +0 -0
  42. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/LICENSE +0 -0
  43. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/Makefile +0 -0
  44. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/NOTICE +0 -0
  45. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/README.md +0 -0
  46. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/README.md +0 -0
  47. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/__init__.py +0 -0
  48. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/benchmarks_visualizer.py +0 -0
  49. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/data/all_benchmark_data.csv +0 -0
  50. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/__init__.py +0 -0
  51. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_dyt.py +0 -0
  52. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  53. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_geglu.py +0 -0
  54. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_group_norm.py +0 -0
  55. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  56. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  57. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  58. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_rope.py +0 -0
  59. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  60. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  61. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/fmt-requirements.txt +0 -0
  62. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/modal/tests.py +0 -0
  63. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/modal/tests_bwd.py +0 -0
  64. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Examples.md +0 -0
  65. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Getting-Started.md +0 -0
  66. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/High-Level-APIs.md +0 -0
  67. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Low-Level-APIs.md +0 -0
  68. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/acknowledgement.md +0 -0
  69. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/contributing.md +0 -0
  70. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/banner.GIF +0 -0
  71. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/compose.gif +0 -0
  72. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/e2e-memory.png +0 -0
  73. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/e2e-tps.png +0 -0
  74. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/logo-banner.png +0 -0
  75. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/patch.gif +0 -0
  76. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/post-training.png +0 -0
  77. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/index.md +0 -0
  78. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/license.md +0 -0
  79. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/alignment/accelerate_config.yaml +0 -0
  80. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/alignment/run_orpo.py +0 -0
  81. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/README.md +0 -0
  82. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/callback.py +0 -0
  83. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/config/fsdp_config.json +0 -0
  84. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  85. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  86. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  87. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/llama_tps.png +0 -0
  88. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  89. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/qwen_tps.png +0 -0
  90. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/launch_on_modal.py +0 -0
  91. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/requirements.txt +0 -0
  92. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_benchmarks.sh +0 -0
  93. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_gemma.sh +0 -0
  94. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_llama.sh +0 -0
  95. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_qwen.sh +0 -0
  96. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_qwen2_vl.sh +0 -0
  97. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/training.py +0 -0
  98. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/training_multimodal.py +0 -0
  99. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/README.md +0 -0
  100. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/requirements.txt +0 -0
  101. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/training.py +0 -0
  102. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/README.md +0 -0
  103. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/callback.py +0 -0
  104. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  105. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  106. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  107. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  108. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  109. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  110. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  111. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  112. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  113. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/medusa_util.py +0 -0
  114. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/requirements.txt +0 -0
  115. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  116. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/train.py +0 -0
  117. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-Apache-2.0 +0 -0
  118. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  119. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  120. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-llmc +0 -0
  121. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-triton +0 -0
  122. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/mkdocs.yml +0 -0
  123. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/setup.cfg +0 -0
  124. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/setup.py +0 -0
  125. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/__init__.py +0 -0
  126. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/README.md +0 -0
  127. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  128. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  129. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  130. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/functional.py +0 -0
  131. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  132. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  133. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  134. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  135. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  136. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  137. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  138. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  139. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  140. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/env_report.py +0 -0
  141. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/__init__.py +0 -0
  142. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/cross_entropy.py +0 -0
  143. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/dyt.py +0 -0
  144. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  145. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  146. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  147. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  148. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  149. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/geglu.py +0 -0
  150. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/group_norm.py +0 -0
  151. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/grpo_loss.py +0 -0
  152. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/jsd.py +0 -0
  153. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/kl_div.py +0 -0
  154. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/layer_norm.py +0 -0
  155. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  156. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  157. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/rms_norm.py +0 -0
  158. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/rope.py +0 -0
  159. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/softmax.py +0 -0
  160. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/sparsemax.py +0 -0
  161. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/swiglu.py +0 -0
  162. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/tvd.py +0 -0
  163. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/utils.py +0 -0
  164. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/__init__.py +0 -0
  165. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/auto_model.py +0 -0
  166. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  167. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/dyt.py +0 -0
  168. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  169. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fsdp.py +0 -0
  170. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/functional.py +0 -0
  171. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  172. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  173. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  174. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/geglu.py +0 -0
  175. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/group_norm.py +0 -0
  176. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  177. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/jsd.py +0 -0
  178. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/kl_div.py +0 -0
  179. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/layer_norm.py +0 -0
  180. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/__init__.py +0 -0
  181. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma.py +0 -0
  182. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  183. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  184. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/glm4.py +0 -0
  185. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/llama.py +0 -0
  186. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/llava.py +0 -0
  187. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  188. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mistral.py +0 -0
  189. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  190. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mllama.py +0 -0
  191. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  192. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  193. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/phi3.py +0 -0
  194. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  195. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  196. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  197. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  198. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  199. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/monkey_patch.py +0 -0
  200. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  201. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  202. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/rms_norm.py +0 -0
  203. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/rope.py +0 -0
  204. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/softmax.py +0 -0
  205. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/sparsemax.py +0 -0
  206. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/swiglu.py +0 -0
  207. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  208. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  209. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  210. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/tvd.py +0 -0
  211. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/triton/__init__.py +0 -0
  212. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/triton/monkey_patch.py +0 -0
  213. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/utils.py +0 -0
  214. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  215. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  216. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  217. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  218. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/__init__.py +0 -0
  219. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/__init__.py +0 -0
  220. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_cpo_loss.py +0 -0
  221. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_dpo_loss.py +0 -0
  222. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_grpo_loss.py +0 -0
  223. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_jsd_loss.py +0 -0
  224. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_kto_loss.py +0 -0
  225. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_orpo_loss.py +0 -0
  226. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_simpo_loss.py +0 -0
  227. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/conftest.py +0 -0
  228. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/__init__.py +0 -0
  229. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/__init__.py +0 -0
  230. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/__init__.py +0 -0
  231. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  232. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  233. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  234. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  235. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  236. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  237. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  238. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  239. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  240. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare.txt +0 -0
  241. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  242. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  243. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  244. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_auto_model.py +0 -0
  245. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_cross_entropy.py +0 -0
  246. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_dyt.py +0 -0
  247. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_embedding.py +0 -0
  248. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_flex_attention.py +0 -0
  249. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  250. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_linear_jsd.py +0 -0
  251. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  252. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_geglu.py +0 -0
  253. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_group_norm.py +0 -0
  254. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_grpo_loss.py +0 -0
  255. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_jsd.py +0 -0
  256. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_kl_div.py +0 -0
  257. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_layer_norm.py +0 -0
  258. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_mm_int8int2.py +0 -0
  259. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_monkey_patch.py +0 -0
  260. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_multi_token_attention.py +0 -0
  261. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_qwen2vl_mrope.py +0 -0
  262. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_rms_norm.py +0 -0
  263. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_rope.py +0 -0
  264. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_softmax.py +0 -0
  265. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_sparsemax.py +0 -0
  266. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_swiglu.py +0 -0
  267. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_trainer_integration.py +0 -0
  268. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_transformers.py +0 -0
  269. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_tvd.py +0 -0
  270. {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/triton/test_triton_monkey_patch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.10.dev20250611215839
3
+ Version: 0.5.10.dev20250613212111
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -147,7 +147,7 @@ if __name__ == "__main__":
147
147
 
148
148
  run_benchmarks(
149
149
  bench_test_fn=bench_speed_fused_linear_cpo_loss,
150
- kernel_operation_modes=["forward", "full"],
150
+ kernel_operation_modes=["forward", "backward", "full"],
151
151
  metric_name="speed",
152
152
  metric_unit="ms",
153
153
  **common_configs,
@@ -109,7 +109,7 @@ if __name__ == "__main__":
109
109
 
110
110
  run_benchmarks(
111
111
  bench_test_fn=bench_speed_cross_entropy,
112
- kernel_operation_modes=["forward", "full"],
112
+ kernel_operation_modes=["forward", "backward", "full"],
113
113
  metric_name="speed",
114
114
  metric_unit="ms",
115
115
  **common_configs,
@@ -248,7 +248,7 @@ if __name__ == "__main__":
248
248
 
249
249
  run_benchmarks(
250
250
  bench_test_fn=bench_speed_jsd_loss,
251
- kernel_operation_modes=["forward", "full"],
251
+ kernel_operation_modes=["forward", "backward", "full"],
252
252
  metric_name="speed",
253
253
  metric_unit="ms",
254
254
  **common_configs,
@@ -166,7 +166,7 @@ if __name__ == "__main__":
166
166
 
167
167
  run_benchmarks(
168
168
  bench_test_fn=bench_speed_dpo_loss,
169
- kernel_operation_modes=["forward", "full"],
169
+ kernel_operation_modes=["forward", "backward", "full"],
170
170
  metric_name="speed",
171
171
  metric_unit="ms",
172
172
  **common_configs,
@@ -112,7 +112,7 @@ if __name__ == "__main__":
112
112
 
113
113
  run_benchmarks(
114
114
  bench_test_fn=bench_speed_embedding,
115
- kernel_operation_modes=["forward", "full"],
115
+ kernel_operation_modes=["forward", "backward", "full"],
116
116
  metric_name="speed",
117
117
  metric_unit="ms",
118
118
  **common_configs,
@@ -156,7 +156,7 @@ if __name__ == "__main__":
156
156
 
157
157
  run_benchmarks(
158
158
  bench_test_fn=bench_speed_fused_linear_cross_entropy,
159
- kernel_operation_modes=["forward", "full"],
159
+ kernel_operation_modes=["forward", "backward", "full"],
160
160
  metric_name="speed",
161
161
  metric_unit="ms",
162
162
  **common_configs,
@@ -246,7 +246,7 @@ if __name__ == "__main__":
246
246
 
247
247
  run_benchmarks(
248
248
  bench_test_fn=bench_speed_fused_linear_jsd,
249
- kernel_operation_modes=["forward", "full"],
249
+ kernel_operation_modes=["forward", "backward", "full"],
250
250
  metric_name="speed",
251
251
  metric_unit="ms",
252
252
  **common_configs,
@@ -143,7 +143,7 @@ if __name__ == "__main__":
143
143
 
144
144
  run_benchmarks(
145
145
  bench_test_fn=bench_speed_jsd,
146
- kernel_operation_modes=["forward", "full"],
146
+ kernel_operation_modes=["forward", "backward", "full"],
147
147
  metric_name="speed",
148
148
  metric_unit="ms",
149
149
  **common_args,
@@ -110,7 +110,7 @@ if __name__ == "__main__":
110
110
 
111
111
  run_benchmarks(
112
112
  bench_test_fn=bench_speed_kldiv,
113
- kernel_operation_modes=["forward", "full"],
113
+ kernel_operation_modes=["forward", "backward", "full"],
114
114
  metric_name="speed",
115
115
  metric_unit="ms",
116
116
  **common_args,
@@ -299,7 +299,7 @@ if __name__ == "__main__":
299
299
 
300
300
  run_benchmarks(
301
301
  bench_test_fn=bench_speed_kto_loss,
302
- kernel_operation_modes=["forward", "full"],
302
+ kernel_operation_modes=["forward", "backward", "full"],
303
303
  metric_name="speed",
304
304
  metric_unit="ms",
305
305
  **common_configs,
@@ -111,7 +111,7 @@ if __name__ == "__main__":
111
111
 
112
112
  run_benchmarks(
113
113
  bench_test_fn=bench_speed_layer_norm,
114
- kernel_operation_modes=["forward", "full"],
114
+ kernel_operation_modes=["forward", "backward", "full"],
115
115
  metric_name="speed",
116
116
  metric_unit="ms",
117
117
  **common_configs,
@@ -149,7 +149,7 @@ if __name__ == "__main__":
149
149
 
150
150
  run_benchmarks(
151
151
  bench_test_fn=bench_speed_fused_linear_orpo_loss,
152
- kernel_operation_modes=["forward", "full"],
152
+ kernel_operation_modes=["forward", "full", "backward"],
153
153
  metric_name="speed",
154
154
  metric_unit="ms",
155
155
  **common_configs,
@@ -147,7 +147,7 @@ if __name__ == "__main__":
147
147
 
148
148
  run_benchmarks(
149
149
  bench_test_fn=bench_speed_fused_linear_simpo_loss,
150
- kernel_operation_modes=["forward", "full"],
150
+ kernel_operation_modes=["forward", "full", "backward"],
151
151
  metric_name="speed",
152
152
  metric_unit="ms",
153
153
  **common_configs,
@@ -124,7 +124,7 @@ if __name__ == "__main__":
124
124
 
125
125
  run_benchmarks(
126
126
  bench_test_fn=bench_speed_softmax,
127
- kernel_operation_modes=["forward", "full"],
127
+ kernel_operation_modes=["forward", "full", "backward"],
128
128
  metric_name="speed",
129
129
  metric_unit="ms",
130
130
  overwrite=args.overwrite,
@@ -161,7 +161,7 @@ if __name__ == "__main__":
161
161
 
162
162
  run_benchmarks(
163
163
  bench_test_fn=bench_speed_swiglu,
164
- kernel_operation_modes=["forward"],
164
+ kernel_operation_modes=["forward", "full", "backward"],
165
165
  metric_name="speed",
166
166
  metric_unit="ms",
167
167
  **common_configs,
@@ -126,7 +126,7 @@ if __name__ == "__main__":
126
126
 
127
127
  run_benchmarks(
128
128
  bench_test_fn=bench_speed_tvd,
129
- kernel_operation_modes=["forward", "full"],
129
+ kernel_operation_modes=["forward", "full", "backward"],
130
130
  metric_name="speed",
131
131
  metric_unit="ms",
132
132
  **common_args,
@@ -235,7 +235,7 @@ def update_benchmark_data_csv(
235
235
  pass
236
236
  else:
237
237
  existing_data_dict[row_key] = row_dict
238
-
238
+ os.makedirs(os.path.dirname(filename_abs_path), exist_ok=True)
239
239
  with open(filename_abs_path, mode="w", newline="") as file:
240
240
  writer = csv.DictWriter(file, fieldnames=fieldnames)
241
241
  writer.writeheader()
@@ -0,0 +1,73 @@
1
+ from pathlib import Path
2
+
3
+ import modal
4
+
5
+ ROOT_PATH = Path(__file__).parent.parent.parent
6
+ REMOTE_ROOT_PATH = "/root/liger-kernel"
7
+ PYTHON_VERSION = "3.12"
8
+
9
+ image = modal.Image.debian_slim(python_version=PYTHON_VERSION).pip_install("uv")
10
+
11
+ app = modal.App("liger_benchmarks", image=image)
12
+
13
+ # mount: add local files to the remote container
14
+ repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
15
+
16
+
17
+ @app.function(gpu="H100", image=repo, timeout=60 * 45)
18
+ def liger_benchmarks():
19
+ import os
20
+ import subprocess
21
+
22
+ subprocess.run(
23
+ ["uv pip install -e '.[dev]' --system"],
24
+ check=True,
25
+ shell=True,
26
+ cwd=REMOTE_ROOT_PATH,
27
+ )
28
+ subprocess.run(["make run-benchmarks"], check=True, shell=True, cwd=REMOTE_ROOT_PATH)
29
+
30
+ file_path = Path(REMOTE_ROOT_PATH) / "benchmark" / "data" / "all_benchmark_data.csv"
31
+ print(f"Checking if file exists at: {file_path}")
32
+ print(f"File exists: {os.path.exists(file_path)}")
33
+
34
+ if not os.path.exists(file_path):
35
+ print("Listing directory contents:")
36
+ data_dir = file_path.parent
37
+ if os.path.exists(data_dir):
38
+ print(f"Contents of {data_dir}:")
39
+ print(os.listdir(data_dir))
40
+ else:
41
+ print(f"Data directory {data_dir} does not exist")
42
+ raise FileNotFoundError(f"Benchmark data file not found at {file_path}")
43
+
44
+ with open(file_path, "rb") as f:
45
+ data = f.read()
46
+ print(f"Successfully read {len(data)} bytes of data")
47
+ return data
48
+
49
+
50
+ @app.local_entrypoint()
51
+ def main():
52
+ try:
53
+ # Run the benchmarks and get the data
54
+ print("Starting benchmark run...")
55
+ benchmark_data = liger_benchmarks.remote()
56
+
57
+ if not benchmark_data:
58
+ raise ValueError("No data received from remote function")
59
+
60
+ # Save the data locally
61
+ local_data_path = ROOT_PATH / "benchmark" / "data" / "all_benchmark_data.csv"
62
+ print(f"Attempting to save data to: {local_data_path}")
63
+
64
+ local_data_path.parent.mkdir(parents=True, exist_ok=True)
65
+
66
+ with open(local_data_path, "wb") as f:
67
+ f.write(benchmark_data)
68
+
69
+ print(f"Successfully saved {len(benchmark_data)} bytes to: {local_data_path}")
70
+
71
+ except Exception as e:
72
+ print(f"Error occurred: {str(e)}")
73
+ raise
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.5.10.dev20250611215839"
7
+ version = "0.5.10.dev20250613212111"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.10.dev20250611215839
3
+ Version: 0.5.10.dev20250613212111
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
38
38
  from test.utils import DEFAULT_DATASET_PATH
39
39
  from test.utils import MiniModelConfig
40
40
  from test.utils import assert_verbose_allclose
41
+ from test.utils import get_logprobs
42
+ from test.utils import get_topk
41
43
  from test.utils import revert_liger_kernel_to_gemma
42
44
  from test.utils import revert_liger_kernel_to_gemma2
43
45
  from test.utils import revert_liger_kernel_to_gemma3_text
@@ -851,17 +853,17 @@ def run_mini_model(
851
853
  eval_output = model(**eval_batch)
852
854
  print(f"Eval Loss: {eval_output.loss.item()}")
853
855
  loss_list.append(eval_output.loss.item())
854
-
856
+ topk_logprobs = get_topk(get_logprobs(eval_output.logits))
855
857
  MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
856
858
  return {
857
859
  "loss": loss_list,
858
- "logits": eval_output.logits,
860
+ "topk_logprobs": topk_logprobs.values,
859
861
  "model": model,
860
862
  }
861
863
 
862
864
 
863
865
  @pytest.mark.parametrize(
864
- "model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logits_atol, logits_rtol, param_atol, param_rtol",
866
+ "model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
865
867
  [
866
868
  pytest.param(
867
869
  "mini_llama3",
@@ -884,7 +886,7 @@ def run_mini_model(
884
886
  1e-3,
885
887
  1e-2,
886
888
  1e-1,
887
- 1e-2,
889
+ 1e-1,
888
890
  1e-2,
889
891
  1e-2,
890
892
  marks=[
@@ -902,7 +904,7 @@ def run_mini_model(
902
904
  torch.bfloat16,
903
905
  1e-3,
904
906
  1e-2,
905
- 1, # 1e-1
907
+ 1e-1, # 1e-1
906
908
  1e-1, # 1e-2
907
909
  1e-2,
908
910
  1e-2,
@@ -972,7 +974,7 @@ def run_mini_model(
972
974
  torch.bfloat16,
973
975
  1e-3,
974
976
  1e-2,
975
- 1, # 1e-1
977
+ 1e-1, # 1e-1
976
978
  1e-1, # 1e-2
977
979
  1e-2,
978
980
  1e-2,
@@ -1111,8 +1113,8 @@ def run_mini_model(
1111
1113
  torch.bfloat16,
1112
1114
  1e-3,
1113
1115
  1e-2,
1114
- 1e-1,
1115
1116
  1e-2,
1117
+ 1e-1,
1116
1118
  1e-2,
1117
1119
  1e-2,
1118
1120
  marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
@@ -1124,8 +1126,8 @@ def run_mini_model(
1124
1126
  torch.bfloat16,
1125
1127
  1e-3,
1126
1128
  1e-2,
1127
- 1e-1,
1128
1129
  1e-2,
1130
+ 1e-1,
1129
1131
  1e-2,
1130
1132
  1e-2,
1131
1133
  marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
@@ -1153,8 +1155,8 @@ def run_mini_model(
1153
1155
  torch.bfloat16,
1154
1156
  1e-3,
1155
1157
  1e-2,
1156
- 1e-1,
1157
- 1e-2,
1158
+ 3e-1,
1159
+ 4e-1,
1158
1160
  1e-2,
1159
1161
  1e-2,
1160
1162
  marks=[
@@ -1174,8 +1176,8 @@ def test_mini_model(
1174
1176
  dtype,
1175
1177
  loss_atol,
1176
1178
  loss_rtol,
1177
- logits_atol,
1178
- logits_rtol,
1179
+ logprobs_atol,
1180
+ logprobs_rtol,
1179
1181
  param_atol,
1180
1182
  param_rtol,
1181
1183
  ):
@@ -1193,13 +1195,13 @@ def test_mini_model(
1193
1195
  rtol=loss_rtol,
1194
1196
  )
1195
1197
 
1196
- # Compare the logits from evaluation step
1197
- if expected_output["logits"] is not None and actual_output["logits"] is not None:
1198
+ # Compare the topk logprobs from evaluation step
1199
+ if expected_output["topk_logprobs"] is not None and actual_output["topk_logprobs"] is not None:
1198
1200
  assert_verbose_allclose(
1199
- expected_output["logits"],
1200
- actual_output["logits"],
1201
- atol=logits_atol,
1202
- rtol=logits_rtol,
1201
+ expected_output["topk_logprobs"],
1202
+ actual_output["topk_logprobs"],
1203
+ atol=logprobs_atol,
1204
+ rtol=logprobs_rtol,
1203
1205
  )
1204
1206
 
1205
1207
  # Compare the params from the last step
@@ -20,6 +20,8 @@ from test.utils import FAKE_CONFIGS_PATH
20
20
  from test.utils import UNTOKENIZED_DATASET_PATH
21
21
  from test.utils import MiniModelConfig
22
22
  from test.utils import assert_verbose_allclose
23
+ from test.utils import get_logprobs
24
+ from test.utils import get_topk
23
25
  from test.utils import is_torchvision_available
24
26
  from test.utils import load_image_processing_config
25
27
  from test.utils import load_processor_config
@@ -764,13 +766,17 @@ def run_mini_model_multimodal(
764
766
 
765
767
  print(f"Step {i}, Loss: {output.loss.item()}")
766
768
  loss_list.append(output.loss.item())
767
-
769
+ topk_logprobs = get_topk(get_logprobs(output.logits))
768
770
  MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
769
- return {"loss": loss_list, "logits": output.logits, "model": model}
771
+ return {
772
+ "loss": loss_list,
773
+ "topk_logprobs": topk_logprobs.values,
774
+ "model": model,
775
+ }
770
776
 
771
777
 
772
778
  @pytest.mark.parametrize(
773
- "model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logits_atol, logits_rtol, param_atol, param_rtol",
779
+ "model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
774
780
  [
775
781
  pytest.param(
776
782
  "mini_qwen2_vl",
@@ -917,8 +923,8 @@ def test_mini_model_multimodal(
917
923
  dtype,
918
924
  loss_atol,
919
925
  loss_rtol,
920
- logits_atol,
921
- logits_rtol,
926
+ logprobs_atol,
927
+ logprobs_rtol,
922
928
  param_atol,
923
929
  param_rtol,
924
930
  ):
@@ -937,12 +943,12 @@ def test_mini_model_multimodal(
937
943
  rtol=loss_rtol,
938
944
  )
939
945
 
940
- # Compare the logits from the last step
946
+ # Compare the topk logprobs from evaluation step
941
947
  assert_verbose_allclose(
942
- expected_output["logits"],
943
- actual_output["logits"],
944
- atol=logits_atol,
945
- rtol=logits_rtol,
948
+ expected_output["topk_logprobs"],
949
+ actual_output["topk_logprobs"],
950
+ atol=logprobs_atol,
951
+ rtol=logprobs_rtol,
946
952
  )
947
953
 
948
954
  # Compare the params from the last step
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
38
38
  from test.utils import DEFAULT_DATASET_PATH
39
39
  from test.utils import MiniModelConfig
40
40
  from test.utils import assert_verbose_allclose
41
+ from test.utils import get_logprobs
42
+ from test.utils import get_topk
41
43
  from test.utils import revert_liger_kernel_to_gemma
42
44
  from test.utils import revert_liger_kernel_to_gemma2
43
45
  from test.utils import revert_liger_kernel_to_gemma3_text
@@ -842,12 +844,17 @@ def run_mini_model(
842
844
  print(f"Step {i}, Loss: {output.loss.item()}")
843
845
  loss_list.append(output.loss.item())
844
846
 
847
+ topk_logprobs = get_topk(get_logprobs(output.logits))
845
848
  MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
846
- return {"loss": loss_list, "logits": output.logits, "model": model}
849
+ return {
850
+ "loss": loss_list,
851
+ "topk_logprobs": topk_logprobs.values,
852
+ "model": model,
853
+ }
847
854
 
848
855
 
849
856
  @pytest.mark.parametrize(
850
- "model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logits_atol, logits_rtol, param_atol, param_rtol",
857
+ "model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
851
858
  [
852
859
  pytest.param(
853
860
  "mini_llama3",
@@ -1058,8 +1065,8 @@ def run_mini_model(
1058
1065
  torch.bfloat16,
1059
1066
  1e-3,
1060
1067
  1e-2,
1061
- 1e-1,
1062
1068
  1e-2,
1069
+ 1e-1,
1063
1070
  1e-2,
1064
1071
  1e-2,
1065
1072
  marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
@@ -1071,8 +1078,8 @@ def run_mini_model(
1071
1078
  torch.bfloat16,
1072
1079
  1e-3,
1073
1080
  1e-2,
1074
- 1e-1,
1075
1081
  1e-2,
1082
+ 1e-1,
1076
1083
  1e-2,
1077
1084
  1e-2,
1078
1085
  marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
@@ -1159,8 +1166,8 @@ def test_mini_model(
1159
1166
  dtype,
1160
1167
  loss_atol,
1161
1168
  loss_rtol,
1162
- logits_atol,
1163
- logits_rtol,
1169
+ logprobs_atol,
1170
+ logprobs_rtol,
1164
1171
  param_atol,
1165
1172
  param_rtol,
1166
1173
  ):
@@ -1180,12 +1187,12 @@ def test_mini_model(
1180
1187
 
1181
1188
  # No logits are materialized
1182
1189
  # import pdb; pdb.set_trace()
1183
- # Compare the logits from the last step
1190
+ # Compare the topk logprobs from evaluation step
1184
1191
  assert_verbose_allclose(
1185
- expected_output["logits"],
1186
- actual_output["logits"],
1187
- atol=logits_atol,
1188
- rtol=logits_rtol,
1192
+ expected_output["topk_logprobs"],
1193
+ actual_output["topk_logprobs"],
1194
+ atol=logprobs_atol,
1195
+ rtol=logprobs_rtol,
1189
1196
  )
1190
1197
 
1191
1198
  # Compare the params from the last step
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
38
38
  from test.utils import DEFAULT_DATASET_PATH
39
39
  from test.utils import MiniModelConfig
40
40
  from test.utils import assert_verbose_allclose
41
+ from test.utils import get_logprobs
42
+ from test.utils import get_topk
41
43
  from test.utils import revert_liger_kernel_to_gemma
42
44
  from test.utils import revert_liger_kernel_to_gemma2
43
45
  from test.utils import revert_liger_kernel_to_gemma3_text
@@ -849,17 +851,17 @@ def run_mini_model(
849
851
  eval_output = model(**eval_batch)
850
852
  print(f"Eval Loss: {eval_output.loss.item()}")
851
853
  loss_list.append(eval_output.loss.item())
852
-
854
+ topk_logprobs = get_topk(get_logprobs(eval_output.logits))
853
855
  MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
854
856
  return {
855
857
  "loss": loss_list,
856
- "logits": eval_output.logits,
858
+ "topk_logprobs": topk_logprobs.values,
857
859
  "model": model,
858
860
  }
859
861
 
860
862
 
861
863
  @pytest.mark.parametrize(
862
- "model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logits_atol, logits_rtol, param_atol, param_rtol",
864
+ "model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
863
865
  [
864
866
  ("mini_llama3", 32, 1e-4, torch.float32, 1e-8, 2e-5, 1e-4, 1e-5, 5e-3, 1e-5),
865
867
  pytest.param(
@@ -1013,7 +1015,7 @@ def run_mini_model(
1013
1015
  # TODO: mixtral is flaky so disable the test for now
1014
1016
  # ("mini_mixtral", 32, 1e-4, torch.float32, 5e-4, 1e-4, 5e-3, 1e-5, 1e-2, 1e-5),
1015
1017
  # Gemma 1.1 and 2 has more tolerance because currently, the kernel is not a perfect match (casts are not done the same way)
1016
- ("mini_gemma1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-5, 5e-3, 1e-5),
1018
+ ("mini_gemma1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-2, 5e-3, 1e-5),
1017
1019
  ("mini_gemma1.1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-5, 5e-3, 1e-5),
1018
1020
  ("mini_gemma2", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-5, 5e-3, 1e-5),
1019
1021
  pytest.param(
@@ -1041,8 +1043,8 @@ def test_mini_model(
1041
1043
  dtype,
1042
1044
  loss_atol,
1043
1045
  loss_rtol,
1044
- logits_atol,
1045
- logits_rtol,
1046
+ logprobs_atol,
1047
+ logprobs_rtol,
1046
1048
  param_atol,
1047
1049
  param_rtol,
1048
1050
  ):
@@ -1060,13 +1062,13 @@ def test_mini_model(
1060
1062
  rtol=loss_rtol,
1061
1063
  )
1062
1064
 
1063
- # Compare the logits from evaluation step
1064
- if expected_output["logits"] is not None and actual_output["logits"] is not None:
1065
+ # Compare the topk logprobs from evaluation step
1066
+ if expected_output["topk_logprobs"] is not None and actual_output["topk_logprobs"] is not None:
1065
1067
  assert_verbose_allclose(
1066
- expected_output["logits"],
1067
- actual_output["logits"],
1068
- atol=logits_atol,
1069
- rtol=logits_rtol,
1068
+ expected_output["topk_logprobs"],
1069
+ actual_output["topk_logprobs"],
1070
+ atol=logprobs_atol,
1071
+ rtol=logprobs_rtol,
1070
1072
  )
1071
1073
 
1072
1074
  # Compare the params from the last step