liger-kernel-nightly 0.6.2.dev20250911161200__tar.gz → 0.6.2.dev20250916231145__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of liger-kernel-nightly might be problematic. Click here for more details.

Files changed (285)
  1. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.gitignore +2 -1
  2. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/Makefile +10 -1
  3. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/PKG-INFO +3 -1
  4. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/Examples.md +1 -1
  5. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/alignment/run_orpo.py +1 -1
  6. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/training.py +2 -2
  7. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/training_multimodal.py +1 -1
  8. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/train.py +1 -1
  9. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/pyproject.toml +31 -1
  10. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/setup.py +2 -0
  11. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +1 -1
  12. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/glm4v.py +1 -1
  13. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/glm4v_moe.py +1 -1
  14. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/PKG-INFO +3 -1
  15. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/requires.txt +2 -0
  16. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_monkey_patch.py +27 -27
  17. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  18. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  19. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/pull_request_template.md +0 -0
  20. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/amd-ci.yml +0 -0
  21. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/benchmark.yml +0 -0
  22. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/docs.yml +0 -0
  23. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/intel-ci.yml +0 -0
  24. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/nvi-ci.yml +0 -0
  25. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/publish-nightly.yml +0 -0
  26. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/publish-release.yml +0 -0
  27. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/LICENSE +0 -0
  28. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/NOTICE +0 -0
  29. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/README.md +0 -0
  30. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/README.md +0 -0
  31. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/__init__.py +0 -0
  32. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/benchmarks_visualizer.py +0 -0
  33. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/data/all_benchmark_data.csv +0 -0
  34. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/__init__.py +0 -0
  35. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  36. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  37. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  38. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  39. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  40. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_dyt.py +0 -0
  41. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_embedding.py +0 -0
  42. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
  43. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  44. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  45. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  46. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_geglu.py +0 -0
  47. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_group_norm.py +0 -0
  48. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
  49. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_jsd.py +0 -0
  50. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_kl_div.py +0 -0
  51. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  52. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  53. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
  54. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  55. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  56. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  57. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  58. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_rope.py +0 -0
  59. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  60. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_softmax.py +0 -0
  61. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  62. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  63. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_swiglu.py +0 -0
  64. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_tvd.py +0 -0
  65. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/utils.py +0 -0
  66. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/dev/fmt-requirements.txt +0 -0
  67. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/dev/modal/benchmarks.py +0 -0
  68. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/dev/modal/tests.py +0 -0
  69. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/dev/modal/tests_bwd.py +0 -0
  70. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/Getting-Started.md +0 -0
  71. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/High-Level-APIs.md +0 -0
  72. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/Low-Level-APIs.md +0 -0
  73. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/acknowledgement.md +0 -0
  74. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/contributing.md +0 -0
  75. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/banner.GIF +0 -0
  76. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/compose.gif +0 -0
  77. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/e2e-memory.png +0 -0
  78. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/e2e-tps.png +0 -0
  79. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/logo-banner.png +0 -0
  80. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/patch.gif +0 -0
  81. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/post-training.png +0 -0
  82. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/index.md +0 -0
  83. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/license.md +0 -0
  84. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/alignment/accelerate_config.yaml +0 -0
  85. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/README.md +0 -0
  86. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/callback.py +0 -0
  87. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/config/fsdp_config.json +0 -0
  88. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  89. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  90. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  91. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/llama_tps.png +0 -0
  92. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  93. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/qwen_tps.png +0 -0
  94. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/launch_on_modal.py +0 -0
  95. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/requirements.txt +0 -0
  96. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_benchmarks.sh +0 -0
  97. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_gemma.sh +0 -0
  98. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_llama.sh +0 -0
  99. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_qwen.sh +0 -0
  100. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_qwen2_vl.sh +0 -0
  101. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/lightning/README.md +0 -0
  102. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/lightning/requirements.txt +0 -0
  103. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/lightning/training.py +0 -0
  104. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/README.md +0 -0
  105. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/callback.py +0 -0
  106. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  107. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  108. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  109. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  110. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  111. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  112. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  113. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  114. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  115. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/medusa_util.py +0 -0
  116. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/requirements.txt +0 -0
  117. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  118. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-Apache-2.0 +0 -0
  119. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  120. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  121. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-MIT-llmc +0 -0
  122. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-MIT-triton +0 -0
  123. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/mkdocs.yml +0 -0
  124. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/setup.cfg +0 -0
  125. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/__init__.py +0 -0
  126. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/README.md +0 -0
  127. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  128. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  129. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  130. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  131. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/functional.py +0 -0
  132. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  133. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  134. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  135. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  136. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  137. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  138. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  139. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  140. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  141. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/env_report.py +0 -0
  142. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/__init__.py +0 -0
  143. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/cross_entropy.py +0 -0
  144. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/dyt.py +0 -0
  145. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  146. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  147. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
  148. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  149. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  150. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  151. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/geglu.py +0 -0
  152. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/group_norm.py +0 -0
  153. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/grpo_loss.py +0 -0
  154. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/jsd.py +0 -0
  155. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/kl_div.py +0 -0
  156. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/layer_norm.py +0 -0
  157. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/llama4_rope.py +0 -0
  158. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  159. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  160. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/rms_norm.py +0 -0
  161. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/rope.py +0 -0
  162. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/softmax.py +0 -0
  163. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/sparsemax.py +0 -0
  164. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/swiglu.py +0 -0
  165. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/tvd.py +0 -0
  166. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/utils.py +0 -0
  167. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/__init__.py +0 -0
  168. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/auto_model.py +0 -0
  169. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  170. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/dyt.py +0 -0
  171. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
  172. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  173. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fsdp.py +0 -0
  174. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/functional.py +0 -0
  175. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
  176. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  177. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  178. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/geglu.py +0 -0
  179. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/group_norm.py +0 -0
  180. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  181. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/jsd.py +0 -0
  182. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/kl_div.py +0 -0
  183. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/layer_norm.py +0 -0
  184. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/llama4_rope.py +0 -0
  185. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/__init__.py +0 -0
  186. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/gemma.py +0 -0
  187. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  188. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  189. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/glm4.py +0 -0
  190. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/llama.py +0 -0
  191. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/llama4.py +0 -0
  192. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/llava.py +0 -0
  193. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  194. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/mistral.py +0 -0
  195. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  196. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/mllama.py +0 -0
  197. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  198. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  199. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/phi3.py +0 -0
  200. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  201. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  202. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  203. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  204. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  205. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/smollm3.py +0 -0
  206. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/monkey_patch.py +0 -0
  207. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  208. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  209. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/rms_norm.py +0 -0
  210. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/rope.py +0 -0
  211. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/softmax.py +0 -0
  212. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/sparsemax.py +0 -0
  213. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/swiglu.py +0 -0
  214. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  215. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  216. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  217. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/tvd.py +0 -0
  218. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/triton/__init__.py +0 -0
  219. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/triton/monkey_patch.py +0 -0
  220. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/utils.py +0 -0
  221. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  222. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  223. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  224. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/__init__.py +0 -0
  225. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/__init__.py +0 -0
  226. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_cosine_loss.py +0 -0
  227. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_cpo_loss.py +0 -0
  228. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_dpo_loss.py +0 -0
  229. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_grpo_loss.py +0 -0
  230. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_jsd_loss.py +0 -0
  231. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_kto_loss.py +0 -0
  232. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_orpo_loss.py +0 -0
  233. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_simpo_loss.py +0 -0
  234. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/conftest.py +0 -0
  235. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/__init__.py +0 -0
  236. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/bf16/__init__.py +0 -0
  237. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/bf16/test_mini_models.py +0 -0
  238. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  239. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  240. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/fp32/__init__.py +0 -0
  241. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/fp32/test_mini_models.py +0 -0
  242. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
  243. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  244. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  245. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  246. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  247. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  248. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  249. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  250. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  251. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  252. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  253. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  254. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/tiny_shakespeare.txt +0 -0
  255. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  256. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  257. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  258. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_auto_model.py +0 -0
  259. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_cross_entropy.py +0 -0
  260. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_dyt.py +0 -0
  261. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_embedding.py +0 -0
  262. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_flex_attention.py +0 -0
  263. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_fused_add_rms_norm.py +0 -0
  264. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  265. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_fused_linear_jsd.py +0 -0
  266. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  267. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_geglu.py +0 -0
  268. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_group_norm.py +0 -0
  269. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_grpo_loss.py +0 -0
  270. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_jsd.py +0 -0
  271. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_kl_div.py +0 -0
  272. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_layer_norm.py +0 -0
  273. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_mm_int8int2.py +0 -0
  274. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_multi_token_attention.py +0 -0
  275. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_qwen2vl_mrope.py +0 -0
  276. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_rms_norm.py +0 -0
  277. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_rope.py +0 -0
  278. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_softmax.py +0 -0
  279. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_sparsemax.py +0 -0
  280. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_swiglu.py +0 -0
  281. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_trainer_integration.py +0 -0
  282. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_transformers.py +0 -0
  283. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_tvd.py +0 -0
  284. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/triton/test_triton_monkey_patch.py +0 -0
  285. {liger_kernel_nightly-0.6.2.dev20250911161200 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/utils.py +0 -0
@@ -23,4 +23,5 @@ uv.lock
23
23
 
24
24
  # Benchmark images
25
25
  benchmark/visualizations
26
- .vscode/
26
+ .vscode/
27
+ .coverage
@@ -5,7 +5,16 @@ all: checkstyle test test-convergence
5
5
 
6
6
  # Command to run pytest for correctness tests
7
7
  test:
8
- python -m pytest --disable-warnings test/ --ignore=test/convergence
8
+ python -m pytest --disable-warnings \
9
+ -n auto \
10
+ --dist=load \
11
+ --cov=src/liger_kernel \
12
+ --cov-report=term-missing \
13
+ --ignore=test/convergence \
14
+ test/
15
+ coverage combine
16
+ coverage report -m
17
+ coverage html
9
18
 
10
19
  # Command to run ruff for linting and formatting code
11
20
  checkstyle:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.2.dev20250911161200
3
+ Version: 0.6.2.dev20250916231145
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -40,6 +40,8 @@ Requires-Dist: black>=24.4.2; extra == "dev"
40
40
  Requires-Dist: isort>=5.13.2; extra == "dev"
41
41
  Requires-Dist: pytest>=7.1.2; extra == "dev"
42
42
  Requires-Dist: pytest-xdist; extra == "dev"
43
+ Requires-Dist: pytest-cov; extra == "dev"
44
+ Requires-Dist: pytest-asyncio; extra == "dev"
43
45
  Requires-Dist: pytest-rerunfailures; extra == "dev"
44
46
  Requires-Dist: datasets>=2.19.2; extra == "dev"
45
47
  Requires-Dist: seaborn; extra == "dev"
@@ -239,7 +239,7 @@ from liger_kernel.transformers.trainer import LigerORPOTrainer # noqa: F401
239
239
 
240
240
  model = AutoModelForCausalLM.from_pretrained(
241
241
  "meta-llama/Llama-3.2-1B-Instruct",
242
- torch_dtype=torch.bfloat16,
242
+ dtype=torch.bfloat16,
243
243
  )
244
244
 
245
245
  tokenizer = AutoTokenizer.from_pretrained(
@@ -9,7 +9,7 @@ from liger_kernel.transformers.trainer import LigerORPOTrainer # noqa: F401
9
9
 
10
10
  model = AutoModelForCausalLM.from_pretrained(
11
11
  "meta-llama/Llama-3.2-1B-Instruct",
12
- torch_dtype=torch.bfloat16,
12
+ dtype=torch.bfloat16,
13
13
  )
14
14
 
15
15
  tokenizer = AutoTokenizer.from_pretrained(
@@ -48,7 +48,7 @@ def train():
48
48
  custom_args.model_name,
49
49
  trust_remote_code=True,
50
50
  use_cache=False,
51
- torch_dtype=torch.bfloat16,
51
+ dtype=torch.bfloat16,
52
52
  # These args will get passed to the appropriate apply_liger_kernel_to_* function
53
53
  # to override the default settings
54
54
  # cross_entropy=True,
@@ -59,7 +59,7 @@ def train():
59
59
  custom_args.model_name,
60
60
  trust_remote_code=True,
61
61
  use_cache=False,
62
- torch_dtype=torch.bfloat16,
62
+ dtype=torch.bfloat16,
63
63
  )
64
64
 
65
65
  trainer = SFTTrainer(
@@ -56,7 +56,7 @@ def construct_model_and_processor(model_name: str, use_liger: bool) -> torch.nn.
56
56
  model = Qwen2VLForConditionalGeneration.from_pretrained(
57
57
  pretrained_model_name_or_path=model_name,
58
58
  use_cache=False,
59
- torch_dtype=torch.bfloat16,
59
+ dtype=torch.bfloat16,
60
60
  low_cpu_mem_usage=True,
61
61
  attn_implementation="sdpa",
62
62
  )
@@ -319,7 +319,7 @@ def train():
319
319
  model = model_builder(
320
320
  model_args.model_name_or_path,
321
321
  cache_dir=training_args.cache_dir,
322
- torch_dtype=torch.bfloat16,
322
+ dtype=torch.bfloat16,
323
323
  )
324
324
 
325
325
  # Freeze the base model
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.2.dev20250911161200"
7
+ version = "0.6.2.dev20250916231145"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -24,6 +24,36 @@ pythonpath = ["src", "."]
24
24
  asyncio_mode = "auto"
25
25
  log_cli = true
26
26
  log_cli_level = "INFO"
27
+ addopts = [
28
+ "-n", "auto",
29
+ "--dist=load", # use "load" to distribute tests and let pytest-cov combine coverage
30
+ "--cov=src/liger_kernel",
31
+ "--cov-report=term-missing",
32
+ "--cov-report=html",
33
+ "--cov-config=pyproject.toml",
34
+ "--durations=0"
35
+ ]
36
+ python_files = "test_*.py"
37
+ testpaths = ["test/"]
38
+
39
+ [tool.coverage.run]
40
+ branch = true
41
+ parallel = true
42
+ source = ["src/liger_kernel"]
43
+ # xdist uses subprocesses; "multiprocessing" is a safe concurrency choice
44
+ concurrency = ["multiprocessing"]
45
+
46
+ [tool.coverage.paths]
47
+ liger_kernel = [
48
+ "src/liger_kernel",
49
+ "*/site-packages/liger_kernel"
50
+ ]
51
+
52
+ [tool.coverage.report]
53
+ omit = ["test/*"]
54
+ show_missing = true
55
+ skip_covered = false
56
+
27
57
 
28
58
  [tool.ruff]
29
59
  line-length = 120
@@ -38,6 +38,8 @@ def get_optional_dependencies():
38
38
  "isort>=5.13.2",
39
39
  "pytest>=7.1.2",
40
40
  "pytest-xdist",
41
+ "pytest-cov",
42
+ "pytest-asyncio",
41
43
  "pytest-rerunfailures",
42
44
  "datasets>=2.19.2",
43
45
  "seaborn",
@@ -25,7 +25,7 @@ class LigerFusedLinearCrossEntropyLoss(torch.nn.Module):
25
25
  assert reduction in {
26
26
  "mean",
27
27
  "sum",
28
- "none",
28
+ "none",
29
29
  }, f"reduction must be 'mean' or 'sum' or 'none'. Got: {reduction}"
30
30
  assert softcap is None or softcap > 0, f"softcap must greater than 0.0 or None. Got: {softcap}"
31
31
  self.ce_weight = ce_weight
@@ -70,7 +70,7 @@ def lce_forward(
70
70
  >>> processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
71
71
  >>> model = Glm4vForConditionalGeneration.from_pretrained(
72
72
  pretrained_model_name_or_path=MODEL_PATH,
73
- torch_dtype=torch.bfloat16,
73
+ dtype=torch.bfloat16,
74
74
  device_map="auto",
75
75
  )
76
76
  >>> inputs = processor.apply_chat_template(
@@ -75,7 +75,7 @@ def lce_forward(
75
75
  >>> processor = AutoProcessor.from_pretrained(MODEL_PATH)
76
76
  >>> model = Glm4vMoeForConditionalGeneration.from_pretrained(
77
77
  pretrained_model_name_or_path=MODEL_PATH,
78
- torch_dtype="auto",
78
+ dtype="auto",
79
79
  device_map="auto",
80
80
  )
81
81
  >>> inputs = processor.apply_chat_template(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.2.dev20250911161200
3
+ Version: 0.6.2.dev20250916231145
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -40,6 +40,8 @@ Requires-Dist: black>=24.4.2; extra == "dev"
40
40
  Requires-Dist: isort>=5.13.2; extra == "dev"
41
41
  Requires-Dist: pytest>=7.1.2; extra == "dev"
42
42
  Requires-Dist: pytest-xdist; extra == "dev"
43
+ Requires-Dist: pytest-cov; extra == "dev"
44
+ Requires-Dist: pytest-asyncio; extra == "dev"
43
45
  Requires-Dist: pytest-rerunfailures; extra == "dev"
44
46
  Requires-Dist: datasets>=2.19.2; extra == "dev"
45
47
  Requires-Dist: seaborn; extra == "dev"
@@ -9,6 +9,8 @@ black>=24.4.2
9
9
  isort>=5.13.2
10
10
  pytest>=7.1.2
11
11
  pytest-xdist
12
+ pytest-cov
13
+ pytest-asyncio
12
14
  pytest-rerunfailures
13
15
  datasets>=2.19.2
14
16
  seaborn
@@ -338,7 +338,7 @@ def test_apply_liger_kernel_to_instance_for_llama():
338
338
  with patch("transformers.models.llama.modeling_llama"):
339
339
  # Instantiate a dummy model
340
340
  config = transformers.models.llama.configuration_llama.LlamaConfig(
341
- torch_dtype=torch.bfloat16,
341
+ dtype=torch.bfloat16,
342
342
  rms_norm_eps=1e-5,
343
343
  hidden_size=32,
344
344
  intermediate_size=64,
@@ -382,7 +382,7 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
382
382
 
383
383
  # Instantiate a dummy model
384
384
  config = transformers.models.mllama.configuration_mllama.MllamaConfig(
385
- torch_dtype=torch.bfloat16,
385
+ dtype=torch.bfloat16,
386
386
  text_config=transformers.models.mllama.configuration_mllama.MllamaTextConfig(
387
387
  rms_norm_eps=1e-5,
388
388
  hidden_size=32,
@@ -533,7 +533,7 @@ def test_apply_liger_kernel_to_instance_for_llama4_for_causal_lm():
533
533
 
534
534
  # Instantiate a dummy model
535
535
  config = transformers.models.llama4.configuration_llama4.Llama4TextConfig(
536
- torch_dtype=torch.bfloat16,
536
+ dtype=torch.bfloat16,
537
537
  rms_norm_eps=1e-5,
538
538
  hidden_size=32,
539
539
  intermediate_size=64,
@@ -573,9 +573,9 @@ def test_apply_liger_kernel_to_instance_for_llama4_for_conditional_generation():
573
573
 
574
574
  # Instantiate a dummy model
575
575
  config = transformers.models.llama4.configuration_llama4.Llama4Config(
576
- torch_dtype=torch.bfloat16,
576
+ dtype=torch.bfloat16,
577
577
  text_config=transformers.models.llama4.configuration_llama4.Llama4TextConfig(
578
- torch_dtype=torch.bfloat16,
578
+ dtype=torch.bfloat16,
579
579
  rms_norm_eps=1e-5,
580
580
  hidden_size=32,
581
581
  intermediate_size=64,
@@ -656,7 +656,7 @@ def test_apply_liger_kernel_to_instance_for_mistral():
656
656
  with patch("transformers.models.mistral.modeling_mistral"):
657
657
  # Instantiate a dummy model
658
658
  config = transformers.models.mistral.configuration_mistral.MistralConfig(
659
- torch_dtype=torch.bfloat16,
659
+ dtype=torch.bfloat16,
660
660
  rms_norm_eps=1e-5,
661
661
  hidden_size=32,
662
662
  intermediate_size=64,
@@ -695,7 +695,7 @@ def test_apply_liger_kernel_to_instance_for_mixtral():
695
695
  with patch("transformers.models.mixtral.modeling_mixtral"):
696
696
  # Instantiate a dummy model
697
697
  config = transformers.models.mixtral.configuration_mixtral.MixtralConfig(
698
- torch_dtype=torch.bfloat16,
698
+ dtype=torch.bfloat16,
699
699
  rms_norm_eps=1e-5,
700
700
  hidden_size=32,
701
701
  intermediate_size=64,
@@ -738,7 +738,7 @@ def test_apply_liger_kernel_to_instance_for_gemma():
738
738
  with patch("transformers.models.gemma.modeling_gemma"):
739
739
  # Instantiate a dummy model
740
740
  config = transformers.models.gemma.configuration_gemma.GemmaConfig(
741
- torch_dtype=torch.bfloat16,
741
+ dtype=torch.bfloat16,
742
742
  rms_norm_eps=1e-5,
743
743
  hidden_size=32,
744
744
  intermediate_size=64,
@@ -777,7 +777,7 @@ def test_apply_liger_kernel_to_instance_for_gemma2():
777
777
  with patch("transformers.models.gemma2.modeling_gemma2"):
778
778
  # Instantiate a dummy model
779
779
  config = transformers.models.gemma2.configuration_gemma2.Gemma2Config(
780
- torch_dtype=torch.bfloat16,
780
+ dtype=torch.bfloat16,
781
781
  rms_norm_eps=1e-5,
782
782
  hidden_size=32,
783
783
  intermediate_size=64,
@@ -827,7 +827,7 @@ def test_apply_liger_kernel_to_instance_for_paligemma():
827
827
 
828
828
  # Instantiate a dummy model
829
829
  config = transformers.models.paligemma.configuration_paligemma.PaliGemmaConfig(
830
- torch_dtype=torch.bfloat16,
830
+ dtype=torch.bfloat16,
831
831
  text_config={
832
832
  "num_hidden_layers": 2,
833
833
  "rms_norm_eps": 1e-5,
@@ -883,7 +883,7 @@ def test_apply_liger_kernel_to_instance_for_gemma3_text():
883
883
 
884
884
  # Instantiate a dummy model
885
885
  config = transformers.models.gemma3.configuration_gemma3.Gemma3TextConfig(
886
- torch_dtype=torch.bfloat16,
886
+ dtype=torch.bfloat16,
887
887
  rms_norm_eps=1e-5,
888
888
  hidden_size=32,
889
889
  intermediate_size=64,
@@ -939,7 +939,7 @@ def test_apply_liger_kernel_to_instance_for_gemma3_conditional_generation():
939
939
 
940
940
  # Instantiate a dummy model
941
941
  text_config = transformers.models.gemma3.configuration_gemma3.Gemma3TextConfig(
942
- torch_dtype=torch.bfloat16,
942
+ dtype=torch.bfloat16,
943
943
  rms_norm_eps=1e-5,
944
944
  hidden_size=32,
945
945
  intermediate_size=64,
@@ -1026,7 +1026,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2():
1026
1026
  with patch("transformers.models.qwen2.modeling_qwen2"):
1027
1027
  # Instantiate a dummy model
1028
1028
  config = transformers.models.qwen2.configuration_qwen2.Qwen2Config(
1029
- torch_dtype=torch.bfloat16,
1029
+ dtype=torch.bfloat16,
1030
1030
  rms_norm_eps=1e-5,
1031
1031
  hidden_size=32,
1032
1032
  intermediate_size=64,
@@ -1068,7 +1068,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3():
1068
1068
 
1069
1069
  # Instantiate a dummy model
1070
1070
  config = transformers.models.qwen3.configuration_qwen3.Qwen3Config(
1071
- torch_dtype=torch.bfloat16,
1071
+ dtype=torch.bfloat16,
1072
1072
  rms_norm_eps=1e-5,
1073
1073
  hidden_size=32,
1074
1074
  intermediate_size=64,
@@ -1110,7 +1110,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_moe():
1110
1110
 
1111
1111
  # Instantiate a dummy model
1112
1112
  config = transformers.models.qwen3_moe.configuration_qwen3_moe.Qwen3MoeConfig(
1113
- torch_dtype=torch.bfloat16,
1113
+ dtype=torch.bfloat16,
1114
1114
  rms_norm_eps=1e-5,
1115
1115
  hidden_size=32,
1116
1116
  intermediate_size=64,
@@ -1158,7 +1158,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation(
1158
1158
 
1159
1159
  # Instantiate a dummy model
1160
1160
  config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLConfig(
1161
- torch_dtype=torch.bfloat16,
1161
+ dtype=torch.bfloat16,
1162
1162
  rms_norm_eps=1e-5,
1163
1163
  hidden_size=32,
1164
1164
  intermediate_size=48,
@@ -1227,7 +1227,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl():
1227
1227
 
1228
1228
  # Instantiate a dummy model
1229
1229
  config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLConfig(
1230
- torch_dtype=torch.bfloat16,
1230
+ dtype=torch.bfloat16,
1231
1231
  rms_norm_eps=1e-5,
1232
1232
  hidden_size=32,
1233
1233
  intermediate_size=48,
@@ -1294,7 +1294,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
1294
1294
 
1295
1295
  # Instantiate a dummy model
1296
1296
  config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLTextConfig(
1297
- torch_dtype=torch.bfloat16,
1297
+ dtype=torch.bfloat16,
1298
1298
  rms_norm_eps=1e-5,
1299
1299
  hidden_size=32,
1300
1300
  intermediate_size=48,
@@ -1347,7 +1347,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
1347
1347
 
1348
1348
  # Instantiate a dummy model
1349
1349
  config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLConfig(
1350
- torch_dtype=torch.bfloat16,
1350
+ dtype=torch.bfloat16,
1351
1351
  rms_norm_eps=1e-5,
1352
1352
  hidden_size=32,
1353
1353
  intermediate_size=48,
@@ -1416,7 +1416,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generatio
1416
1416
 
1417
1417
  # Instantiate a dummy model
1418
1418
  config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLConfig(
1419
- torch_dtype=torch.bfloat16,
1419
+ dtype=torch.bfloat16,
1420
1420
  rms_norm_eps=1e-5,
1421
1421
  hidden_size=32,
1422
1422
  intermediate_size=48,
@@ -1483,7 +1483,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_text():
1483
1483
 
1484
1484
  # Instantiate a dummy model
1485
1485
  config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLTextConfig(
1486
- torch_dtype=torch.bfloat16,
1486
+ dtype=torch.bfloat16,
1487
1487
  rms_norm_eps=1e-5,
1488
1488
  hidden_size=32,
1489
1489
  intermediate_size=48,
@@ -1528,7 +1528,7 @@ def test_apply_liger_kernel_to_instance_for_phi3():
1528
1528
  with patch("transformers.models.phi3.modeling_phi3"):
1529
1529
  # Instantiate a dummy model
1530
1530
  config = transformers.models.phi3.configuration_phi3.Phi3Config(
1531
- torch_dtype=torch.bfloat16,
1531
+ dtype=torch.bfloat16,
1532
1532
  rms_norm_eps=1e-5,
1533
1533
  hidden_size=32,
1534
1534
  intermediate_size=64,
@@ -1570,7 +1570,7 @@ def test_apply_liger_kernel_to_instance_for_olmo2():
1570
1570
 
1571
1571
  # Instantiate a dummy model
1572
1572
  config = transformers.models.olmo2.configuration_olmo2.Olmo2Config(
1573
- torch_dtype=torch.bfloat16,
1573
+ dtype=torch.bfloat16,
1574
1574
  rms_norm_eps=1e-5,
1575
1575
  hidden_size=32,
1576
1576
  intermediate_size=64,
@@ -1616,7 +1616,7 @@ def test_apply_liger_kernel_to_instance_for_glm4():
1616
1616
 
1617
1617
  # Instantiate a dummy model
1618
1618
  config = transformers.models.glm4.configuration_glm4.Glm4Config(
1619
- torch_dtype=torch.bfloat16,
1619
+ dtype=torch.bfloat16,
1620
1620
  rms_norm_eps=1e-5,
1621
1621
  hidden_size=32,
1622
1622
  intermediate_size=64,
@@ -1664,7 +1664,7 @@ def test_apply_liger_kernel_to_instance_for_glm4v():
1664
1664
 
1665
1665
  # Instantiate a dummy model
1666
1666
  config = transformers.models.glm4v.configuration_glm4v.Glm4vConfig(
1667
- torch_dtype=torch.bfloat16,
1667
+ dtype=torch.bfloat16,
1668
1668
  text_config={
1669
1669
  "num_hidden_layers": 2,
1670
1670
  "rms_norm_eps": 1e-5,
@@ -1734,7 +1734,7 @@ def test_apply_liger_kernel_to_instance_for_glm4v_moe():
1734
1734
 
1735
1735
  # Instantiate a dummy model
1736
1736
  config = transformers.models.glm4v_moe.configuration_glm4v_moe.Glm4vMoeConfig(
1737
- torch_dtype=torch.bfloat16,
1737
+ dtype=torch.bfloat16,
1738
1738
  hidden_size=32,
1739
1739
  num_attention_heads=4,
1740
1740
  num_key_value_heads=2,
@@ -1837,7 +1837,7 @@ def test_apply_liger_kernel_to_instance_for_smollm3():
1837
1837
  with patch("transformers.models.smollm3.modeling_smollm3"):
1838
1838
  # Instantiate a dummy model
1839
1839
  config = transformers.models.smollm3.configuration_smollm3.SmolLM3Config(
1840
- torch_dtype=torch.bfloat16,
1840
+ dtype=torch.bfloat16,
1841
1841
  rms_norm_eps=1e-5,
1842
1842
  hidden_size=32,
1843
1843
  intermediate_size=64,