liger-kernel-nightly 0.6.0.dev20250708194445__tar.gz → 0.6.0.dev20250709042125__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (273)
  1. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/PKG-INFO +2 -2
  2. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/dev/modal/tests.py +1 -1
  3. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/dev/modal/tests_bwd.py +3 -3
  4. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/pyproject.toml +1 -1
  5. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/setup.py +1 -1
  6. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/monkey_patch.py +8 -3
  7. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel_nightly.egg-info/PKG-INFO +2 -2
  8. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel_nightly.egg-info/requires.txt +1 -1
  9. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/bf16/test_mini_models.py +2 -2
  10. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/fp32/test_mini_models.py +3 -3
  11. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_monkey_patch.py +102 -33
  12. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  13. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  14. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/pull_request_template.md +0 -0
  15. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/workflows/amd-ci.yml +0 -0
  16. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/workflows/benchmark.yml +0 -0
  17. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/workflows/docs.yml +0 -0
  18. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/workflows/intel-ci.yml +0 -0
  19. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/workflows/nvi-ci.yml +0 -0
  20. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/workflows/publish-nightly.yml +0 -0
  21. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.github/workflows/publish-release.yml +0 -0
  22. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/.gitignore +0 -0
  23. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/LICENSE +0 -0
  24. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/Makefile +0 -0
  25. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/NOTICE +0 -0
  26. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/README.md +0 -0
  27. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/README.md +0 -0
  28. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/__init__.py +0 -0
  29. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/benchmarks_visualizer.py +0 -0
  30. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/data/all_benchmark_data.csv +0 -0
  31. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/__init__.py +0 -0
  32. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  33. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  34. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  35. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  36. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  37. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_dyt.py +0 -0
  38. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_embedding.py +0 -0
  39. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  40. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  41. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  42. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_geglu.py +0 -0
  43. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_group_norm.py +0 -0
  44. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_jsd.py +0 -0
  45. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_kl_div.py +0 -0
  46. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  47. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  48. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  49. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  50. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  51. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  52. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_rope.py +0 -0
  53. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  54. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_softmax.py +0 -0
  55. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  56. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  57. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_swiglu.py +0 -0
  58. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/benchmark_tvd.py +0 -0
  59. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/benchmark/scripts/utils.py +0 -0
  60. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/dev/fmt-requirements.txt +0 -0
  61. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/dev/modal/benchmarks.py +0 -0
  62. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/Examples.md +0 -0
  63. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/Getting-Started.md +0 -0
  64. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/High-Level-APIs.md +0 -0
  65. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/Low-Level-APIs.md +0 -0
  66. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/acknowledgement.md +0 -0
  67. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/contributing.md +0 -0
  68. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/images/banner.GIF +0 -0
  69. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/images/compose.gif +0 -0
  70. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/images/e2e-memory.png +0 -0
  71. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/images/e2e-tps.png +0 -0
  72. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/images/logo-banner.png +0 -0
  73. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/images/patch.gif +0 -0
  74. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/images/post-training.png +0 -0
  75. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/index.md +0 -0
  76. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/docs/license.md +0 -0
  77. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/alignment/accelerate_config.yaml +0 -0
  78. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/alignment/run_orpo.py +0 -0
  79. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/README.md +0 -0
  80. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/callback.py +0 -0
  81. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/config/fsdp_config.json +0 -0
  82. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  83. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  84. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  85. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/img/llama_tps.png +0 -0
  86. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  87. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/img/qwen_tps.png +0 -0
  88. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/launch_on_modal.py +0 -0
  89. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/requirements.txt +0 -0
  90. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/run_benchmarks.sh +0 -0
  91. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/run_gemma.sh +0 -0
  92. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/run_llama.sh +0 -0
  93. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/run_qwen.sh +0 -0
  94. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/run_qwen2_vl.sh +0 -0
  95. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/training.py +0 -0
  96. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/huggingface/training_multimodal.py +0 -0
  97. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/lightning/README.md +0 -0
  98. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/lightning/requirements.txt +0 -0
  99. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/lightning/training.py +0 -0
  100. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/README.md +0 -0
  101. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/callback.py +0 -0
  102. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  103. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  104. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  105. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  106. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  107. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  108. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  109. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  110. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  111. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/medusa_util.py +0 -0
  112. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/requirements.txt +0 -0
  113. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  114. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/examples/medusa/train.py +0 -0
  115. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/licenses/LICENSE-Apache-2.0 +0 -0
  116. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  117. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  118. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/licenses/LICENSE-MIT-llmc +0 -0
  119. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/licenses/LICENSE-MIT-triton +0 -0
  120. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/mkdocs.yml +0 -0
  121. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/setup.cfg +0 -0
  122. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/__init__.py +0 -0
  123. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/README.md +0 -0
  124. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  125. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  126. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  127. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  128. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/functional.py +0 -0
  129. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  130. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  131. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  132. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  133. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  134. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  135. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  136. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  137. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  138. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/env_report.py +0 -0
  139. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/__init__.py +0 -0
  140. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/cross_entropy.py +0 -0
  141. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/dyt.py +0 -0
  142. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  143. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  144. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  145. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  146. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  147. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/geglu.py +0 -0
  148. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/group_norm.py +0 -0
  149. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/grpo_loss.py +0 -0
  150. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/jsd.py +0 -0
  151. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/kl_div.py +0 -0
  152. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/layer_norm.py +0 -0
  153. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  154. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  155. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/rms_norm.py +0 -0
  156. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/rope.py +0 -0
  157. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/softmax.py +0 -0
  158. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/sparsemax.py +0 -0
  159. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/swiglu.py +0 -0
  160. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/tvd.py +0 -0
  161. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/ops/utils.py +0 -0
  162. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/__init__.py +0 -0
  163. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/auto_model.py +0 -0
  164. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  165. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/dyt.py +0 -0
  166. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  167. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/fsdp.py +0 -0
  168. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/functional.py +0 -0
  169. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  170. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  171. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  172. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/geglu.py +0 -0
  173. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/group_norm.py +0 -0
  174. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  175. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/jsd.py +0 -0
  176. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/kl_div.py +0 -0
  177. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/layer_norm.py +0 -0
  178. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/__init__.py +0 -0
  179. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/gemma.py +0 -0
  180. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  181. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  182. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/glm4.py +0 -0
  183. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/llama.py +0 -0
  184. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/llama4.py +0 -0
  185. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/llava.py +0 -0
  186. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  187. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/mistral.py +0 -0
  188. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  189. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/mllama.py +0 -0
  190. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  191. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  192. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/phi3.py +0 -0
  193. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  194. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  195. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  196. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  197. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  198. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  199. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  200. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/rms_norm.py +0 -0
  201. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/rope.py +0 -0
  202. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/softmax.py +0 -0
  203. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/sparsemax.py +0 -0
  204. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/swiglu.py +0 -0
  205. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  206. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  207. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  208. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/transformers/tvd.py +0 -0
  209. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/triton/__init__.py +0 -0
  210. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/triton/monkey_patch.py +0 -0
  211. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel/utils.py +0 -0
  212. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  213. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  214. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  215. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/__init__.py +0 -0
  216. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/__init__.py +0 -0
  217. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/test_cosine_loss.py +0 -0
  218. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/test_cpo_loss.py +0 -0
  219. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/test_dpo_loss.py +0 -0
  220. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/test_grpo_loss.py +0 -0
  221. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/test_jsd_loss.py +0 -0
  222. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/test_kto_loss.py +0 -0
  223. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/test_orpo_loss.py +0 -0
  224. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/chunked_loss/test_simpo_loss.py +0 -0
  225. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/conftest.py +0 -0
  226. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/__init__.py +0 -0
  227. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/bf16/__init__.py +0 -0
  228. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  229. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  230. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/fp32/__init__.py +0 -0
  231. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
  232. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  233. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  234. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  235. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  236. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  237. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  238. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  239. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  240. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  241. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  242. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  243. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/tiny_shakespeare.txt +0 -0
  244. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  245. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  246. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  247. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_auto_model.py +0 -0
  248. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_cross_entropy.py +0 -0
  249. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_dyt.py +0 -0
  250. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_embedding.py +0 -0
  251. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_flex_attention.py +0 -0
  252. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  253. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_fused_linear_jsd.py +0 -0
  254. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  255. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_geglu.py +0 -0
  256. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_group_norm.py +0 -0
  257. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_grpo_loss.py +0 -0
  258. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_jsd.py +0 -0
  259. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_kl_div.py +0 -0
  260. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_layer_norm.py +0 -0
  261. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_mm_int8int2.py +0 -0
  262. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_multi_token_attention.py +0 -0
  263. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_qwen2vl_mrope.py +0 -0
  264. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_rms_norm.py +0 -0
  265. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_rope.py +0 -0
  266. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_softmax.py +0 -0
  267. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_sparsemax.py +0 -0
  268. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_swiglu.py +0 -0
  269. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_trainer_integration.py +0 -0
  270. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_transformers.py +0 -0
  271. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/transformers/test_tvd.py +0 -0
  272. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/triton/test_triton_monkey_patch.py +0 -0
  273. {liger_kernel_nightly-0.6.0.dev20250708194445 → liger_kernel_nightly-0.6.0.dev20250709042125}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.0.dev20250708194445
3
+ Version: 0.6.0.dev20250709042125
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -33,7 +33,7 @@ License-File: NOTICE
33
33
  Requires-Dist: torch>=2.1.2
34
34
  Requires-Dist: triton>=2.3.1
35
35
  Provides-Extra: dev
36
- Requires-Dist: transformers>=4.44.2; extra == "dev"
36
+ Requires-Dist: transformers>=4.49.0; extra == "dev"
37
37
  Requires-Dist: matplotlib>=3.7.2; extra == "dev"
38
38
  Requires-Dist: flake8>=4.0.1.1; extra == "dev"
39
39
  Requires-Dist: black>=24.4.2; extra == "dev"
@@ -14,7 +14,7 @@ app = modal.App("liger_tests", image=image)
14
14
  repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
15
15
 
16
16
 
17
- @app.function(gpu="A10G", image=repo, timeout=60 * 45)
17
+ @app.function(gpu="A10G", image=repo, timeout=60 * 60)
18
18
  def liger_tests():
19
19
  import subprocess
20
20
 
@@ -14,7 +14,7 @@ app = modal.App("liger_tests_bwd", image=image)
14
14
  repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
15
15
 
16
16
 
17
- @app.function(gpu="A10G", image=repo, timeout=60 * 30)
17
+ @app.function(gpu="A10G", image=repo, timeout=60 * 60)
18
18
  def liger_bwd_tests():
19
19
  import subprocess
20
20
 
@@ -24,9 +24,9 @@ def liger_bwd_tests():
24
24
  shell=True,
25
25
  cwd=REMOTE_ROOT_PATH,
26
26
  )
27
- # force install transformers==4.44.2
27
+ # force install transformers==4.49.0
28
28
  subprocess.run(
29
- ["uv pip install transformers==4.44.2 --system"],
29
+ ["uv pip install transformers==4.49.0 --system"],
30
30
  check=True,
31
31
  shell=True,
32
32
  cwd=REMOTE_ROOT_PATH,
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.0.dev20250708194445"
7
+ version = "0.6.0.dev20250709042125"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -31,7 +31,7 @@ def get_optional_dependencies():
31
31
  """Get optional dependency groups."""
32
32
  return {
33
33
  "dev": [
34
- "transformers>=4.44.2",
34
+ "transformers>=4.49.0",
35
35
  "matplotlib>=3.7.2",
36
36
  "flake8>=4.0.1.1",
37
37
  "black>=24.4.2",
@@ -537,7 +537,10 @@ def apply_liger_kernel_to_mllama(
537
537
  if isinstance(model, MllamaForConditionalGeneration):
538
538
  language_model: MllamaForCausalLM = model.language_model
539
539
  vision_model: MllamaVisionModel = model.vision_model
540
- text_model: MllamaTextModel = language_model
540
+ if isinstance(language_model, MllamaForCausalLM):
541
+ text_model: MllamaTextModel = language_model.model
542
+ else:
543
+ text_model = language_model
541
544
  elif isinstance(model, MllamaForCausalLM):
542
545
  text_model = model.model
543
546
  vision_model = None
@@ -1096,7 +1099,9 @@ def apply_liger_kernel_to_paligemma(
1096
1099
  # PaliGemma submodules are ['vision_tower', 'multi_modal_projector', 'language_model']
1097
1100
 
1098
1101
  from transformers.models.gemma.modeling_gemma import GemmaForCausalLM
1102
+ from transformers.models.gemma.modeling_gemma import GemmaModel
1099
1103
  from transformers.models.gemma2.modeling_gemma2 import Gemma2ForCausalLM
1104
+ from transformers.models.gemma2.modeling_gemma2 import Gemma2Model
1100
1105
  from transformers.models.paligemma import modeling_paligemma
1101
1106
  from transformers.models.paligemma.modeling_paligemma import PaliGemmaForConditionalGeneration
1102
1107
  from transformers.models.siglip import modeling_siglip
@@ -1155,7 +1160,7 @@ def apply_liger_kernel_to_paligemma(
1155
1160
 
1156
1161
  language_model = model.language_model
1157
1162
 
1158
- if isinstance(language_model, GemmaForCausalLM):
1163
+ if isinstance(language_model, (GemmaForCausalLM, GemmaModel)):
1159
1164
  apply_liger_kernel_to_gemma(
1160
1165
  rope=rope,
1161
1166
  cross_entropy=False,
@@ -1165,7 +1170,7 @@ def apply_liger_kernel_to_paligemma(
1165
1170
  model=language_model,
1166
1171
  )
1167
1172
 
1168
- elif isinstance(language_model, Gemma2ForCausalLM):
1173
+ elif isinstance(language_model, (Gemma2ForCausalLM, Gemma2Model)):
1169
1174
  apply_liger_kernel_to_gemma2(
1170
1175
  rope=rope,
1171
1176
  cross_entropy=False,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.0.dev20250708194445
3
+ Version: 0.6.0.dev20250709042125
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -33,7 +33,7 @@ License-File: NOTICE
33
33
  Requires-Dist: torch>=2.1.2
34
34
  Requires-Dist: triton>=2.3.1
35
35
  Provides-Extra: dev
36
- Requires-Dist: transformers>=4.44.2; extra == "dev"
36
+ Requires-Dist: transformers>=4.49.0; extra == "dev"
37
37
  Requires-Dist: matplotlib>=3.7.2; extra == "dev"
38
38
  Requires-Dist: flake8>=4.0.1.1; extra == "dev"
39
39
  Requires-Dist: black>=24.4.2; extra == "dev"
@@ -2,7 +2,7 @@ torch>=2.1.2
2
2
  triton>=2.3.1
3
3
 
4
4
  [dev]
5
- transformers>=4.44.2
5
+ transformers>=4.49.0
6
6
  matplotlib>=3.7.2
7
7
  flake8>=4.0.1.1
8
8
  black>=24.4.2
@@ -957,8 +957,8 @@ def run_mini_model(
957
957
  reason="LLaVa not available in this version of transformers",
958
958
  ),
959
959
  pytest.mark.skipif(
960
- version.parse(transformers.__version__) < version.parse("4.49.0"),
961
- reason="Mistral not available in transformers<=4.49.0",
960
+ version.parse(transformers.__version__) < version.parse("4.52.0"),
961
+ reason="LLaVa doesn't materialize logits in transformers<=4.52.0 so we can't test it",
962
962
  ),
963
963
  ],
964
964
  ),
@@ -938,8 +938,8 @@ def run_mini_model(
938
938
  reason="LLaVa not available in this version of transformers",
939
939
  ),
940
940
  pytest.mark.skipif(
941
- version.parse(transformers.__version__) < version.parse("4.49.0"),
942
- reason="Mistral not available in transformers<=4.49.0",
941
+ version.parse(transformers.__version__) < version.parse("4.52.0"),
942
+ reason="LLaVa doesn't materialize logits in transformers<=4.52.0 so we can't test it",
943
943
  ),
944
944
  ],
945
945
  ),
@@ -1103,7 +1103,7 @@ def run_mini_model(
1103
1103
  torch.float32,
1104
1104
  1e-8,
1105
1105
  1e-4,
1106
- 5e-3, # 4e-3
1106
+ 4e-2, # 4e-3
1107
1107
  1e-5, # 1e-5
1108
1108
  5e-3,
1109
1109
  1e-5,
@@ -38,6 +38,7 @@ if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
38
38
  from liger_kernel.transformers.model.mistral import lce_forward as mistral_lce_forward
39
39
  from liger_kernel.transformers.model.mixtral import lce_forward as mixtral_lce_forward
40
40
  from liger_kernel.transformers.model.mllama import lce_forward as mllama_lce_forward
41
+ from liger_kernel.transformers.model.paligemma import lce_forward as paligemma_lce_forward
41
42
  from liger_kernel.transformers.model.phi3 import lce_forward as phi3_lce_forward
42
43
  from liger_kernel.transformers.model.qwen2 import lce_forward as qwen2_lce_forward
43
44
  else:
@@ -49,6 +50,7 @@ else:
49
50
  )
50
51
  from liger_kernel.transformers.model.mixtral import lce_forward_deprecated as mixtral_lce_forward
51
52
  from liger_kernel.transformers.model.mllama import lce_forward_deprecated as mllama_lce_forward
53
+ from liger_kernel.transformers.model.paligemma import lce_forward_deprecated as paligemma_lce_forward
52
54
  from liger_kernel.transformers.model.phi3 import lce_forward_deprecated as phi3_lce_forward
53
55
  from liger_kernel.transformers.model.qwen2 import lce_forward_deprecated as qwen2_lce_forward
54
56
 
@@ -72,24 +74,6 @@ def is_llama4_available():
72
74
  return False
73
75
 
74
76
 
75
- def is_qwen2_vl_available():
76
- try:
77
- import transformers.models.qwen2_vl # noqa: F401
78
-
79
- return True
80
- except ImportError:
81
- return False
82
-
83
-
84
- def is_qwen2_5_vl_available():
85
- try:
86
- import transformers.models.qwen2_5_vl # noqa: F401
87
-
88
- return True
89
- except ImportError:
90
- return False
91
-
92
-
93
77
  def is_qwen3_available():
94
78
  try:
95
79
  import transformers.models.qwen3 # noqa: F401
@@ -126,6 +110,15 @@ def is_gemma3_available():
126
110
  return False
127
111
 
128
112
 
113
+ def is_paligemma_available():
114
+ try:
115
+ import transformers.models.paligemma # noqa: F401
116
+
117
+ return True
118
+ except ImportError:
119
+ return False
120
+
121
+
129
122
  def test_import_from_root():
130
123
  try:
131
124
  from liger_kernel.transformers import AutoLigerKernelForCausalLM # noqa: F401
@@ -354,6 +347,7 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
354
347
  # Ensure any monkey patching is cleaned up for subsequent tests
355
348
  with patch("transformers.models.mllama.modeling_mllama"):
356
349
  from transformers.models.mllama.modeling_mllama import MllamaForConditionalGeneration
350
+ from transformers.models.mllama.modeling_mllama import MllamaTextModel
357
351
 
358
352
  # Instantiate a dummy model
359
353
  config = transformers.models.mllama.configuration_mllama.MllamaConfig(
@@ -387,10 +381,14 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
387
381
 
388
382
  # Check that model instance variables are not yet patched with Liger modules
389
383
  assert inspect.getsource(dummy_model_instance.forward) != inspect.getsource(mllama_lce_forward)
390
- assert inspect.getsource(dummy_model_instance.language_model.norm.forward) != inspect.getsource(
391
- LigerRMSNorm.forward
392
- )
393
- for layer in dummy_model_instance.language_model.layers:
384
+
385
+ if isinstance(dummy_model_instance.language_model, MllamaTextModel):
386
+ language_model = dummy_model_instance.language_model
387
+ else:
388
+ language_model = dummy_model_instance.language_model.model
389
+
390
+ assert inspect.getsource(language_model.norm.forward) != inspect.getsource(LigerRMSNorm.forward)
391
+ for layer in language_model.layers:
394
392
  assert inspect.getsource(layer.mlp.forward) != inspect.getsource(LigerSwiGLUMLP.forward)
395
393
  assert inspect.getsource(layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
396
394
  assert inspect.getsource(layer.post_attention_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
@@ -417,10 +415,8 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
417
415
 
418
416
  # Check that the model's instance variables were correctly patched with Liger modules
419
417
  assert inspect.getsource(dummy_model_instance.forward) == inspect.getsource(mllama_lce_forward)
420
- assert inspect.getsource(dummy_model_instance.language_model.norm.forward) == inspect.getsource(
421
- LigerRMSNorm.forward
422
- )
423
- for layer in dummy_model_instance.language_model.layers:
418
+ assert inspect.getsource(language_model.norm.forward) == inspect.getsource(LigerRMSNorm.forward)
419
+ for layer in language_model.layers:
424
420
  assert inspect.getsource(layer.mlp.forward) == inspect.getsource(LigerSwiGLUMLP.forward)
425
421
  assert inspect.getsource(layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
426
422
  assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
@@ -441,7 +437,6 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
441
437
  assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(
442
438
  LigerLayerNorm.forward
443
439
  )
444
-
445
440
  try:
446
441
  print(dummy_model_instance)
447
442
  except Exception as e:
@@ -793,6 +788,62 @@ def test_apply_liger_kernel_to_instance_for_gemma2():
793
788
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
794
789
 
795
790
 
791
+ @pytest.mark.skipif(not is_paligemma_available(), reason="paligemma module not available")
792
+ def test_apply_liger_kernel_to_instance_for_paligemma():
793
+ # Ensure any monkey patching is cleaned up for subsequent tests
794
+ with patch("transformers.models.paligemma.modeling_paligemma"):
795
+ from transformers.models.paligemma.modeling_paligemma import PaliGemmaForConditionalGeneration
796
+
797
+ # Instantiate a dummy model
798
+ config = transformers.models.paligemma.configuration_paligemma.PaliGemmaConfig(
799
+ torch_dtype=torch.bfloat16,
800
+ text_config={
801
+ "num_hidden_layers": 2,
802
+ "rms_norm_eps": 1e-5,
803
+ "hidden_size": 32,
804
+ "intermediate_size": 64,
805
+ "hidden_act": "silu",
806
+ },
807
+ vision_config={
808
+ "num_hidden_layers": 2,
809
+ "layer_norm_eps": 1e-5,
810
+ "hidden_size": 48,
811
+ "intermediate_size": 64,
812
+ },
813
+ )
814
+
815
+ dummy_model_instance = PaliGemmaForConditionalGeneration(config)
816
+ assert isinstance(dummy_model_instance, PaliGemmaForConditionalGeneration)
817
+
818
+ # Check that model instance variables are not yet patched with Liger modules
819
+ assert inspect.getsource(dummy_model_instance.forward) != inspect.getsource(paligemma_lce_forward)
820
+ assert inspect.getsource(
821
+ dummy_model_instance.vision_tower.vision_model.post_layernorm.forward
822
+ ) != inspect.getsource(LigerLayerNorm.forward)
823
+
824
+ for layer in dummy_model_instance.vision_tower.vision_model.encoder.layers:
825
+ assert inspect.getsource(layer.layer_norm1.forward) != inspect.getsource(LigerLayerNorm.forward)
826
+ assert inspect.getsource(layer.layer_norm2.forward) != inspect.getsource(LigerLayerNorm.forward)
827
+
828
+ # Test applying kernels to the model instance
829
+ _apply_liger_kernel_to_instance(model=dummy_model_instance)
830
+
831
+ # Check that the model's instance variables were correctly patched with Liger modules
832
+ assert inspect.getsource(dummy_model_instance.forward) == inspect.getsource(paligemma_lce_forward)
833
+ assert inspect.getsource(
834
+ dummy_model_instance.vision_tower.vision_model.post_layernorm.forward
835
+ ) == inspect.getsource(LigerLayerNorm.forward)
836
+
837
+ for layer in dummy_model_instance.vision_tower.vision_model.encoder.layers:
838
+ assert inspect.getsource(layer.layer_norm1.forward) == inspect.getsource(LigerLayerNorm.forward)
839
+ assert inspect.getsource(layer.layer_norm2.forward) == inspect.getsource(LigerLayerNorm.forward)
840
+
841
+ try:
842
+ print(dummy_model_instance)
843
+ except Exception as e:
844
+ pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
845
+
846
+
796
847
  @pytest.mark.skipif(not is_gemma3_available(), reason="gemma3 module not available")
797
848
  def test_apply_liger_kernel_to_instance_for_gemma3_text():
798
849
  # Ensure any monkey patching is cleaned up for subsequent tests
@@ -1063,7 +1114,10 @@ def test_apply_liger_kernel_to_instance_for_qwen3_moe():
1063
1114
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1064
1115
 
1065
1116
 
1066
- @pytest.mark.skipif(not is_qwen2_vl_available(), reason="qwen2_vl module not available")
1117
+ @pytest.mark.skipif(
1118
+ transformer_version < version.parse("4.52.4"),
1119
+ reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
1120
+ )
1067
1121
  def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation():
1068
1122
  # Ensure any monkey patching is cleaned up for subsequent tests
1069
1123
  with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
@@ -1129,7 +1183,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation(
1129
1183
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1130
1184
 
1131
1185
 
1132
- @pytest.mark.skipif(not is_qwen2_vl_available(), reason="qwen2_vl module not available")
1186
+ @pytest.mark.skipif(
1187
+ transformer_version < version.parse("4.52.4"),
1188
+ reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
1189
+ )
1133
1190
  def test_apply_liger_kernel_to_instance_for_qwen2_vl():
1134
1191
  # Ensure any monkey patching is cleaned up for subsequent tests
1135
1192
  with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
@@ -1195,7 +1252,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl():
1195
1252
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1196
1253
 
1197
1254
 
1198
- @pytest.mark.skipif(not is_qwen2_vl_available(), reason="qwen2_vl module not available")
1255
+ @pytest.mark.skipif(
1256
+ transformer_version < version.parse("4.52.4"),
1257
+ reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
1258
+ )
1199
1259
  def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
1200
1260
  # Ensure any monkey patching is cleaned up for subsequent tests
1201
1261
  with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
@@ -1243,7 +1303,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
1243
1303
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1244
1304
 
1245
1305
 
1246
- @pytest.mark.skipif(not is_qwen2_5_vl_available(), reason="qwen2_5_vl module not available")
1306
+ @pytest.mark.skipif(
1307
+ transformer_version < version.parse("4.52.4"),
1308
+ reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
1309
+ )
1247
1310
  def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
1248
1311
  # Ensure any monkey patching is cleaned up for subsequent tests
1249
1312
  with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):
@@ -1309,7 +1372,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
1309
1372
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1310
1373
 
1311
1374
 
1312
- @pytest.mark.skipif(not is_qwen2_5_vl_available(), reason="qwen2_5_vl module not available")
1375
+ @pytest.mark.skipif(
1376
+ transformer_version < version.parse("4.52.4"),
1377
+ reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
1378
+ )
1313
1379
  def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generation():
1314
1380
  # Ensure any monkey patching is cleaned up for subsequent tests
1315
1381
  with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):
@@ -1375,7 +1441,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generatio
1375
1441
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1376
1442
 
1377
1443
 
1378
- @pytest.mark.skipif(not is_qwen2_5_vl_available(), reason="qwen2_5_vl module not available")
1444
+ @pytest.mark.skipif(
1445
+ transformer_version < version.parse("4.52.4"),
1446
+ reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
1447
+ )
1379
1448
  def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_text():
1380
1449
  # Ensure any monkey patching is cleaned up for subsequent tests
1381
1450
  with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):