liger-kernel-nightly 0.5.10.dev20250610174206__tar.gz → 0.5.10.dev20250611064616__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271)
  1. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/workflows/benchmark.yml +24 -4
  2. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/workflows/docs.yml +5 -0
  3. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/PKG-INFO +1 -1
  4. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/dev/modal/benchmarks.py +2 -1
  5. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/pyproject.toml +1 -1
  6. liger_kernel_nightly-0.5.10.dev20250611064616/src/liger_kernel/transformers/model/qwen2_5_vl.py +150 -0
  7. liger_kernel_nightly-0.5.10.dev20250611064616/src/liger_kernel/transformers/model/qwen2_vl.py +142 -0
  8. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/monkey_patch.py +66 -23
  9. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
  10. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/bf16/test_mini_models.py +10 -4
  11. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/bf16/test_mini_models_multimodal.py +24 -6
  12. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/bf16/test_mini_models_with_logits.py +10 -4
  13. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/fp32/test_mini_models.py +10 -4
  14. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/fp32/test_mini_models_multimodal.py +23 -5
  15. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/fp32/test_mini_models_with_logits.py +10 -4
  16. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_monkey_patch.py +233 -9
  17. liger_kernel_nightly-0.5.10.dev20250610174206/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -208
  18. liger_kernel_nightly-0.5.10.dev20250610174206/src/liger_kernel/transformers/model/qwen2_vl.py +0 -212
  19. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  20. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  21. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/pull_request_template.md +0 -0
  22. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/workflows/amd-ci.yml +0 -0
  23. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/workflows/intel-ci.yml +0 -0
  24. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/workflows/nvi-ci.yml +0 -0
  25. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/workflows/publish-nightly.yml +0 -0
  26. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.github/workflows/publish-release.yml +0 -0
  27. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.gitignore +0 -0
  28. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/.idea/workspace.xml +0 -0
  29. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/LICENSE +0 -0
  30. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/Makefile +0 -0
  31. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/NOTICE +0 -0
  32. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/README.md +0 -0
  33. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/README.md +0 -0
  34. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/__init__.py +0 -0
  35. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/benchmarks_visualizer.py +0 -0
  36. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/data/all_benchmark_data.csv +0 -0
  37. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/__init__.py +0 -0
  38. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  39. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  40. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  41. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  42. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_dyt.py +0 -0
  43. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_embedding.py +0 -0
  44. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  45. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  46. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  47. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_geglu.py +0 -0
  48. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_group_norm.py +0 -0
  49. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_jsd.py +0 -0
  50. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_kl_div.py +0 -0
  51. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  52. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  53. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  54. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  55. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  56. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  57. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_rope.py +0 -0
  58. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  59. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_softmax.py +0 -0
  60. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  61. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  62. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_swiglu.py +0 -0
  63. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/benchmark_tvd.py +0 -0
  64. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/benchmark/scripts/utils.py +0 -0
  65. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/dev/fmt-requirements.txt +0 -0
  66. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/dev/modal/tests.py +0 -0
  67. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/dev/modal/tests_bwd.py +0 -0
  68. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/Examples.md +0 -0
  69. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/Getting-Started.md +0 -0
  70. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/High-Level-APIs.md +0 -0
  71. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/Low-Level-APIs.md +0 -0
  72. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/acknowledgement.md +0 -0
  73. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/contributing.md +0 -0
  74. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/images/banner.GIF +0 -0
  75. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/images/compose.gif +0 -0
  76. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/images/e2e-memory.png +0 -0
  77. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/images/e2e-tps.png +0 -0
  78. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/images/logo-banner.png +0 -0
  79. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/images/patch.gif +0 -0
  80. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/images/post-training.png +0 -0
  81. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/index.md +0 -0
  82. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/docs/license.md +0 -0
  83. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/alignment/accelerate_config.yaml +0 -0
  84. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/alignment/run_orpo.py +0 -0
  85. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/README.md +0 -0
  86. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/callback.py +0 -0
  87. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/config/fsdp_config.json +0 -0
  88. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  89. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  90. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  91. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/img/llama_tps.png +0 -0
  92. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  93. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/img/qwen_tps.png +0 -0
  94. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/launch_on_modal.py +0 -0
  95. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/requirements.txt +0 -0
  96. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/run_benchmarks.sh +0 -0
  97. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/run_gemma.sh +0 -0
  98. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/run_llama.sh +0 -0
  99. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/run_qwen.sh +0 -0
  100. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/run_qwen2_vl.sh +0 -0
  101. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/training.py +0 -0
  102. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/huggingface/training_multimodal.py +0 -0
  103. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/lightning/README.md +0 -0
  104. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/lightning/requirements.txt +0 -0
  105. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/lightning/training.py +0 -0
  106. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/README.md +0 -0
  107. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/callback.py +0 -0
  108. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  109. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  110. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  111. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  112. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  113. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  114. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  115. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  116. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  117. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/medusa_util.py +0 -0
  118. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/requirements.txt +0 -0
  119. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  120. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/examples/medusa/train.py +0 -0
  121. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/licenses/LICENSE-Apache-2.0 +0 -0
  122. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  123. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  124. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/licenses/LICENSE-MIT-llmc +0 -0
  125. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/licenses/LICENSE-MIT-triton +0 -0
  126. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/mkdocs.yml +0 -0
  127. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/setup.cfg +0 -0
  128. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/setup.py +0 -0
  129. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/__init__.py +0 -0
  130. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/README.md +0 -0
  131. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  132. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  133. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  134. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/functional.py +0 -0
  135. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  136. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  137. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  138. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  139. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  140. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  141. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  142. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  143. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  144. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/env_report.py +0 -0
  145. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/__init__.py +0 -0
  146. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/cross_entropy.py +0 -0
  147. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/dyt.py +0 -0
  148. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  149. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  150. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  151. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  152. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  153. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/geglu.py +0 -0
  154. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/group_norm.py +0 -0
  155. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/grpo_loss.py +0 -0
  156. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/jsd.py +0 -0
  157. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/kl_div.py +0 -0
  158. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/layer_norm.py +0 -0
  159. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  160. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  161. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/rms_norm.py +0 -0
  162. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/rope.py +0 -0
  163. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/softmax.py +0 -0
  164. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/sparsemax.py +0 -0
  165. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/swiglu.py +0 -0
  166. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/tvd.py +0 -0
  167. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/ops/utils.py +0 -0
  168. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/__init__.py +0 -0
  169. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/auto_model.py +0 -0
  170. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  171. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/dyt.py +0 -0
  172. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  173. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/fsdp.py +0 -0
  174. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/functional.py +0 -0
  175. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  176. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  177. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  178. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/geglu.py +0 -0
  179. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/group_norm.py +0 -0
  180. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  181. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/jsd.py +0 -0
  182. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/kl_div.py +0 -0
  183. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/layer_norm.py +0 -0
  184. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/__init__.py +0 -0
  185. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/gemma.py +0 -0
  186. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  187. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  188. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/glm4.py +0 -0
  189. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/llama.py +0 -0
  190. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/llava.py +0 -0
  191. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  192. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/mistral.py +0 -0
  193. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  194. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/mllama.py +0 -0
  195. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  196. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  197. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/phi3.py +0 -0
  198. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  199. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  200. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  201. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  202. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  203. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/rms_norm.py +0 -0
  204. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/rope.py +0 -0
  205. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/softmax.py +0 -0
  206. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/sparsemax.py +0 -0
  207. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/swiglu.py +0 -0
  208. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  209. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  210. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  211. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/transformers/tvd.py +0 -0
  212. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/triton/__init__.py +0 -0
  213. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/triton/monkey_patch.py +0 -0
  214. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel/utils.py +0 -0
  215. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  216. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  217. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  218. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  219. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/__init__.py +0 -0
  220. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/chunked_loss/__init__.py +0 -0
  221. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/chunked_loss/test_cpo_loss.py +0 -0
  222. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/chunked_loss/test_dpo_loss.py +0 -0
  223. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/chunked_loss/test_grpo_loss.py +0 -0
  224. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/chunked_loss/test_jsd_loss.py +0 -0
  225. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/chunked_loss/test_kto_loss.py +0 -0
  226. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/chunked_loss/test_orpo_loss.py +0 -0
  227. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/chunked_loss/test_simpo_loss.py +0 -0
  228. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/conftest.py +0 -0
  229. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/__init__.py +0 -0
  230. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/bf16/__init__.py +0 -0
  231. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/convergence/fp32/__init__.py +0 -0
  232. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  233. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  234. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  235. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  236. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  237. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  238. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  239. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  240. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  241. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/tiny_shakespeare.txt +0 -0
  242. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  243. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  244. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  245. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_auto_model.py +0 -0
  246. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_cross_entropy.py +0 -0
  247. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_dyt.py +0 -0
  248. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_embedding.py +0 -0
  249. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_flex_attention.py +0 -0
  250. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  251. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_fused_linear_jsd.py +0 -0
  252. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  253. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_geglu.py +0 -0
  254. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_group_norm.py +0 -0
  255. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_grpo_loss.py +0 -0
  256. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_jsd.py +0 -0
  257. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_kl_div.py +0 -0
  258. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_layer_norm.py +0 -0
  259. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_mm_int8int2.py +0 -0
  260. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_multi_token_attention.py +0 -0
  261. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_qwen2vl_mrope.py +0 -0
  262. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_rms_norm.py +0 -0
  263. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_rope.py +0 -0
  264. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_softmax.py +0 -0
  265. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_sparsemax.py +0 -0
  266. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_swiglu.py +0 -0
  267. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_trainer_integration.py +0 -0
  268. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_transformers.py +0 -0
  269. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/transformers/test_tvd.py +0 -0
  270. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/triton/test_triton_monkey_patch.py +0 -0
  271. {liger_kernel_nightly-0.5.10.dev20250610174206 → liger_kernel_nightly-0.5.10.dev20250611064616}/test/utils.py +0 -0
@@ -29,7 +29,6 @@ jobs:
29
29
  OUTPUT_FILENAME: benchmark.csv
30
30
  GENERATED_CSV: benchmark/data/all_benchmark_data.csv
31
31
 
32
-
33
32
  steps:
34
33
  - name: Checkout code
35
34
  uses: actions/checkout@v3
@@ -52,9 +51,15 @@ jobs:
52
51
  pip install modal
53
52
  pip install pandas
54
53
 
55
- # - name: Run benchmarks on GPU
56
- # run: |
57
- # modal run dev.modal.benchmarks
54
+ # Delete previous benchmark results.
55
+ - name: Remove previous benchmark data
56
+ run: |
57
+ rm -f benchmark/data/all_benchmark_data.csv
58
+
59
+
60
+ - name: Run benchmarks on GPU
61
+ run: |
62
+ modal run dev.modal.benchmarks
58
63
 
59
64
  # Step 5: Checkout gh-pages branch in a subfolder
60
65
  - name: Checkout gh-pages
@@ -69,6 +74,21 @@ jobs:
69
74
  mkdir -p gh-pages/${OUTPUT_DIR}/${{ steps.get_hash.outputs.hash }}
70
75
  cp ${GENERATED_CSV} gh-pages/${OUTPUT_DIR}/${{ steps.get_hash.outputs.hash }}/${OUTPUT_FILENAME}
71
76
 
77
+ # Step 7: Append commit hash to commits.txt if not already present
78
+ - name: Update commits.txt
79
+ run: |
80
+ cd gh-pages
81
+ echo "commits.txt file path: ${OUTPUT_DIR}/commits.txt"
82
+
83
+ # Create file if it doesn't exist
84
+ mkdir -p ${OUTPUT_DIR}
85
+ touch ${OUTPUT_DIR}/commits.txt
86
+
87
+ # Append only if not already present
88
+ if ! grep -q "${{ steps.get_hash.outputs.hash }}" ${OUTPUT_DIR}/commits.txt; then
89
+ echo "${{ steps.get_hash.outputs.hash }}" >> ${OUTPUT_DIR}/commits.txt
90
+ fi
91
+
72
92
  # Step 8: Commit and push
73
93
  - name: Commit and push to gh-pages
74
94
  run: |
@@ -3,10 +3,15 @@ on:
3
3
  push:
4
4
  branches:
5
5
  - main
6
+
7
+ env:
8
+ ENABLE_DEPLOY: false
9
+
6
10
  permissions:
7
11
  contents: write
8
12
  jobs:
9
13
  deploy:
14
+ if: env.ENABLE_DEPLOY == 'true'
10
15
  runs-on: ubuntu-latest
11
16
  steps:
12
17
  - uses: actions/checkout@v4
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.10.dev20250610174206
3
+ Version: 0.5.10.dev20250611064616
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -14,7 +14,7 @@ app = modal.App("liger_benchmarks", image=image)
14
14
  repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
15
15
 
16
16
 
17
- @app.function(gpu="A10G", image=repo, timeout=60 * 45)
17
+ @app.function(gpu="H100", image=repo, timeout=60 * 45)
18
18
  def liger_benchmarks():
19
19
  import subprocess
20
20
 
@@ -25,3 +25,4 @@ def liger_benchmarks():
25
25
  cwd=REMOTE_ROOT_PATH,
26
26
  )
27
27
  subprocess.run(["python benchmark/scripts/benchmark_kto_loss.py"], check=True, shell=True, cwd=REMOTE_ROOT_PATH)
28
+ subprocess.run(["python benchmark/scripts/benchmark_cpo_loss.py"], check=True, shell=True, cwd=REMOTE_ROOT_PATH)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.5.10.dev20250610174206"
7
+ version = "0.5.10.dev20250611064616"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -0,0 +1,150 @@
1
+ from typing import List
2
+ from typing import Optional
3
+ from typing import Tuple
4
+ from typing import Union
5
+
6
+ import torch
7
+
8
+ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLCausalLMOutputWithPast
9
+ from transformers.utils import can_return_tuple
10
+
11
+ from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
12
+
13
+
14
+ @can_return_tuple
15
+ def lce_forward(
16
+ self,
17
+ input_ids: torch.LongTensor = None,
18
+ attention_mask: Optional[torch.Tensor] = None,
19
+ position_ids: Optional[torch.LongTensor] = None,
20
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
21
+ inputs_embeds: Optional[torch.FloatTensor] = None,
22
+ labels: Optional[torch.LongTensor] = None,
23
+ use_cache: Optional[bool] = None,
24
+ output_attentions: Optional[bool] = None,
25
+ output_hidden_states: Optional[bool] = None,
26
+ return_dict: Optional[bool] = None,
27
+ pixel_values: Optional[torch.Tensor] = None,
28
+ pixel_values_videos: Optional[torch.FloatTensor] = None,
29
+ image_grid_thw: Optional[torch.LongTensor] = None,
30
+ video_grid_thw: Optional[torch.LongTensor] = None,
31
+ rope_deltas: Optional[torch.LongTensor] = None,
32
+ cache_position: Optional[torch.LongTensor] = None,
33
+ second_per_grid_ts: Optional[torch.Tensor] = None,
34
+ skip_logits: Optional[bool] = None,
35
+ **kwargs,
36
+ ) -> Union[Tuple, Qwen2_5_VLCausalLMOutputWithPast]:
37
+ r"""
38
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
39
+ Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
40
+ config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
41
+ (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
42
+ pixel_values_videos (`torch.FloatTensor` of shape `(seq_length, num_channels * temporal_size * image_size * image_size)`):
43
+ The tensors corresponding to the input videos. Pixel values can be obtained using
44
+ [`AutoImageProcessor`]. See [`Qwen2_5_VLImageProcessor.__call__`] for details. [`Qwen2_5_VLProcessor`] uses
45
+ [`Qwen2_5_VLImageProcessor`] for processing videos.
46
+ image_grid_thw (`torch.LongTensor` of shape `(num_images, 3)`, *optional*):
47
+ The temporal, height and width of feature shape of each image in LLM.
48
+ video_grid_thw (`torch.LongTensor` of shape `(num_videos, 3)`, *optional*):
49
+ The temporal, height and width of feature shape of each video in LLM.
50
+ rope_deltas (`torch.LongTensor` of shape `(batch_size, )`, *optional*):
51
+ The rope index difference between sequence length and multimodal rope.
52
+ second_per_grid_ts (`torch.Tensor` of shape `(num_videos)`, *optional*):
53
+ The time interval (in seconds) for each grid along the temporal dimension in the 3D position IDs.
54
+
55
+ Example:
56
+
57
+ ```python
58
+ >>> from PIL import Image
59
+ >>> import requests
60
+ >>> from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
61
+
62
+ >>> model = Qwen2_5_VLForConditionalGeneration.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
63
+ >>> processor = AutoProcessor.from_pretrained("Qwen/Qwen2.5-VL-7B-Instruct")
64
+
65
+ >>> messages = [
66
+ {
67
+ "role": "user",
68
+ "content": [
69
+ {"type": "image"},
70
+ {"type": "text", "text": "What is shown in this image?"},
71
+ ],
72
+ },
73
+ ]
74
+ >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
75
+ >>> image = Image.open(requests.get(url, stream=True).raw)
76
+
77
+ >>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
78
+ >>> inputs = processor(text=[text], images=[image], vision_infos=[vision_infos])
79
+
80
+ >>> # Generate
81
+ >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
82
+ >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
83
+ "The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..."
84
+ ```"""
85
+
86
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
87
+ output_hidden_states = (
88
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
89
+ )
90
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
91
+
92
+ outputs = self.model(
93
+ input_ids=input_ids,
94
+ pixel_values=pixel_values,
95
+ pixel_values_videos=pixel_values_videos,
96
+ image_grid_thw=image_grid_thw,
97
+ video_grid_thw=video_grid_thw,
98
+ second_per_grid_ts=second_per_grid_ts,
99
+ position_ids=position_ids,
100
+ attention_mask=attention_mask,
101
+ past_key_values=past_key_values,
102
+ inputs_embeds=inputs_embeds,
103
+ use_cache=use_cache,
104
+ output_attentions=output_attentions,
105
+ output_hidden_states=output_hidden_states,
106
+ return_dict=return_dict,
107
+ cache_position=cache_position,
108
+ **kwargs,
109
+ )
110
+
111
+ hidden_states = outputs[0]
112
+
113
+ shift_labels = kwargs.pop("shift_labels", None)
114
+ loss = None
115
+ logits = None
116
+
117
+ if skip_logits and labels is None and shift_labels is None:
118
+ raise ValueError("skip_logits is True, but labels and shift_labels are None")
119
+
120
+ if skip_logits is None:
121
+ skip_logits = self.training and (labels is not None or shift_labels is not None)
122
+
123
+ if skip_logits:
124
+ loss = LigerForCausalLMLoss(
125
+ hidden_states=hidden_states,
126
+ lm_head_weight=self.lm_head.weight,
127
+ labels=labels,
128
+ shift_labels=shift_labels,
129
+ hidden_size=self.config.hidden_size,
130
+ **kwargs,
131
+ )
132
+ else:
133
+ logits = self.lm_head(hidden_states)
134
+
135
+ loss = None
136
+ if labels is not None:
137
+ loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size)
138
+
139
+ if not return_dict:
140
+ output = (logits,) + outputs[1:]
141
+ return (loss,) + output if loss is not None else output
142
+
143
+ return Qwen2_5_VLCausalLMOutputWithPast(
144
+ loss=loss,
145
+ logits=logits,
146
+ past_key_values=outputs.past_key_values,
147
+ hidden_states=outputs.hidden_states,
148
+ attentions=outputs.attentions,
149
+ rope_deltas=outputs.rope_deltas,
150
+ )
@@ -0,0 +1,142 @@
1
+ from typing import List
2
+ from typing import Optional
3
+ from typing import Tuple
4
+ from typing import Union
5
+
6
+ import torch
7
+
8
+ from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLCausalLMOutputWithPast
9
+ from transformers.utils import can_return_tuple
10
+
11
+ from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
12
+
13
+
14
+ @can_return_tuple
15
+ def lce_forward(
16
+ self,
17
+ input_ids: torch.LongTensor = None,
18
+ attention_mask: Optional[torch.Tensor] = None,
19
+ position_ids: Optional[torch.LongTensor] = None,
20
+ past_key_values: Optional[List[torch.FloatTensor]] = None,
21
+ inputs_embeds: Optional[torch.FloatTensor] = None,
22
+ labels: Optional[torch.LongTensor] = None,
23
+ use_cache: Optional[bool] = None,
24
+ output_attentions: Optional[bool] = None,
25
+ output_hidden_states: Optional[bool] = None,
26
+ return_dict: Optional[bool] = None,
27
+ pixel_values: Optional[torch.Tensor] = None,
28
+ pixel_values_videos: Optional[torch.FloatTensor] = None,
29
+ image_grid_thw: Optional[torch.LongTensor] = None,
30
+ video_grid_thw: Optional[torch.LongTensor] = None,
31
+ rope_deltas: Optional[torch.LongTensor] = None,
32
+ cache_position: Optional[torch.LongTensor] = None,
33
+ skip_logits: Optional[bool] = None,
34
+ **kwargs,
35
+ ) -> Union[Tuple, Qwen2VLCausalLMOutputWithPast]:
36
+ r"""
37
+ labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
38
+ Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
39
+ config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
40
+ (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
41
+ pixel_values_videos (`torch.FloatTensor` of shape `(seq_length, num_channels * temporal_size * image_size * image_size)`):
42
+ The tensors corresponding to the input videos. Pixel values can be obtained using
43
+ [`AutoImageProcessor`]. See [`Qwen2VLImageProcessor.__call__`] for details. [`Qwen2VLProcessor`] uses
44
+ [`Qwen2VLImageProcessor`] for processing videos.
45
+ image_grid_thw (`torch.LongTensor` of shape `(num_images, 3)`, *optional*):
46
+ The temporal, height and width of feature shape of each image in LLM.
47
+ video_grid_thw (`torch.LongTensor` of shape `(num_videos, 3)`, *optional*):
48
+ The temporal, height and width of feature shape of each video in LLM.
49
+ rope_deltas (`torch.LongTensor` of shape `(batch_size, )`, *optional*):
50
+ The rope index difference between sequence length and multimodal rope.
51
+
52
+ Example:
53
+
54
+ ```python
55
+ >>> from PIL import Image
56
+ >>> import requests
57
+ >>> from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
58
+
59
+ >>> model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
60
+ >>> processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
61
+
62
+ >>> messages = [
63
+ {
64
+ "role": "user",
65
+ "content": [
66
+ {"type": "image"},
67
+ {"type": "text", "text": "What is shown in this image?"},
68
+ ],
69
+ },
70
+ ]
71
+ >>> url = "https://www.ilankelman.org/stopsigns/australia.jpg"
72
+ >>> image = Image.open(requests.get(url, stream=True).raw)
73
+
74
+ >>> text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
75
+ >>> inputs = processor(text=[text], images=[image], vision_infos=[vision_infos])
76
+
77
+ >>> # Generate
78
+ >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
79
+ >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
80
+ "The image shows a street scene with a red stop sign in the foreground. In the background, there is a large red gate with Chinese characters ..."
81
+ ```"""
82
+
83
+ output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
84
+ output_hidden_states = (
85
+ output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
86
+ )
87
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
88
+
89
+ outputs = self.model(
90
+ input_ids=input_ids,
91
+ pixel_values=pixel_values,
92
+ pixel_values_videos=pixel_values_videos,
93
+ image_grid_thw=image_grid_thw,
94
+ video_grid_thw=video_grid_thw,
95
+ position_ids=position_ids,
96
+ attention_mask=attention_mask,
97
+ past_key_values=past_key_values,
98
+ inputs_embeds=inputs_embeds,
99
+ use_cache=use_cache,
100
+ output_attentions=output_attentions,
101
+ output_hidden_states=output_hidden_states,
102
+ return_dict=return_dict,
103
+ cache_position=cache_position,
104
+ **kwargs,
105
+ )
106
+
107
+ hidden_states = outputs[0]
108
+
109
+ shift_labels = kwargs.pop("shift_labels", None)
110
+ loss = None
111
+ logits = None
112
+
113
+ if skip_logits and labels is None and shift_labels is None:
114
+ raise ValueError("skip_logits is True, but labels and shift_labels are None")
115
+
116
+ if skip_logits is None:
117
+ skip_logits = self.training and (labels is not None or shift_labels is not None)
118
+
119
+ if skip_logits:
120
+ loss = LigerForCausalLMLoss(
121
+ hidden_states=hidden_states,
122
+ lm_head_weight=self.lm_head.weight,
123
+ labels=labels,
124
+ shift_labels=shift_labels,
125
+ hidden_size=self.config.hidden_size,
126
+ **kwargs,
127
+ )
128
+ else:
129
+ logits = self.lm_head(hidden_states)
130
+
131
+ loss = None
132
+ if labels is not None:
133
+ loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size)
134
+
135
+ return Qwen2VLCausalLMOutputWithPast(
136
+ loss=loss,
137
+ logits=logits,
138
+ past_key_values=outputs.past_key_values,
139
+ hidden_states=outputs.hidden_states,
140
+ attentions=outputs.attentions,
141
+ rope_deltas=outputs.rope_deltas,
142
+ )
@@ -1225,7 +1225,7 @@ def apply_liger_kernel_to_qwen2_vl(
1225
1225
  ) -> None:
1226
1226
  """
1227
1227
  Apply Liger kernels to replace original implementation in HuggingFace Qwen2-VL models.
1228
- NOTE: Qwen2-VL is not available in transformers<4.45.0
1228
+ NOTE: Qwen2-VL is not supported in transformers<4.52.4
1229
1229
 
1230
1230
  Args:
1231
1231
  cross_entropy (bool): Whether to apply Liger's cross entropy loss. Default is False.
@@ -1239,12 +1239,19 @@ def apply_liger_kernel_to_qwen2_vl(
1239
1239
  model (PreTrainedModel): The model instance to apply Liger kernels to, if the model has already been
1240
1240
  loaded. Default is None.
1241
1241
  """
1242
+ if transformer_version < version.parse("4.52.4"):
1243
+ logger.warning("Qwen2-VL support is only compatible with transformers >= 4.52.4")
1244
+ return
1245
+
1242
1246
  assert not (cross_entropy and fused_linear_cross_entropy), (
1243
1247
  "cross_entropy and fused_linear_cross_entropy cannot both be True."
1244
1248
  )
1245
1249
 
1246
1250
  from transformers.models.qwen2_vl import modeling_qwen2_vl
1251
+ from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VisionTransformerPretrainedModel
1252
+ from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLForConditionalGeneration
1247
1253
  from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLModel
1254
+ from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLTextModel
1248
1255
 
1249
1256
  from liger_kernel.transformers.model.qwen2_vl import lce_forward as qwen2_vl_lce_forward
1250
1257
 
@@ -1266,24 +1273,38 @@ def apply_liger_kernel_to_qwen2_vl(
1266
1273
  # The model instance already exists, so we need to additionally patch the
1267
1274
  # instance variables that reference already-instantiated modules
1268
1275
 
1269
- # get the base model from the model instance
1270
- base_model: Qwen2VLModel = getattr(model, model.base_model_prefix, model)
1276
+ if isinstance(model, (Qwen2VLForConditionalGeneration, Qwen2VLModel)):
1277
+ # Note: language_model and visual properties can be accessed through the conditional class for BC.
1278
+ # Not sure if it is subject to changes in the future.
1279
+ # Reference: https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py#L1698
1280
+ text_model: Qwen2VLTextModel = model.language_model
1281
+ vision_model: Qwen2VisionTransformerPretrainedModel = model.visual
1282
+ elif isinstance(model, Qwen2VLTextModel):
1283
+ text_model: Qwen2VLTextModel = model
1284
+ vision_model = None
1285
+ else:
1286
+ # Note: Currently there's no support for patching vision model only. Feel free to raise an issue if needed.
1287
+ raise TypeError(
1288
+ f"Unsupported Qwen2VL model type. `model` must be `Qwen2VLForConditionalGeneration`, `Qwen2VLModel` or `Qwen2VLTextModel`. Got: {type(model)}"
1289
+ )
1271
1290
 
1272
- if hasattr(model, "visual"):
1273
- # Patch Qwen2VisionTransformerPretrainedModel
1274
- for vision_block in model.visual.blocks:
1291
+ # Patch Qwen2VisionTransformerPretrainedModel
1292
+ if vision_model is not None:
1293
+ for vision_block in vision_model.blocks:
1275
1294
  if layer_norm:
1276
1295
  _patch_layer_norm_module(vision_block.norm1)
1277
1296
  _patch_layer_norm_module(vision_block.norm2)
1278
1297
 
1279
- if rms_norm:
1280
- _patch_rms_norm_module(base_model.norm)
1281
- for decoder_layer in base_model.layers:
1282
- if swiglu:
1283
- _patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
1298
+ # Patch Qwen2VLTextModel
1299
+ if text_model is not None:
1284
1300
  if rms_norm:
1285
- _patch_rms_norm_module(decoder_layer.input_layernorm)
1286
- _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
1301
+ _patch_rms_norm_module(text_model.norm)
1302
+ for decoder_layer in text_model.layers:
1303
+ if swiglu:
1304
+ _patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
1305
+ if rms_norm:
1306
+ _patch_rms_norm_module(decoder_layer.input_layernorm)
1307
+ _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
1287
1308
 
1288
1309
 
1289
1310
  def apply_liger_kernel_to_qwen2_5_vl(
@@ -1309,12 +1330,19 @@ def apply_liger_kernel_to_qwen2_5_vl(
1309
1330
  model (PreTrainedModel): The model instance to apply Liger kernels to, if the model has already been
1310
1331
  loaded. Default is None.
1311
1332
  """
1333
+ if transformer_version < version.parse("4.52.4"):
1334
+ logger.warning("Qwen2.5-VL support is only compatible with transformers >= 4.52.4")
1335
+ return
1336
+
1312
1337
  assert not (cross_entropy and fused_linear_cross_entropy), (
1313
1338
  "cross_entropy and fused_linear_cross_entropy cannot both be True."
1314
1339
  )
1315
1340
 
1316
1341
  from transformers.models.qwen2_5_vl import modeling_qwen2_5_vl
1342
+ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VisionTransformerPretrainedModel
1343
+ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
1317
1344
  from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLModel
1345
+ from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLTextModel
1318
1346
 
1319
1347
  from liger_kernel.transformers.model.qwen2_5_vl import lce_forward as qwen2_5_vl_lce_forward
1320
1348
 
@@ -1333,24 +1361,37 @@ def apply_liger_kernel_to_qwen2_5_vl(
1333
1361
  # The model instance already exists, so we need to additionally patch the
1334
1362
  # instance variables that reference already-instantiated modules
1335
1363
 
1336
- # get the base model from the model instance
1337
- base_model: Qwen2_5_VLModel = getattr(model, model.base_model_prefix, model)
1364
+ if isinstance(model, (Qwen2_5_VLForConditionalGeneration, Qwen2_5_VLModel)):
1365
+ # Note: language_model and visual properties can be accessed through the conditional class for BC.
1366
+ # Not sure if it is subject to changes in the future.
1367
+ # Reference: https://github.com/huggingface/transformers/blob/v4.52.4/src/transformers/models/qwen2_5_vl/modeling_qwen2_5_vl.py#L1823
1368
+ text_model: Qwen2_5_VLTextModel = model.language_model
1369
+ vision_model: Qwen2_5_VisionTransformerPretrainedModel = model.visual
1370
+ elif isinstance(model, Qwen2_5_VLTextModel):
1371
+ text_model: Qwen2_5_VLTextModel = model
1372
+ vision_model = None
1373
+ else:
1374
+ # Note: Currently there's no support for patching vision model only. Feel free to raise an issue if needed.
1375
+ raise TypeError(
1376
+ f"Unsupported Qwen2VL model type. `model` must be `Qwen2VLForConditionalGeneration`, `Qwen2VLModel` or `Qwen2VLTextModel`. Got: {type(model)}"
1377
+ )
1338
1378
 
1339
- if hasattr(model, "visual"):
1379
+ if vision_model is not None:
1340
1380
  # Patch Qwen2_5_VisionTransformerPretrainedModel
1341
1381
  for vision_block in model.visual.blocks:
1342
1382
  if rms_norm:
1343
1383
  _patch_rms_norm_module(vision_block.norm1)
1344
1384
  _patch_rms_norm_module(vision_block.norm2)
1345
1385
 
1346
- if rms_norm:
1347
- _patch_rms_norm_module(base_model.norm)
1348
- for decoder_layer in base_model.layers:
1349
- if swiglu:
1350
- _patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
1386
+ if text_model is not None:
1351
1387
  if rms_norm:
1352
- _patch_rms_norm_module(decoder_layer.input_layernorm)
1353
- _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
1388
+ _patch_rms_norm_module(text_model.norm)
1389
+ for decoder_layer in text_model.layers:
1390
+ if swiglu:
1391
+ _patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
1392
+ if rms_norm:
1393
+ _patch_rms_norm_module(decoder_layer.input_layernorm)
1394
+ _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
1354
1395
 
1355
1396
 
1356
1397
  def apply_liger_kernel_to_phi3(
@@ -1571,7 +1612,9 @@ MODEL_TYPE_TO_APPLY_LIGER_FN = {
1571
1612
  "qwen3": apply_liger_kernel_to_qwen3,
1572
1613
  "qwen3_moe": apply_liger_kernel_to_qwen3_moe,
1573
1614
  "qwen2_vl": apply_liger_kernel_to_qwen2_vl,
1615
+ "qwen2_vl_text": apply_liger_kernel_to_qwen2_vl,
1574
1616
  "qwen2_5_vl": apply_liger_kernel_to_qwen2_5_vl,
1617
+ "qwen2_5_vl_text": apply_liger_kernel_to_qwen2_5_vl,
1575
1618
  "phi3": apply_liger_kernel_to_phi3,
1576
1619
  "paligemma": apply_liger_kernel_to_paligemma,
1577
1620
  }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.10.dev20250610174206
3
+ Version: 0.5.10.dev20250611064616
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -69,20 +69,26 @@ except ImportError:
69
69
  MLLAMA_AVAILABLE = False
70
70
 
71
71
  try:
72
- # Qwen2-VL is only available in transformers>4.44.2
72
+ # Qwen2-VL is only available in transformers>=4.52.4
73
+ import transformers
74
+
75
+ from packaging import version
73
76
  from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLConfig
74
77
  from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLForConditionalGeneration
75
78
 
76
- QWEN2_VL_AVAILABLE = True
79
+ QWEN2_VL_AVAILABLE = version.parse(transformers.__version__) >= version.parse("4.52.4")
77
80
  except ImportError:
78
81
  QWEN2_VL_AVAILABLE = False
79
82
 
80
83
  try:
81
- # Qwen2.5-VL is only available in transformers>4.48.2
84
+ # Qwen2.5-VL is only available in transformers>=4.52.4
85
+ import transformers
86
+
87
+ from packaging import version
82
88
  from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import Qwen2_5_VLConfig
83
89
  from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
84
90
 
85
- QWEN2_5_VL_AVAILABLE = True
91
+ QWEN2_5_VL_AVAILABLE = version.parse(transformers.__version__) >= version.parse("4.52.4")
86
92
  except ImportError:
87
93
  QWEN2_5_VL_AVAILABLE = False
88
94
 
@@ -35,26 +35,34 @@ from test.utils import supports_bfloat16
35
35
  from test.utils import train_bpe_tokenizer
36
36
 
37
37
  try:
38
- # Qwen2-VL is only available in transformers>=4.45.0
38
+ # Qwen2-VL is only available in transformers>=4.52.4
39
+ import transformers
40
+
41
+ from packaging import version
39
42
  from transformers.models.qwen2.tokenization_qwen2_fast import Qwen2TokenizerFast
40
43
  from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLConfig
41
44
  from transformers.models.qwen2_vl.image_processing_qwen2_vl import Qwen2VLImageProcessor
42
45
  from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLForConditionalGeneration
43
46
  from transformers.models.qwen2_vl.processing_qwen2_vl import Qwen2VLProcessor
47
+ from transformers.models.qwen2_vl.video_processing_qwen2_vl import Qwen2VLVideoProcessor
44
48
 
45
- QWEN2_VL_AVAILABLE = True
49
+ QWEN2_VL_AVAILABLE = version.parse(transformers.__version__) >= version.parse("4.52.4")
46
50
  except ImportError:
47
51
  QWEN2_VL_AVAILABLE = False
48
52
 
49
53
  try:
50
- # Qwen2.5-VL is only available in transformers>4.48.2
54
+ # Qwen2.5-VL is only available in transformers>=4.52.4
55
+ import transformers
56
+
57
+ from packaging import version
51
58
  from transformers.models.qwen2.tokenization_qwen2_fast import Qwen2TokenizerFast
52
59
  from transformers.models.qwen2_5_vl.configuration_qwen2_5_vl import Qwen2_5_VLConfig
53
60
  from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLForConditionalGeneration
54
61
  from transformers.models.qwen2_5_vl.processing_qwen2_5_vl import Qwen2_5_VLProcessor
55
62
  from transformers.models.qwen2_vl.image_processing_qwen2_vl import Qwen2VLImageProcessor
63
+ from transformers.models.qwen2_vl.video_processing_qwen2_vl import Qwen2VLVideoProcessor
56
64
 
57
- QWEN2_5_VL_AVAILABLE = True
65
+ QWEN2_5_VL_AVAILABLE = version.parse(transformers.__version__) >= version.parse("4.52.4")
58
66
  except ImportError:
59
67
  QWEN2_5_VL_AVAILABLE = False
60
68
 
@@ -504,7 +512,12 @@ def create_processor(model_name: str):
504
512
  )
505
513
  qwen_tokenizer = Qwen2TokenizerFast(tokenizer_object=tokenizer_base, **tokenizer_config)
506
514
  image_processor = Qwen2VLImageProcessor()
507
- return Qwen2VLProcessor(image_processor=image_processor, tokenizer=qwen_tokenizer)
515
+ video_processor = Qwen2VLVideoProcessor()
516
+ return Qwen2VLProcessor(
517
+ image_processor=image_processor,
518
+ video_processor=video_processor,
519
+ tokenizer=qwen_tokenizer,
520
+ )
508
521
 
509
522
  elif model_name == "mini_qwen2_5_vl":
510
523
  tokenizer_config = load_tokenizer_config(
@@ -521,7 +534,12 @@ def create_processor(model_name: str):
521
534
  )
522
535
  qwen_tokenizer = Qwen2TokenizerFast(tokenizer_object=tokenizer_base, **tokenizer_config)
523
536
  image_processor = Qwen2VLImageProcessor()
524
- return Qwen2_5_VLProcessor(image_processor=image_processor, tokenizer=qwen_tokenizer)
537
+ video_processor = Qwen2VLVideoProcessor()
538
+ return Qwen2_5_VLProcessor(
539
+ image_processor=image_processor,
540
+ video_processor=video_processor,
541
+ tokenizer=qwen_tokenizer,
542
+ )
525
543
 
526
544
  elif model_name == "mini_llava":
527
545
  tokenizer_config = load_tokenizer_config(