liger-kernel-nightly 0.6.3.dev20251101160510__tar.gz → 0.6.3.dev20251105012545__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (298):
  1. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/PKG-INFO +1 -1
  2. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/pyproject.toml +1 -1
  3. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3_vl_moe.py +2 -4
  4. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/monkey_patch.py +3 -9
  5. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/rope.py +3 -7
  6. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
  7. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/bf16/test_mini_models_with_logits.py +1 -2
  8. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/fp32/test_mini_models.py +2 -6
  9. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/fp32/test_mini_models_multimodal.py +0 -1
  10. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/fp32/test_mini_models_with_logits.py +2 -6
  11. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_monkey_patch.py +16 -44
  12. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  13. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  14. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/pull_request_template.md +0 -0
  15. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/amd-ci.yml +0 -0
  16. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/benchmark.yml +0 -0
  17. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/docs.yml +0 -0
  18. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/intel-ci.yml +0 -0
  19. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/nvi-ci.yml +0 -0
  20. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/publish-nightly.yml +0 -0
  21. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/publish-release.yml +0 -0
  22. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.gitignore +0 -0
  23. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/LICENSE +0 -0
  24. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/Makefile +0 -0
  25. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/NOTICE +0 -0
  26. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/README.md +0 -0
  27. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/README.md +0 -0
  28. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/__init__.py +0 -0
  29. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/benchmarks_visualizer.py +0 -0
  30. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/data/all_benchmark_data.csv +0 -0
  31. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/__init__.py +0 -0
  32. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  33. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  34. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  35. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  36. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  37. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_dyt.py +0 -0
  38. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_embedding.py +0 -0
  39. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
  40. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  41. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  42. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  43. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_geglu.py +0 -0
  44. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_group_norm.py +0 -0
  45. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
  46. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_jsd.py +0 -0
  47. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_kl_div.py +0 -0
  48. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  49. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  50. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
  51. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  52. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  53. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_poly_norm.py +0 -0
  54. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  55. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  56. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_rope.py +0 -0
  57. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  58. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_softmax.py +0 -0
  59. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  60. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  61. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_swiglu.py +0 -0
  62. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_tvd.py +0 -0
  63. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/utils.py +0 -0
  64. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/dev/fmt-requirements.txt +0 -0
  65. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/dev/modal/benchmarks.py +0 -0
  66. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/dev/modal/tests.py +0 -0
  67. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/dev/modal/tests_bwd.py +0 -0
  68. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/Examples.md +0 -0
  69. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/Getting-Started.md +0 -0
  70. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/High-Level-APIs.md +0 -0
  71. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/Low-Level-APIs.md +0 -0
  72. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/acknowledgement.md +0 -0
  73. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/contributing.md +0 -0
  74. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/banner.GIF +0 -0
  75. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/compose.gif +0 -0
  76. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/e2e-memory.png +0 -0
  77. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/e2e-tps.png +0 -0
  78. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/logo-banner.png +0 -0
  79. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/patch.gif +0 -0
  80. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/post-training.png +0 -0
  81. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/index.md +0 -0
  82. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/license.md +0 -0
  83. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/alignment/accelerate_config.yaml +0 -0
  84. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/alignment/run_orpo.py +0 -0
  85. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/README.md +0 -0
  86. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/callback.py +0 -0
  87. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/config/fsdp_config.json +0 -0
  88. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  89. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  90. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  91. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/llama_tps.png +0 -0
  92. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  93. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/qwen_tps.png +0 -0
  94. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/launch_on_modal.py +0 -0
  95. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/requirements.txt +0 -0
  96. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_benchmarks.sh +0 -0
  97. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_gemma.sh +0 -0
  98. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_llama.sh +0 -0
  99. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_qwen.sh +0 -0
  100. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_qwen2_vl.sh +0 -0
  101. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/training.py +0 -0
  102. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/training_multimodal.py +0 -0
  103. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/lightning/README.md +0 -0
  104. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/lightning/requirements.txt +0 -0
  105. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/lightning/training.py +0 -0
  106. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/README.md +0 -0
  107. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/callback.py +0 -0
  108. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  109. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  110. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  111. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  112. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  113. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  114. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  115. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  116. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  117. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/medusa_util.py +0 -0
  118. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/requirements.txt +0 -0
  119. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  120. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/train.py +0 -0
  121. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-Apache-2.0 +0 -0
  122. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  123. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  124. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-MIT-llmc +0 -0
  125. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-MIT-triton +0 -0
  126. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/mkdocs.yml +0 -0
  127. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/setup.cfg +0 -0
  128. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/setup.py +0 -0
  129. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/__init__.py +0 -0
  130. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/README.md +0 -0
  131. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  132. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  133. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  134. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  135. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/functional.py +0 -0
  136. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  137. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  138. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  139. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  140. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  141. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  142. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  143. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  144. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  145. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/env_report.py +0 -0
  146. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/__init__.py +0 -0
  147. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/cross_entropy.py +0 -0
  148. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/dyt.py +0 -0
  149. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  150. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  151. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
  152. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  153. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  154. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  155. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/geglu.py +0 -0
  156. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/group_norm.py +0 -0
  157. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/grpo_loss.py +0 -0
  158. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/jsd.py +0 -0
  159. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/kl_div.py +0 -0
  160. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/layer_norm.py +0 -0
  161. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/llama4_rope.py +0 -0
  162. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  163. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/poly_norm.py +0 -0
  164. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  165. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/rms_norm.py +0 -0
  166. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/rope.py +0 -0
  167. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/softmax.py +0 -0
  168. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/sparsemax.py +0 -0
  169. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/swiglu.py +0 -0
  170. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/tvd.py +0 -0
  171. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/utils.py +0 -0
  172. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/__init__.py +0 -0
  173. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/auto_model.py +0 -0
  174. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  175. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/dyt.py +0 -0
  176. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
  177. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  178. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fsdp.py +0 -0
  179. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/functional.py +0 -0
  180. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
  181. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  182. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  183. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  184. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/geglu.py +0 -0
  185. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/group_norm.py +0 -0
  186. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  187. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/jsd.py +0 -0
  188. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/kl_div.py +0 -0
  189. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/layer_norm.py +0 -0
  190. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/llama4_rope.py +0 -0
  191. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/__init__.py +0 -0
  192. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
  193. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/gemma.py +0 -0
  194. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  195. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  196. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/glm4.py +0 -0
  197. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/glm4v.py +0 -0
  198. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
  199. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/internvl.py +0 -0
  200. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/llama.py +0 -0
  201. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/llama4.py +0 -0
  202. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/llava.py +0 -0
  203. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  204. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/mistral.py +0 -0
  205. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  206. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/mllama.py +0 -0
  207. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  208. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  209. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/phi3.py +0 -0
  210. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  211. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  212. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  213. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  214. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  215. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3_next.py +0 -0
  216. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3_vl.py +0 -0
  217. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/smollm3.py +0 -0
  218. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/smolvlm.py +0 -0
  219. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  220. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/poly_norm.py +0 -0
  221. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  222. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/rms_norm.py +0 -0
  223. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/softmax.py +0 -0
  224. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/sparsemax.py +0 -0
  225. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/swiglu.py +0 -0
  226. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  227. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  228. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  229. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/tvd.py +0 -0
  230. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/triton/__init__.py +0 -0
  231. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/triton/monkey_patch.py +0 -0
  232. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/utils.py +0 -0
  233. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  234. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  235. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  236. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  237. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/__init__.py +0 -0
  238. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/__init__.py +0 -0
  239. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_cosine_loss.py +0 -0
  240. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_cpo_loss.py +0 -0
  241. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_dpo_loss.py +0 -0
  242. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_grpo_loss.py +0 -0
  243. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_jsd_loss.py +0 -0
  244. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_kto_loss.py +0 -0
  245. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_orpo_loss.py +0 -0
  246. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_simpo_loss.py +0 -0
  247. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/conftest.py +0 -0
  248. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/__init__.py +0 -0
  249. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/bf16/__init__.py +0 -0
  250. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/bf16/test_mini_models.py +0 -0
  251. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  252. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/fp32/__init__.py +0 -0
  253. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  254. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  255. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json +0 -0
  256. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  257. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  258. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  259. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
  260. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  261. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  262. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Qwen/Qwen3-VL-4B-Instruct/tokenizer_config.json +0 -0
  263. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  264. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  265. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  266. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/tiny_shakespeare.txt +0 -0
  267. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  268. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  269. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  270. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_auto_model.py +0 -0
  271. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_cross_entropy.py +0 -0
  272. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_dyt.py +0 -0
  273. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_embedding.py +0 -0
  274. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_flex_attention.py +0 -0
  275. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_fused_add_rms_norm.py +0 -0
  276. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  277. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_fused_linear_jsd.py +0 -0
  278. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  279. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_geglu.py +0 -0
  280. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_group_norm.py +0 -0
  281. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_grpo_loss.py +0 -0
  282. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_jsd.py +0 -0
  283. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_kl_div.py +0 -0
  284. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_layer_norm.py +0 -0
  285. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_mm_int8int2.py +0 -0
  286. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_multi_token_attention.py +0 -0
  287. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_poly_norm.py +0 -0
  288. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_qwen2vl_mrope.py +0 -0
  289. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_rms_norm.py +0 -0
  290. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_rope.py +0 -0
  291. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_softmax.py +0 -0
  292. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_sparsemax.py +0 -0
  293. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_swiglu.py +0 -0
  294. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_trainer_integration.py +0 -0
  295. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_transformers.py +0 -0
  296. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_tvd.py +0 -0
  297. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/triton/test_triton_monkey_patch.py +0 -0
  298. {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.3.dev20251101160510
3
+ Version: 0.6.3.dev20251105012545
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.3.dev20251101160510"
7
+ version = "0.6.3.dev20251105012545"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -5,13 +5,11 @@ from typing import Union
5
5
 
6
6
  import torch
7
7
 
8
+ from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import Qwen3VLMoeCausalLMOutputWithPast
9
+ from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import load_balancing_loss_func
8
10
  from transformers.utils import can_return_tuple
9
11
 
10
12
  from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
11
- from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
12
- Qwen3VLMoeCausalLMOutputWithPast,
13
- load_balancing_loss_func,
14
- )
15
13
 
16
14
 
17
15
  @can_return_tuple
@@ -6,7 +6,6 @@ from types import MethodType
6
6
  from typing import Callable
7
7
  from typing import Optional
8
8
 
9
- import torch
10
9
  import transformers
11
10
 
12
11
  from packaging import version
@@ -36,11 +35,9 @@ from liger_kernel.transformers.model.qwen3_vl_moe import lce_forward as qwen3_vl
36
35
  from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_forward
37
36
  from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
38
37
  from liger_kernel.transformers.rms_norm import LigerRMSNorm
39
- from liger_kernel.transformers.rope import (
40
- liger_rotary_pos_emb,
41
- liger_rotary_pos_emb_with_cast,
42
- liger_rotary_pos_emb_with_cast_and_leading_batch,
43
- )
38
+ from liger_kernel.transformers.rope import liger_rotary_pos_emb
39
+ from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast
40
+ from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast_and_leading_batch
44
41
  from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP
45
42
  from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP
46
43
  from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
@@ -64,7 +61,6 @@ def _bind_method_to_module(module, method_name: str, new_method: Callable):
64
61
  module.__dict__[method_name] = new_method.__get__(module, module.__class__)
65
62
 
66
63
 
67
-
68
64
  def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", in_place=True, row_mode=None):
69
65
  # Check if the module is a PEFT ModulesToSaveWrapper
70
66
  # If it is, we need to patch the modules_to_save.default and original_modules
@@ -1651,7 +1647,6 @@ def apply_liger_kernel_to_qwen2_5_vl(
1651
1647
  _patch_rms_norm_module(decoder_layer.post_attention_layernorm)
1652
1648
 
1653
1649
 
1654
-
1655
1650
  def apply_liger_kernel_to_qwen3_vl(
1656
1651
  rope: bool = True,
1657
1652
  cross_entropy: bool = False,
@@ -1688,7 +1683,6 @@ def apply_liger_kernel_to_qwen3_vl(
1688
1683
  modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb_with_cast
1689
1684
  modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_with_cast_and_leading_batch
1690
1685
 
1691
-
1692
1686
  if rms_norm:
1693
1687
  modeling_qwen3_vl.Qwen3VLTextRMSNorm = LigerRMSNorm
1694
1688
 
@@ -1,4 +1,5 @@
1
- from typing import Optional, Tuple
1
+ from typing import Optional
2
+ from typing import Tuple
2
3
 
3
4
  import torch
4
5
 
@@ -32,7 +33,6 @@ def liger_rotary_pos_emb_with_cast(
32
33
  position_ids: Optional[torch.Tensor] = None,
33
34
  unsqueeze_dim: int = 1,
34
35
  ) -> Tuple[torch.Tensor, torch.Tensor]:
35
-
36
36
  orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
37
37
 
38
38
  q32 = q.to(torch.float32)
@@ -52,8 +52,6 @@ def liger_rotary_pos_emb_with_cast_and_leading_batch(
52
52
  position_ids: Optional[torch.Tensor] = None,
53
53
  unsqueeze_dim: int = 1,
54
54
  ) -> Tuple[torch.Tensor, torch.Tensor]:
55
-
56
-
57
55
  orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
58
56
 
59
57
  q32 = q.to(torch.float32).unsqueeze(0)
@@ -61,7 +59,5 @@ def liger_rotary_pos_emb_with_cast_and_leading_batch(
61
59
  cos32 = cos.to(torch.float32).unsqueeze(0)
62
60
  sin32 = sin.to(torch.float32).unsqueeze(0)
63
61
 
64
- q_out, k_out = liger_rotary_pos_emb(
65
- q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim
66
- )
62
+ q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
67
63
  return q_out.to(orig_q_dtype).squeeze(0), k_out.to(orig_k_dtype).squeeze(0)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.3.dev20251101160510
3
+ Version: 0.6.3.dev20251105012545
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -1347,11 +1347,10 @@ def run_mini_model(
1347
1347
 
1348
1348
  if "llava" in model_name:
1349
1349
  apply_liger_kernel_to_llama(**kwargs)
1350
-
1350
+
1351
1351
  kwargs["fused_linear_cross_entropy"] = False
1352
1352
  kwargs["cross_entropy"] = False
1353
1353
 
1354
-
1355
1354
  MINI_MODEL_SETUPS[model_name].liger_kernel_patch_func(**kwargs)
1356
1355
  else:
1357
1356
  MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
@@ -42,13 +42,11 @@ from liger_kernel.transformers import apply_liger_kernel_to_phi3
42
42
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2
43
43
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2_5_vl
44
44
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl
45
- from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl
46
- from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl_moe
47
45
  from liger_kernel.transformers import apply_liger_kernel_to_qwen3
48
46
  from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
47
+ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next
49
48
  from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl
50
49
  from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl_moe
51
- from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next
52
50
  from liger_kernel.transformers import apply_liger_kernel_to_smollm3
53
51
  from test.utils import DEFAULT_DATASET_PATH
54
52
  from test.utils import MiniModelConfig
@@ -76,13 +74,11 @@ from test.utils import revert_liger_kernel_to_phi3
76
74
  from test.utils import revert_liger_kernel_to_qwen2
77
75
  from test.utils import revert_liger_kernel_to_qwen2_5_vl
78
76
  from test.utils import revert_liger_kernel_to_qwen2_vl
79
- from test.utils import revert_liger_kernel_to_qwen3_vl
80
- from test.utils import revert_liger_kernel_to_qwen3_vl_moe
81
77
  from test.utils import revert_liger_kernel_to_qwen3
82
78
  from test.utils import revert_liger_kernel_to_qwen3_moe
79
+ from test.utils import revert_liger_kernel_to_qwen3_next
83
80
  from test.utils import revert_liger_kernel_to_qwen3_vl
84
81
  from test.utils import revert_liger_kernel_to_qwen3_vl_moe
85
- from test.utils import revert_liger_kernel_to_qwen3_next
86
82
  from test.utils import revert_liger_kernel_to_smollm3
87
83
  from test.utils import set_seed
88
84
  from test.utils import simple_collate_fn
@@ -1281,7 +1281,6 @@ def run_mini_model_multimodal(
1281
1281
  revert_kwargs["model_type"] = "conditional_generation"
1282
1282
 
1283
1283
  if with_liger is True:
1284
-
1285
1284
  kwargs = {
1286
1285
  "rope": True,
1287
1286
  "rms_norm": True,
@@ -42,13 +42,11 @@ from liger_kernel.transformers import apply_liger_kernel_to_phi3
42
42
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2
43
43
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2_5_vl
44
44
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl
45
- from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl
46
- from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl_moe
47
45
  from liger_kernel.transformers import apply_liger_kernel_to_qwen3
48
46
  from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
47
+ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next
49
48
  from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl
50
49
  from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl_moe
51
- from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next
52
50
  from liger_kernel.transformers import apply_liger_kernel_to_smollm3
53
51
  from test.utils import DEFAULT_DATASET_PATH
54
52
  from test.utils import MiniModelConfig
@@ -76,13 +74,11 @@ from test.utils import revert_liger_kernel_to_phi3
76
74
  from test.utils import revert_liger_kernel_to_qwen2
77
75
  from test.utils import revert_liger_kernel_to_qwen2_5_vl
78
76
  from test.utils import revert_liger_kernel_to_qwen2_vl
79
- from test.utils import revert_liger_kernel_to_qwen3_vl
80
- from test.utils import revert_liger_kernel_to_qwen3_vl_moe
81
77
  from test.utils import revert_liger_kernel_to_qwen3
82
78
  from test.utils import revert_liger_kernel_to_qwen3_moe
79
+ from test.utils import revert_liger_kernel_to_qwen3_next
83
80
  from test.utils import revert_liger_kernel_to_qwen3_vl
84
81
  from test.utils import revert_liger_kernel_to_qwen3_vl_moe
85
- from test.utils import revert_liger_kernel_to_qwen3_next
86
82
  from test.utils import revert_liger_kernel_to_smollm3
87
83
  from test.utils import set_seed
88
84
  from test.utils import simple_collate_fn
@@ -498,9 +498,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_for_conditional_generation(
498
498
  LigerRMSNorm.forward
499
499
  )
500
500
  for decoder_layer in dummy_model_instance.language_model.layers:
501
- assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
502
- LigerRMSNorm.forward
503
- )
501
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
504
502
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
505
503
  LigerRMSNorm.forward
506
504
  )
@@ -520,9 +518,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_for_conditional_generation(
520
518
  LigerRMSNorm.forward
521
519
  )
522
520
  for decoder_layer in dummy_model_instance.language_model.layers:
523
- assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
524
- LigerRMSNorm.forward
525
- )
521
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
526
522
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
527
523
  LigerRMSNorm.forward
528
524
  )
@@ -603,9 +599,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl():
603
599
  LigerRMSNorm.forward
604
600
  )
605
601
  for decoder_layer in dummy_model_instance.language_model.layers:
606
- assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
607
- LigerRMSNorm.forward
608
- )
602
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
609
603
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
610
604
  LigerRMSNorm.forward
611
605
  )
@@ -625,9 +619,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl():
625
619
  LigerRMSNorm.forward
626
620
  )
627
621
  for decoder_layer in dummy_model_instance.language_model.layers:
628
- assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
629
- LigerRMSNorm.forward
630
- )
622
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
631
623
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
632
624
  LigerRMSNorm.forward
633
625
  )
@@ -681,9 +673,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_text():
681
673
  # Note: Text models don't have forward method patching, so skip this check
682
674
  assert inspect.getsource(dummy_model_instance.norm.forward) != inspect.getsource(LigerRMSNorm.forward)
683
675
  for decoder_layer in dummy_model_instance.layers:
684
- assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
685
- LigerRMSNorm.forward
686
- )
676
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
687
677
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
688
678
  LigerRMSNorm.forward
689
679
  )
@@ -701,9 +691,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_text():
701
691
  # Note: Text models don't have forward method patching, so skip this check
702
692
  assert inspect.getsource(dummy_model_instance.norm.forward) == inspect.getsource(LigerRMSNorm.forward)
703
693
  for decoder_layer in dummy_model_instance.layers:
704
- assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
705
- LigerRMSNorm.forward
706
- )
694
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
707
695
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
708
696
  LigerRMSNorm.forward
709
697
  )
@@ -789,9 +777,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_for_conditional_generat
789
777
  LigerRMSNorm.forward
790
778
  )
791
779
  for decoder_layer in dummy_model_instance.language_model.layers:
792
- assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
793
- LigerRMSNorm.forward
794
- )
780
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
795
781
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
796
782
  LigerRMSNorm.forward
797
783
  )
@@ -811,9 +797,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_for_conditional_generat
811
797
  LigerRMSNorm.forward
812
798
  )
813
799
  for decoder_layer in dummy_model_instance.language_model.layers:
814
- assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
815
- LigerRMSNorm.forward
816
- )
800
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
817
801
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
818
802
  LigerRMSNorm.forward
819
803
  )
@@ -899,9 +883,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe():
899
883
  LigerRMSNorm.forward
900
884
  )
901
885
  for decoder_layer in dummy_model_instance.language_model.layers:
902
- assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
903
- LigerRMSNorm.forward
904
- )
886
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
905
887
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
906
888
  LigerRMSNorm.forward
907
889
  )
@@ -921,9 +903,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe():
921
903
  LigerRMSNorm.forward
922
904
  )
923
905
  for decoder_layer in dummy_model_instance.language_model.layers:
924
- assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
925
- LigerRMSNorm.forward
926
- )
906
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
927
907
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
928
908
  LigerRMSNorm.forward
929
909
  )
@@ -982,9 +962,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_text():
982
962
  # Note: Text models don't have forward method patching, so skip this check
983
963
  assert inspect.getsource(dummy_model_instance.norm.forward) != inspect.getsource(LigerRMSNorm.forward)
984
964
  for decoder_layer in dummy_model_instance.layers:
985
- assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
986
- LigerRMSNorm.forward
987
- )
965
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
988
966
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
989
967
  LigerRMSNorm.forward
990
968
  )
@@ -1002,9 +980,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_text():
1002
980
  # Note: Text models don't have forward method patching, so skip this check
1003
981
  assert inspect.getsource(dummy_model_instance.norm.forward) == inspect.getsource(LigerRMSNorm.forward)
1004
982
  for decoder_layer in dummy_model_instance.layers:
1005
- assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
1006
- LigerRMSNorm.forward
1007
- )
983
+ assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
1008
984
  assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
1009
985
  LigerRMSNorm.forward
1010
986
  )
@@ -1025,10 +1001,8 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_text():
1025
1001
  def test_qwen3_vl_rope_hooks_applied():
1026
1002
  # Ensure any monkey patching is cleaned up for subsequent tests
1027
1003
  with patch("transformers.models.qwen3_vl.modeling_qwen3_vl") as modeling_mod:
1028
- from liger_kernel.transformers.monkey_patch import (
1029
- liger_rotary_pos_emb,
1030
- _liger_qwen3_vl_apply_rotary_pos_emb_vision,
1031
- )
1004
+ from liger_kernel.transformers.monkey_patch import _liger_qwen3_vl_apply_rotary_pos_emb_vision
1005
+ from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb
1032
1006
 
1033
1007
  # Before applying, make sure attributes exist but are not the liger implementations
1034
1008
  setattr(modeling_mod, "apply_rotary_pos_emb", object())
@@ -1044,10 +1018,8 @@ def test_qwen3_vl_rope_hooks_applied():
1044
1018
  def test_qwen3_vl_moe_rope_hooks_applied():
1045
1019
  # Ensure any monkey patching is cleaned up for subsequent tests
1046
1020
  with patch("transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe") as modeling_mod:
1047
- from liger_kernel.transformers.monkey_patch import (
1048
- liger_rotary_pos_emb,
1049
- _liger_qwen3_vl_apply_rotary_pos_emb_vision,
1050
- )
1021
+ from liger_kernel.transformers.monkey_patch import _liger_qwen3_vl_apply_rotary_pos_emb_vision
1022
+ from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb
1051
1023
 
1052
1024
  # Before applying, make sure attributes exist but are not the liger implementations
1053
1025
  setattr(modeling_mod, "apply_rotary_pos_emb", object())