liger-kernel-nightly 0.6.4.dev20251206103502__tar.gz → 0.6.4.dev20251209171241__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (307)
  1. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/PKG-INFO +4 -1
  2. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/README.md +3 -0
  3. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/pyproject.toml +1 -1
  4. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/monkey_patch.py +5 -6
  5. liger_kernel_nightly-0.6.4.dev20251209171241/src/liger_kernel/transformers/rope.py +64 -0
  6. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/PKG-INFO +4 -1
  7. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_monkey_patch.py +8 -8
  8. liger_kernel_nightly-0.6.4.dev20251206103502/src/liger_kernel/transformers/rope.py +0 -63
  9. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  10. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  11. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/pull_request_template.md +0 -0
  12. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/amd-ci.yml +0 -0
  13. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/benchmark.yml +0 -0
  14. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/docs.yml +0 -0
  15. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/intel-ci.yml +0 -0
  16. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/nvi-ci.yml +0 -0
  17. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/publish-nightly.yml +0 -0
  18. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/publish-release.yml +0 -0
  19. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.gitignore +0 -0
  20. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/LICENSE +0 -0
  21. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/Makefile +0 -0
  22. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/NOTICE +0 -0
  23. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/README.md +0 -0
  24. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/__init__.py +0 -0
  25. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/benchmarks_visualizer.py +0 -0
  26. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/data/all_benchmark_data.csv +0 -0
  27. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/__init__.py +0 -0
  28. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  29. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  30. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  31. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  32. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  33. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_dyt.py +0 -0
  34. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_embedding.py +0 -0
  35. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
  36. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  37. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  38. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  39. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_geglu.py +0 -0
  40. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_group_norm.py +0 -0
  41. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
  42. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_jsd.py +0 -0
  43. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_kl_div.py +0 -0
  44. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  45. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  46. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
  47. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  48. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  49. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_poly_norm.py +0 -0
  50. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  51. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  52. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_rope.py +0 -0
  53. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  54. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_softmax.py +0 -0
  55. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  56. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  57. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_swiglu.py +0 -0
  58. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_tiled_mlp.py +0 -0
  59. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_tvd.py +0 -0
  60. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/utils.py +0 -0
  61. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/dev/fmt-requirements.txt +0 -0
  62. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/dev/modal/benchmarks.py +0 -0
  63. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/dev/modal/tests.py +0 -0
  64. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/dev/modal/tests_bwd.py +0 -0
  65. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/Examples.md +0 -0
  66. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/Getting-Started.md +0 -0
  67. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/High-Level-APIs.md +0 -0
  68. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/Low-Level-APIs.md +0 -0
  69. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/acknowledgement.md +0 -0
  70. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/contributing.md +0 -0
  71. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/banner.GIF +0 -0
  72. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/compose.gif +0 -0
  73. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/e2e-memory.png +0 -0
  74. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/e2e-tps.png +0 -0
  75. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/logo-banner.png +0 -0
  76. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/patch.gif +0 -0
  77. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/post-training.png +0 -0
  78. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/index.md +0 -0
  79. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/license.md +0 -0
  80. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/alignment/accelerate_config.yaml +0 -0
  81. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/alignment/run_orpo.py +0 -0
  82. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/README.md +0 -0
  83. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/callback.py +0 -0
  84. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/config/fsdp_config.json +0 -0
  85. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  86. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  87. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  88. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/llama_tps.png +0 -0
  89. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  90. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/qwen_tps.png +0 -0
  91. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/launch_on_modal.py +0 -0
  92. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/requirements.txt +0 -0
  93. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_benchmarks.sh +0 -0
  94. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_gemma.sh +0 -0
  95. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_llama.sh +0 -0
  96. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_qwen.sh +0 -0
  97. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_qwen2_vl.sh +0 -0
  98. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/training.py +0 -0
  99. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/training_multimodal.py +0 -0
  100. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/lightning/README.md +0 -0
  101. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/lightning/requirements.txt +0 -0
  102. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/lightning/training.py +0 -0
  103. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/README.md +0 -0
  104. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/callback.py +0 -0
  105. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  106. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  107. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  108. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  109. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  110. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  111. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  112. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  113. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  114. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/medusa_util.py +0 -0
  115. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/requirements.txt +0 -0
  116. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  117. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/train.py +0 -0
  118. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-Apache-2.0 +0 -0
  119. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  120. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  121. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-MIT-llmc +0 -0
  122. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-MIT-triton +0 -0
  123. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/mkdocs.yml +0 -0
  124. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/setup.cfg +0 -0
  125. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/setup.py +0 -0
  126. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/__init__.py +0 -0
  127. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/README.md +0 -0
  128. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  129. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  130. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  131. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  132. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/functional.py +0 -0
  133. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  134. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  135. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  136. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  137. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  138. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  139. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  140. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  141. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  142. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/env_report.py +0 -0
  143. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/__init__.py +0 -0
  144. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/cross_entropy.py +0 -0
  145. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/dyt.py +0 -0
  146. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  147. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  148. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
  149. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  150. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  151. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  152. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/geglu.py +0 -0
  153. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/group_norm.py +0 -0
  154. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/grpo_loss.py +0 -0
  155. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/jsd.py +0 -0
  156. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/kl_div.py +0 -0
  157. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/layer_norm.py +0 -0
  158. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/llama4_rope.py +0 -0
  159. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  160. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/poly_norm.py +0 -0
  161. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  162. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/rms_norm.py +0 -0
  163. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/rope.py +0 -0
  164. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/softmax.py +0 -0
  165. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/sparsemax.py +0 -0
  166. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/swiglu.py +0 -0
  167. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/tiled_mlp.py +0 -0
  168. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/tvd.py +0 -0
  169. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/utils.py +0 -0
  170. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/__init__.py +0 -0
  171. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/auto_model.py +0 -0
  172. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  173. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/dyt.py +0 -0
  174. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
  175. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  176. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fsdp.py +0 -0
  177. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/functional.py +0 -0
  178. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
  179. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  180. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  181. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  182. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/geglu.py +0 -0
  183. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/group_norm.py +0 -0
  184. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  185. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/jsd.py +0 -0
  186. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/kl_div.py +0 -0
  187. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/layer_norm.py +0 -0
  188. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/llama4_rope.py +0 -0
  189. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/__init__.py +0 -0
  190. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
  191. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/gemma.py +0 -0
  192. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  193. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  194. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/glm4.py +0 -0
  195. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/glm4v.py +0 -0
  196. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
  197. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/gpt_oss.py +0 -0
  198. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/hunyuan_v1.py +0 -0
  199. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/internvl.py +0 -0
  200. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/llama.py +0 -0
  201. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/llama4.py +0 -0
  202. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/llava.py +0 -0
  203. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  204. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/mistral.py +0 -0
  205. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  206. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/mllama.py +0 -0
  207. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  208. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/olmo3.py +0 -0
  209. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/output_classes.py +0 -0
  210. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  211. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/phi3.py +0 -0
  212. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  213. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  214. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  215. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  216. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  217. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3_next.py +0 -0
  218. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3_vl.py +0 -0
  219. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3_vl_moe.py +0 -0
  220. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/smollm3.py +0 -0
  221. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/smolvlm.py +0 -0
  222. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  223. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/poly_norm.py +0 -0
  224. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  225. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/rms_norm.py +0 -0
  226. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/softmax.py +0 -0
  227. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/sparsemax.py +0 -0
  228. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/swiglu.py +0 -0
  229. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/tiled_mlp.py +0 -0
  230. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  231. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  232. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  233. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/tvd.py +0 -0
  234. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/triton/__init__.py +0 -0
  235. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/triton/monkey_patch.py +0 -0
  236. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/utils.py +0 -0
  237. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  238. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  239. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  240. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  241. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/__init__.py +0 -0
  242. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/__init__.py +0 -0
  243. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_cosine_loss.py +0 -0
  244. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_cpo_loss.py +0 -0
  245. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_dpo_loss.py +0 -0
  246. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_grpo_loss.py +0 -0
  247. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_jsd_loss.py +0 -0
  248. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_kto_loss.py +0 -0
  249. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_orpo_loss.py +0 -0
  250. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_simpo_loss.py +0 -0
  251. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/conftest.py +0 -0
  252. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/__init__.py +0 -0
  253. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/bf16/__init__.py +0 -0
  254. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/bf16/test_mini_models.py +0 -0
  255. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  256. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  257. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/fp32/__init__.py +0 -0
  258. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/fp32/test_mini_models.py +0 -0
  259. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
  260. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  261. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  262. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  263. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json +0 -0
  264. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  265. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  266. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  267. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
  268. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  269. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  270. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Qwen/Qwen3-VL-4B-Instruct/tokenizer_config.json +0 -0
  271. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  272. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  273. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  274. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/tiny_shakespeare.txt +0 -0
  275. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  276. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  277. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  278. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_auto_model.py +0 -0
  279. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_cross_entropy.py +0 -0
  280. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_dyt.py +0 -0
  281. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_embedding.py +0 -0
  282. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_flex_attention.py +0 -0
  283. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_fused_add_rms_norm.py +0 -0
  284. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  285. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_fused_linear_jsd.py +0 -0
  286. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  287. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_geglu.py +0 -0
  288. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_group_norm.py +0 -0
  289. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_grpo_loss.py +0 -0
  290. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_jsd.py +0 -0
  291. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_kl_div.py +0 -0
  292. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_layer_norm.py +0 -0
  293. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_mm_int8int2.py +0 -0
  294. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_multi_token_attention.py +0 -0
  295. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_poly_norm.py +0 -0
  296. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_qwen2vl_mrope.py +0 -0
  297. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_rms_norm.py +0 -0
  298. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_rope.py +0 -0
  299. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_softmax.py +0 -0
  300. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_sparsemax.py +0 -0
  301. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_swiglu.py +0 -0
  302. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_tiled_mlp.py +0 -0
  303. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_trainer_integration.py +0 -0
  304. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_transformers.py +0 -0
  305. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_tvd.py +0 -0
  306. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/triton/test_triton_monkey_patch.py +0 -0
  307. {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.4.dev20251206103502
3
+ Version: 0.6.4.dev20251209171241
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -113,6 +113,8 @@ We've also added optimized Post-Training kernels that deliver **up to 80% memory
113
113
 
114
114
  You can view the documentation site for additional installation, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
115
115
 
116
+ You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
117
+
116
118
  ## Supercharge Your Model with Liger Kernel
117
119
 
118
120
  ![Banner](https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/banner.GIF)
@@ -442,3 +444,4 @@ url={https://openreview.net/forum?id=36SjAIT42G}
442
444
  ↑ Back to Top ↑
443
445
  </a>
444
446
  </p>
447
+
@@ -65,6 +65,8 @@ We've also added optimized Post-Training kernels that deliver **up to 80% memory
65
65
 
66
66
  You can view the documentation site for additional installation, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
67
67
 
68
+ You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
69
+
68
70
  ## Supercharge Your Model with Liger Kernel
69
71
 
70
72
  ![Banner](https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/banner.GIF)
@@ -394,3 +396,4 @@ url={https://openreview.net/forum?id=36SjAIT42G}
394
396
  ↑ Back to Top ↑
395
397
  </a>
396
398
  </p>
399
+
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.4.dev20251206103502"
7
+ version = "0.6.4.dev20251209171241"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -35,8 +35,7 @@ from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_f
35
35
  from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
36
36
  from liger_kernel.transformers.rms_norm import LigerRMSNorm
37
37
  from liger_kernel.transformers.rope import liger_rotary_pos_emb
38
- from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast
39
- from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast_and_leading_batch
38
+ from liger_kernel.transformers.rope import liger_rotary_pos_emb_vision
40
39
  from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP
41
40
  from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP
42
41
  from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
@@ -1754,8 +1753,8 @@ def apply_liger_kernel_to_qwen3_vl(
1754
1753
  from liger_kernel.transformers.model.qwen3_vl import lce_forward as qwen3_vl_lce_forward
1755
1754
 
1756
1755
  if rope:
1757
- modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb_with_cast
1758
- modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_with_cast_and_leading_batch
1756
+ modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb
1757
+ modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_vision
1759
1758
 
1760
1759
  if rms_norm:
1761
1760
  modeling_qwen3_vl.Qwen3VLTextRMSNorm = LigerRMSNorm
@@ -1829,8 +1828,8 @@ def apply_liger_kernel_to_qwen3_vl_moe(
1829
1828
  from liger_kernel.transformers.model.qwen3_vl_moe import lce_forward as qwen3_vl_moe_lce_forward
1830
1829
 
1831
1830
  if rope:
1832
- modeling_qwen3_vl_moe.apply_rotary_pos_emb = liger_rotary_pos_emb_with_cast
1833
- modeling_qwen3_vl_moe.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_with_cast_and_leading_batch
1831
+ modeling_qwen3_vl_moe.apply_rotary_pos_emb = liger_rotary_pos_emb
1832
+ modeling_qwen3_vl_moe.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_vision
1834
1833
 
1835
1834
  if rms_norm:
1836
1835
  modeling_qwen3_vl_moe.Qwen3VLMoeTextRMSNorm = LigerRMSNorm
@@ -0,0 +1,64 @@
1
+ from typing import Tuple
2
+
3
+ import torch
4
+
5
+ from liger_kernel.ops.rope import LigerRopeFunction
6
+
7
+
8
+ def liger_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
9
+ """
10
+ Applies Rotary Positional Embedding (RoPE) operation to query and key states.
11
+
12
+ Args:
13
+ q (torch.Tensor): The query tensor of shape (bsz, n_q_head, seq_len, head_dim).
14
+ k (torch.Tensor): The key tensor of shape (bsz, n_kv_head, seq_len, head_dim).
15
+ cos (torch.Tensor): The cosine tensor of shape (1, seq_len, head_dim) or (bsz, seq_len, head_dim).
16
+ sin (torch.Tensor): The sine tensor of shape (1, seq_len, head_dim) or (bsz, seq_len, head_dim).
17
+ position_ids (torch.Tensor, optional): The position ids tensor. Defaults to None.
18
+ unsqueeze_dim (int, optional): The dimension to unsqueeze. Defaults to 1.
19
+
20
+ Returns:
21
+ Tuple[torch.Tensor, torch.Tensor]: The query and key tensors after applying the RoPE operation.
22
+ """
23
+
24
+ return LigerRopeFunction.apply(q, k, cos, sin, position_ids, unsqueeze_dim)
25
+
26
+
27
+ def liger_rotary_pos_emb_vision(
28
+ q: torch.Tensor,
29
+ k: torch.Tensor,
30
+ cos: torch.Tensor,
31
+ sin: torch.Tensor,
32
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
33
+ """
34
+ Modified version of liger_rotary_pos_emb for qwen3_vl's apply_rotary_pos_emb_vision function.
35
+ Manually transposes the input and output to match the expected shape for liger_rotary_pos_emb.
36
+ Reference: https://github.com/huggingface/transformers/blob/v5.0.0rc0/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L116
37
+
38
+ Args:
39
+ q (torch.Tensor): The query tensor of shape (seq_length, num_heads, head_dim),
40
+ with stride (num_heads * head_dim, head_dim, 1).
41
+ k (torch.Tensor): The key tensor of shape (seq_length, num_heads, head_dim),
42
+ with stride (num_heads * head_dim, head_dim, 1). Same as q.
43
+ cos (torch.Tensor): The cosine tensor of shape (seq_length, head_dim).
44
+ sin (torch.Tensor): The sine tensor of shape (seq_length, head_dim).
45
+
46
+ Returns:
47
+ Tuple[torch.Tensor, torch.Tensor]: The query and key tensors with the same shape and stride as inputs.
48
+ """
49
+ orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
50
+
51
+ # transpose to (1, num_heads, seq_length, head_dim) and cast to float32 to match liger_rotary_pos_emb input shape
52
+ # also unsqueeze for batch dim
53
+ q32 = q.to(torch.float32).unsqueeze(0).transpose(1, 2)
54
+ k32 = k.to(torch.float32).unsqueeze(0).transpose(1, 2)
55
+ cos32 = cos.to(torch.float32)
56
+ sin32 = sin.to(torch.float32)
57
+
58
+ q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32)
59
+
60
+ # transpose back to (seq_length, num_heads, head_dim) and cast back to original dtype
61
+ # also squeeze out batch dim
62
+ q_out = q_out.transpose(1, 2).squeeze(0).to(orig_q_dtype)
63
+ k_out = k_out.transpose(1, 2).squeeze(0).to(orig_k_dtype)
64
+ return q_out, k_out
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.4.dev20251206103502
3
+ Version: 0.6.4.dev20251209171241
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -113,6 +113,8 @@ We've also added optimized Post-Training kernels that deliver **up to 80% memory
113
113
 
114
114
  You can view the documentation site for additional installation, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
115
115
 
116
+ You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
117
+
116
118
  ## Supercharge Your Model with Liger Kernel
117
119
 
118
120
  ![Banner](https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/banner.GIF)
@@ -442,3 +444,4 @@ url={https://openreview.net/forum?id=36SjAIT42G}
442
444
  ↑ Back to Top ↑
443
445
  </a>
444
446
  </p>
447
+
@@ -1019,8 +1019,8 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_text():
1019
1019
  def test_qwen3_vl_rope_hooks_applied():
1020
1020
  # Ensure any monkey patching is cleaned up for subsequent tests
1021
1021
  with patch("transformers.models.qwen3_vl.modeling_qwen3_vl") as modeling_mod:
1022
- from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_with_cast
1023
- from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_with_cast_and_leading_batch
1022
+ from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb
1023
+ from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_vision
1024
1024
 
1025
1025
  # Before applying, make sure attributes exist but are not the liger implementations
1026
1026
  setattr(modeling_mod, "apply_rotary_pos_emb", object())
@@ -1028,16 +1028,16 @@ def test_qwen3_vl_rope_hooks_applied():
1028
1028
 
1029
1029
  _apply_liger_kernel("qwen3_vl")
1030
1030
 
1031
- assert modeling_mod.apply_rotary_pos_emb is liger_rotary_pos_emb_with_cast
1032
- assert modeling_mod.apply_rotary_pos_emb_vision is liger_rotary_pos_emb_with_cast_and_leading_batch
1031
+ assert modeling_mod.apply_rotary_pos_emb is liger_rotary_pos_emb
1032
+ assert modeling_mod.apply_rotary_pos_emb_vision is liger_rotary_pos_emb_vision
1033
1033
 
1034
1034
 
1035
1035
  @pytest.mark.skipif(not is_qwen3_vl_moe_available(), reason="qwen3_vl_moe module not available")
1036
1036
  def test_qwen3_vl_moe_rope_hooks_applied():
1037
1037
  # Ensure any monkey patching is cleaned up for subsequent tests
1038
1038
  with patch("transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe") as modeling_mod:
1039
- from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_with_cast
1040
- from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_with_cast_and_leading_batch
1039
+ from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb
1040
+ from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_vision
1041
1041
 
1042
1042
  # Before applying, make sure attributes exist but are not the liger implementations
1043
1043
  setattr(modeling_mod, "apply_rotary_pos_emb", object())
@@ -1045,8 +1045,8 @@ def test_qwen3_vl_moe_rope_hooks_applied():
1045
1045
 
1046
1046
  _apply_liger_kernel("qwen3_vl_moe")
1047
1047
 
1048
- assert modeling_mod.apply_rotary_pos_emb is liger_rotary_pos_emb_with_cast
1049
- assert modeling_mod.apply_rotary_pos_emb_vision is liger_rotary_pos_emb_with_cast_and_leading_batch
1048
+ assert modeling_mod.apply_rotary_pos_emb is liger_rotary_pos_emb
1049
+ assert modeling_mod.apply_rotary_pos_emb_vision is liger_rotary_pos_emb_vision
1050
1050
 
1051
1051
 
1052
1052
  @pytest.mark.skipif(not is_falcon_h1_available(), reason="falcon_h1 module not available")
@@ -1,63 +0,0 @@
1
- from typing import Optional
2
- from typing import Tuple
3
-
4
- import torch
5
-
6
- from liger_kernel.ops.rope import LigerRopeFunction
7
-
8
-
9
- def liger_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
10
- """
11
- Applies Rotary Positional Embedding (RoPE) operation to query and key states.
12
-
13
- Args:
14
- q (torch.Tensor): The query tensor of shape (bsz, n_q_head, seq_len, head_dim).
15
- k (torch.Tensor): The key tensor of shape (bsz, n_kv_head, seq_len, head_dim).
16
- cos (torch.Tensor): The cosine tensor of shape (1, seq_len, head_dim) or (bsz, seq_len, head_dim).
17
- sin (torch.Tensor): The sine tensor of shape (1, seq_len, head_dim) or (bsz, seq_len, head_dim).
18
- position_ids (torch.Tensor, optional): The position ids tensor. Defaults to None.
19
- unsqueeze_dim (int, optional): The dimension to unsqueeze. Defaults to 1.
20
-
21
- Returns:
22
- Tuple[torch.Tensor, torch.Tensor]: The query and key tensors after applying the RoPE operation.
23
- """
24
-
25
- return LigerRopeFunction.apply(q, k, cos, sin, position_ids, unsqueeze_dim)
26
-
27
-
28
- def liger_rotary_pos_emb_with_cast(
29
- q: torch.Tensor,
30
- k: torch.Tensor,
31
- cos: torch.Tensor,
32
- sin: torch.Tensor,
33
- position_ids: Optional[torch.Tensor] = None,
34
- unsqueeze_dim: int = 1,
35
- ) -> Tuple[torch.Tensor, torch.Tensor]:
36
- orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
37
-
38
- q32 = q.to(torch.float32)
39
- k32 = k.to(torch.float32)
40
- cos32 = cos.to(torch.float32)
41
- sin32 = sin.to(torch.float32)
42
-
43
- q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
44
- return q_out.to(orig_q_dtype), k_out.to(orig_k_dtype)
45
-
46
-
47
- def liger_rotary_pos_emb_with_cast_and_leading_batch(
48
- q: torch.Tensor,
49
- k: torch.Tensor,
50
- cos: torch.Tensor,
51
- sin: torch.Tensor,
52
- position_ids: Optional[torch.Tensor] = None,
53
- unsqueeze_dim: int = 1,
54
- ) -> Tuple[torch.Tensor, torch.Tensor]:
55
- orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
56
-
57
- q32 = q.to(torch.float32).unsqueeze(0)
58
- k32 = k.to(torch.float32).unsqueeze(0)
59
- cos32 = cos.to(torch.float32).unsqueeze(0)
60
- sin32 = sin.to(torch.float32).unsqueeze(0)
61
-
62
- q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
63
- return q_out.to(orig_q_dtype).squeeze(0), k_out.to(orig_k_dtype).squeeze(0)