liger-kernel-nightly 0.6.4.dev20260113145602__tar.gz → 0.6.4.dev20260116105204__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of liger-kernel-nightly might be problematic. Click here for more details.

Files changed (320)
  1. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/PKG-INFO +1 -1
  2. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_tvd.py +7 -4
  3. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/pyproject.toml +1 -1
  4. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/fused_add_rms_norm.py +3 -2
  5. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/layer_norm.py +15 -15
  6. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/poly_norm.py +14 -20
  7. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/rms_norm.py +3 -2
  8. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/utils.py +11 -0
  9. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/gemma3.py +9 -3
  10. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/loss_utils.py +6 -0
  11. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
  12. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  13. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  14. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/pull_request_template.md +0 -0
  15. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/amd-ci.yml +0 -0
  16. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/benchmark.yml +0 -0
  17. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/docs.yml +0 -0
  18. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/intel-ci.yml +0 -0
  19. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/nvi-ci.yml +0 -0
  20. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/publish-nightly.yml +0 -0
  21. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/publish-release.yml +0 -0
  22. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.gitignore +0 -0
  23. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/LICENSE +0 -0
  24. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/Makefile +0 -0
  25. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/NOTICE +0 -0
  26. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/README.md +0 -0
  27. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/README.md +0 -0
  28. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/__init__.py +0 -0
  29. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/benchmarks_visualizer.py +0 -0
  30. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/data/all_benchmark_data.csv +0 -0
  31. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/__init__.py +0 -0
  32. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  33. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  34. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  35. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  36. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  37. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_dyt.py +0 -0
  38. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_embedding.py +0 -0
  39. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
  40. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  41. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  42. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  43. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_geglu.py +0 -0
  44. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_group_norm.py +0 -0
  45. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
  46. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_jsd.py +0 -0
  47. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_kl_div.py +0 -0
  48. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  49. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  50. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
  51. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  52. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  53. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_poly_norm.py +0 -0
  54. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  55. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  56. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_rope.py +0 -0
  57. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  58. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_softmax.py +0 -0
  59. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  60. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  61. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_swiglu.py +0 -0
  62. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_tiled_mlp.py +0 -0
  63. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/utils.py +0 -0
  64. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/dev/fmt-requirements.txt +0 -0
  65. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/dev/modal/benchmarks.py +0 -0
  66. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/dev/modal/tests.py +0 -0
  67. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/dev/modal/tests_bwd.py +0 -0
  68. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/Examples.md +0 -0
  69. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/Getting-Started.md +0 -0
  70. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/High-Level-APIs.md +0 -0
  71. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/Low-Level-APIs.md +0 -0
  72. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/acknowledgement.md +0 -0
  73. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/contributing.md +0 -0
  74. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/banner.GIF +0 -0
  75. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/compose.gif +0 -0
  76. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/e2e-memory.png +0 -0
  77. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/e2e-tps.png +0 -0
  78. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/logo-banner.png +0 -0
  79. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/patch.gif +0 -0
  80. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/post-training.png +0 -0
  81. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/index.md +0 -0
  82. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/license.md +0 -0
  83. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/alignment/accelerate_config.yaml +0 -0
  84. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/alignment/run_orpo.py +0 -0
  85. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/README.md +0 -0
  86. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/callback.py +0 -0
  87. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/config/fsdp_config.json +0 -0
  88. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  89. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  90. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  91. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/llama_tps.png +0 -0
  92. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  93. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/qwen_tps.png +0 -0
  94. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/launch_on_modal.py +0 -0
  95. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/requirements.txt +0 -0
  96. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_benchmarks.sh +0 -0
  97. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_gemma.sh +0 -0
  98. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_llama.sh +0 -0
  99. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_qwen.sh +0 -0
  100. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_qwen2_vl.sh +0 -0
  101. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/training.py +0 -0
  102. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/training_multimodal.py +0 -0
  103. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/lightning/README.md +0 -0
  104. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/lightning/requirements.txt +0 -0
  105. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/lightning/training.py +0 -0
  106. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/README.md +0 -0
  107. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/callback.py +0 -0
  108. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  109. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  110. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  111. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  112. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  113. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  114. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  115. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  116. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  117. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/medusa_util.py +0 -0
  118. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/requirements.txt +0 -0
  119. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  120. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/train.py +0 -0
  121. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-Apache-2.0 +0 -0
  122. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  123. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  124. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-MIT-llmc +0 -0
  125. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-MIT-triton +0 -0
  126. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/mkdocs.yml +0 -0
  127. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/setup.cfg +0 -0
  128. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/setup.py +0 -0
  129. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/__init__.py +0 -0
  130. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/README.md +0 -0
  131. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  132. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  133. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  134. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  135. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/functional.py +0 -0
  136. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  137. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  138. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  139. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  140. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  141. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  142. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  143. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  144. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  145. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/env_report.py +0 -0
  146. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/__init__.py +0 -0
  147. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/README.md +0 -0
  148. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/__init__.py +0 -0
  149. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/__init__.py +0 -0
  150. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ascend-ub-manager-design.md +0 -0
  151. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/__init__.py +0 -0
  152. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/geglu.py +0 -0
  153. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/qwen2vl_mrope.py +0 -0
  154. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/rope.py +0 -0
  155. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/swiglu.py +0 -0
  156. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/tvd.py +0 -0
  157. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ub_manager.py +0 -0
  158. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/registry.py +0 -0
  159. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/cross_entropy.py +0 -0
  160. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/dyt.py +0 -0
  161. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  162. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  163. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  164. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  165. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  166. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/geglu.py +0 -0
  167. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/group_norm.py +0 -0
  168. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/grpo_loss.py +0 -0
  169. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/jsd.py +0 -0
  170. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/kl_div.py +0 -0
  171. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/llama4_rope.py +0 -0
  172. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  173. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  174. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/rope.py +0 -0
  175. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/softmax.py +0 -0
  176. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/sparsemax.py +0 -0
  177. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/swiglu.py +0 -0
  178. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/tiled_mlp.py +0 -0
  179. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/tvd.py +0 -0
  180. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/__init__.py +0 -0
  181. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/auto_model.py +0 -0
  182. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  183. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/dyt.py +0 -0
  184. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
  185. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  186. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fsdp.py +0 -0
  187. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/functional.py +0 -0
  188. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
  189. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  190. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  191. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  192. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/geglu.py +0 -0
  193. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/group_norm.py +0 -0
  194. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  195. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/jsd.py +0 -0
  196. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/kl_div.py +0 -0
  197. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/layer_norm.py +0 -0
  198. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/llama4_rope.py +0 -0
  199. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/__init__.py +0 -0
  200. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/exaone4.py +0 -0
  201. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
  202. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/gemma.py +0 -0
  203. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  204. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/glm4.py +0 -0
  205. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/glm4v.py +0 -0
  206. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
  207. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/gpt_oss.py +0 -0
  208. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/hunyuan_v1.py +0 -0
  209. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/internvl.py +0 -0
  210. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/llama.py +0 -0
  211. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/llama4.py +0 -0
  212. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/llava.py +0 -0
  213. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/mistral.py +0 -0
  214. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  215. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/mllama.py +0 -0
  216. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  217. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/olmo3.py +0 -0
  218. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/output_classes.py +0 -0
  219. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  220. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/phi3.py +0 -0
  221. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  222. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  223. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  224. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  225. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  226. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3_next.py +0 -0
  227. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3_vl.py +0 -0
  228. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3_vl_moe.py +0 -0
  229. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/smollm3.py +0 -0
  230. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/smolvlm.py +0 -0
  231. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/monkey_patch.py +0 -0
  232. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  233. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/poly_norm.py +0 -0
  234. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  235. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/rms_norm.py +0 -0
  236. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/rope.py +0 -0
  237. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/softmax.py +0 -0
  238. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/sparsemax.py +0 -0
  239. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/swiglu.py +0 -0
  240. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/tiled_mlp.py +0 -0
  241. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  242. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  243. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  244. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/tvd.py +0 -0
  245. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/triton/__init__.py +0 -0
  246. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/triton/monkey_patch.py +0 -0
  247. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/utils.py +0 -0
  248. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  249. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  250. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  251. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  252. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/__init__.py +0 -0
  253. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/__init__.py +0 -0
  254. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_cosine_loss.py +0 -0
  255. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_cpo_loss.py +0 -0
  256. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_dpo_loss.py +0 -0
  257. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_grpo_loss.py +0 -0
  258. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_jsd_loss.py +0 -0
  259. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_kto_loss.py +0 -0
  260. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_orpo_loss.py +0 -0
  261. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_simpo_loss.py +0 -0
  262. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/conftest.py +0 -0
  263. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/__init__.py +0 -0
  264. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/bf16/__init__.py +0 -0
  265. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/bf16/test_mini_models.py +0 -0
  266. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  267. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  268. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/fp32/__init__.py +0 -0
  269. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/fp32/test_mini_models.py +0 -0
  270. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
  271. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  272. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  273. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  274. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json +0 -0
  275. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  276. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  277. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  278. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
  279. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  280. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  281. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Qwen/Qwen3-VL-4B-Instruct/tokenizer_config.json +0 -0
  282. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  283. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  284. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  285. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/tiny_shakespeare.txt +0 -0
  286. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  287. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  288. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  289. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_auto_model.py +0 -0
  290. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_cross_entropy.py +0 -0
  291. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_dyt.py +0 -0
  292. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_embedding.py +0 -0
  293. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_flex_attention.py +0 -0
  294. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_fused_add_rms_norm.py +0 -0
  295. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  296. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_fused_linear_jsd.py +0 -0
  297. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  298. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_geglu.py +0 -0
  299. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_group_norm.py +0 -0
  300. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_grpo_loss.py +0 -0
  301. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_jsd.py +0 -0
  302. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_kl_div.py +0 -0
  303. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_layer_norm.py +0 -0
  304. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_llama4_rope.py +0 -0
  305. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_mm_int8int2.py +0 -0
  306. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_monkey_patch.py +0 -0
  307. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_multi_token_attention.py +0 -0
  308. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_poly_norm.py +0 -0
  309. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_qwen2vl_mrope.py +0 -0
  310. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_rms_norm.py +0 -0
  311. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_rope.py +0 -0
  312. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_softmax.py +0 -0
  313. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_sparsemax.py +0 -0
  314. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_swiglu.py +0 -0
  315. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_tiled_mlp.py +0 -0
  316. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_trainer_integration.py +0 -0
  317. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_transformers.py +0 -0
  318. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_tvd.py +0 -0
  319. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/triton/test_triton_monkey_patch.py +0 -0
  320. {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.4.dev20260113145602
3
+ Version: 0.6.4.dev20260116105204
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -9,6 +9,9 @@ from utils import parse_benchmark_script_args
9
9
  from utils import run_benchmarks
10
10
 
11
11
  from liger_kernel.transformers.tvd import LigerTVDLoss
12
+ from liger_kernel.utils import infer_device
13
+
14
+ device = infer_device()
12
15
 
13
16
 
14
17
  class TorchTVDLoss(torch.nn.Module):
@@ -40,8 +43,8 @@ def bench_speed_tvd(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput:
40
43
  torch_tvd = TorchTVDLoss(reduction=reduction)
41
44
  liger_tvd = LigerTVDLoss(reduction=reduction)
42
45
 
43
- _input = torch.randn(B * T, V, requires_grad=True, device="cuda").softmax(dim=-1)
44
- target = torch.randn(B * T, V, device="cuda").softmax(dim=-1)
46
+ _input = torch.randn(B * T, V, requires_grad=True, device=device).softmax(dim=-1)
47
+ target = torch.randn(B * T, V, device=device).softmax(dim=-1)
45
48
 
46
49
  def fwd():
47
50
  if input.kernel_provider == "liger":
@@ -82,8 +85,8 @@ def bench_memory_tvd(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput
82
85
  V = input.x
83
86
  B, T = input.extra_benchmark_config["B"], input.extra_benchmark_config["T"]
84
87
 
85
- _input = torch.randn(B * T, V, requires_grad=True, device="cuda").softmax(dim=-1)
86
- target = torch.randn(B * T, V, device="cuda").softmax(dim=-1)
88
+ _input = torch.randn(B * T, V, requires_grad=True, device=device).softmax(dim=-1)
89
+ target = torch.randn(B * T, V, device=device).softmax(dim=-1)
87
90
 
88
91
  def fwd():
89
92
  if input.kernel_provider == "liger":
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.4.dev20260113145602"
7
+ version = "0.6.4.dev20260116105204"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -8,6 +8,7 @@ import triton.language as tl
8
8
  from liger_kernel.ops.utils import calculate_settings
9
9
  from liger_kernel.ops.utils import compare_version
10
10
  from liger_kernel.ops.utils import ensure_contiguous
11
+ from liger_kernel.ops.utils import set_large_grf_mode
11
12
  from liger_kernel.ops.utils import torch_to_triton_dtype
12
13
  from liger_kernel.utils import get_npu_multi_processor_count
13
14
  from liger_kernel.utils import is_npu_available
@@ -247,7 +248,7 @@ def fused_add_rms_norm_forward(X, R, W, eps, offset, casting_mode):
247
248
  # XPU-specific optimization
248
249
  kernel_args = {}
249
250
  if X.device.type == "xpu":
250
- kernel_args["grf_mode"] = "large"
251
+ set_large_grf_mode(kernel_args)
251
252
 
252
253
  # TODO: add _block_fused_add_rms_norm_forward_kernel
253
254
  _fused_add_rms_norm_forward_kernel[(n_rows,)](
@@ -307,7 +308,7 @@ def fused_add_rms_norm_backward(dY, dS_out, S, W, RSTD, offset, casting_mode, BL
307
308
  # XPU-specific optimization
308
309
  kernel_args = {}
309
310
  if S.device.type == "xpu":
310
- kernel_args["grf_mode"] = "large"
311
+ set_large_grf_mode(kernel_args)
311
312
 
312
313
  # TODO: add _block_fused_add_rms_norm_backward_kernel
313
314
  _fused_add_rms_norm_backward_kernel[grid](
@@ -8,6 +8,8 @@ import triton.language as tl
8
8
  from liger_kernel.ops.utils import calculate_settings
9
9
  from liger_kernel.ops.utils import compare_version
10
10
  from liger_kernel.ops.utils import ensure_contiguous
11
+ from liger_kernel.ops.utils import set_large_grf_mode
12
+ from liger_kernel.utils import get_npu_multi_processor_count
11
13
  from liger_kernel.utils import is_npu_available
12
14
 
13
15
  if compare_version("triton", operator.ge, "3.0.0") and not is_npu_available():
@@ -124,14 +126,14 @@ def _layer_norm_backward_kernel(
124
126
  w = tl.load(W_ptr + cols, mask=mask, other=0.0)
125
127
  w_f32 = w.to(tl.float32)
126
128
 
127
- # Calculate pointers for this specific row
128
- row_X_ptr = X_ptr + row_start * stride_x
129
- row_DX_ptr = DX_ptr + row_start * stride_dx
130
- row_DY_ptr = DY_ptr + row_start * stride_dy
131
- row_Mean_ptr = Mean_ptr + row_start
132
- row_RSTD_ptr = RSTD_ptr + row_start
129
+ for row_idx in range(row_start, row_end):
130
+ # Calculate pointers for this specific row
131
+ row_X_ptr = X_ptr + row_idx * stride_x
132
+ row_DX_ptr = DX_ptr + row_idx * stride_dx
133
+ row_DY_ptr = DY_ptr + row_idx * stride_dy
134
+ row_Mean_ptr = Mean_ptr + row_idx * stride_mean
135
+ row_RSTD_ptr = RSTD_ptr + row_idx * stride_rstd
133
136
 
134
- for _ in range(row_start, row_end):
135
137
  # Load data for this row
136
138
  x = tl.load(row_X_ptr + cols, mask=mask, other=0.0)
137
139
  dy = tl.load(row_DY_ptr + cols, mask=mask, other=0.0)
@@ -160,12 +162,6 @@ def _layer_norm_backward_kernel(
160
162
  dW_row += dw
161
163
  db_row += db
162
164
 
163
- row_X_ptr += stride_x
164
- row_DX_ptr += stride_dx
165
- row_DY_ptr += stride_dy
166
- row_Mean_ptr += stride_mean
167
- row_RSTD_ptr += stride_rstd
168
-
169
165
  tl.store(DW_ptr + row_block_id * stride_dw + cols, dW_row, mask=mask)
170
166
  tl.store(DB_ptr + row_block_id * stride_db + cols, db_row, mask=mask)
171
167
 
@@ -204,7 +200,7 @@ def layer_norm_forward(X, W, B, eps):
204
200
  # XPU-specific optimization
205
201
  kernel_args = {}
206
202
  if X.device.type == "xpu":
207
- kernel_args["grf_mode"] = "large"
203
+ set_large_grf_mode(kernel_args)
208
204
 
209
205
  # Launch kernel with one thread block per row for optimal performance
210
206
  grid = (n_rows,)
@@ -254,6 +250,8 @@ def layer_norm_backward(dY, X, W, B, Mean, RSTD):
254
250
  sm_count = torch.cuda.get_device_properties(X.device).multi_processor_count
255
251
  elif X.device.type == "xpu":
256
252
  sm_count = torch.xpu.get_device_properties(X.device).gpu_eu_count
253
+ elif X.device.type == "npu":
254
+ sm_count = get_npu_multi_processor_count()
257
255
 
258
256
  # fp32 for numerical stability especially.
259
257
  _DW = torch.empty((sm_count, n_cols), dtype=torch.float32, device=W.device)
@@ -272,7 +270,8 @@ def layer_norm_backward(dY, X, W, B, Mean, RSTD):
272
270
  kernel_args = {"num_warps": num_warps}
273
271
  # XPU-specific optimization
274
272
  if X.device.type == "xpu":
275
- kernel_args.update({"grf_mode": "large", "num_warps": 32, "num_stages": 4})
273
+ kernel_args.update({"num_warps": 32, "num_stages": 4})
274
+ set_large_grf_mode(kernel_args)
276
275
 
277
276
  # Launch kernel with one thread block per row for optimal performance
278
277
  _layer_norm_backward_kernel[grid](
@@ -301,6 +300,7 @@ def layer_norm_backward(dY, X, W, B, Mean, RSTD):
301
300
  DX = DX.view(*shape)
302
301
  DW = _DW.sum(dim=0).to(W.dtype)
303
302
  DB = _DB.sum(dim=0).to(B.dtype)
303
+
304
304
  return DX, DW, DB
305
305
 
306
306
 
@@ -7,6 +7,7 @@ import triton.language as tl
7
7
  from liger_kernel.ops.utils import calculate_settings
8
8
  from liger_kernel.ops.utils import compare_version
9
9
  from liger_kernel.ops.utils import ensure_contiguous
10
+ from liger_kernel.ops.utils import set_large_grf_mode
10
11
  from liger_kernel.utils import get_npu_multi_processor_count
11
12
  from liger_kernel.utils import is_npu_available
12
13
 
@@ -140,20 +141,19 @@ def _poly_norm_backward_kernel(
140
141
  w1 = tl.load(W_ptr + 1).to(tl.float32)
141
142
  w2 = tl.load(W_ptr + 2).to(tl.float32)
142
143
 
143
- dY_ptr += row_start * dY_row_stride
144
- dX_ptr += row_start * dX_row_stride
145
- X_ptr += row_start * X_row_stride
146
- RSTD_ptr += row_start * RSTD_row_stride
144
+ for row_idx in range(row_start, row_end):
145
+ dy_base = dY_ptr + row_idx * dY_row_stride
146
+ x_base = X_ptr + row_idx * X_row_stride
147
+ dx_base = dX_ptr + row_idx * dX_row_stride
148
+ rstd_base = RSTD_ptr + row_idx * RSTD_row_stride
147
149
 
148
- for _ in range(row_start, row_end):
149
- # Load input and gradient
150
- dY_row = tl.load(dY_ptr + col_offsets, mask=mask, other=0.0).to(tl.float32)
151
- X_row = tl.load(X_ptr + col_offsets, mask=mask, other=0.0).to(tl.float32)
150
+ dY_row = tl.load(dy_base + col_offsets, mask=mask, other=0.0).to(tl.float32)
151
+ X_row = tl.load(x_base + col_offsets, mask=mask, other=0.0).to(tl.float32)
152
152
 
153
153
  # Load cached rstd values
154
- rstd_3 = tl.load(RSTD_ptr + 0).to(tl.float32)
155
- rstd_2 = tl.load(RSTD_ptr + 1).to(tl.float32)
156
- rstd_1 = tl.load(RSTD_ptr + 2).to(tl.float32)
154
+ rstd_3 = tl.load(rstd_base + 0).to(tl.float32)
155
+ rstd_2 = tl.load(rstd_base + 1).to(tl.float32)
156
+ rstd_1 = tl.load(rstd_base + 2).to(tl.float32)
157
157
 
158
158
  # Compute powers
159
159
  X_pow3 = X_row * X_row * X_row
@@ -190,13 +190,7 @@ def _poly_norm_backward_kernel(
190
190
  dX_row = grad_x_3 + grad_x_2 + grad_x_1
191
191
 
192
192
  # Store gradient
193
- tl.store(dX_ptr + col_offsets, dX_row, mask=mask)
194
-
195
- # Update pointers
196
- dY_ptr += dY_row_stride
197
- dX_ptr += dX_row_stride
198
- X_ptr += X_row_stride
199
- RSTD_ptr += RSTD_row_stride
193
+ tl.store(dx_base + col_offsets, dX_row, mask=mask)
200
194
 
201
195
  # Store accumulated gradients (scalars)
202
196
  tl.store(dW_ptr + row_block_id * dW_row_stride + 0, dW0_acc)
@@ -239,7 +233,7 @@ def poly_norm_forward(X, W, B, eps=1e-6):
239
233
  # XPU-specific optimization
240
234
  kernel_args = {}
241
235
  if X.device.type == "xpu":
242
- kernel_args["grf_mode"] = "large"
236
+ set_large_grf_mode(kernel_args)
243
237
 
244
238
  # Launch kernel
245
239
  _poly_norm_forward_kernel[(n_rows,)](
@@ -310,7 +304,7 @@ def poly_norm_backward(dY, X, W, RSTD, BLOCK_SIZE, num_warps, in_place):
310
304
  # XPU-specific optimization
311
305
  kernel_args = {}
312
306
  if X.device.type == "xpu":
313
- kernel_args["grf_mode"] = "large"
307
+ set_large_grf_mode(kernel_args)
314
308
 
315
309
  # Launch backward kernel
316
310
  _poly_norm_backward_kernel[grid](
@@ -20,6 +20,7 @@ import triton.language as tl
20
20
  from liger_kernel.ops.utils import calculate_settings
21
21
  from liger_kernel.ops.utils import compare_version
22
22
  from liger_kernel.ops.utils import ensure_contiguous
23
+ from liger_kernel.ops.utils import set_large_grf_mode
23
24
  from liger_kernel.ops.utils import torch_to_triton_dtype
24
25
  from liger_kernel.utils import get_npu_multi_processor_count
25
26
  from liger_kernel.utils import is_npu_available
@@ -436,7 +437,7 @@ def rms_norm_forward(X, W, eps, offset, casting_mode, row_mode):
436
437
  # XPU-specific optimization
437
438
  kernel_args = {}
438
439
  if X.device.type == "xpu":
439
- kernel_args["grf_mode"] = "large"
440
+ set_large_grf_mode(kernel_args)
440
441
  if BLOCK_SIZE > 256 or n_rows < 4096 * 8 or row_mode:
441
442
  _rms_norm_forward_kernel[(n_rows,)](
442
443
  Y,
@@ -516,7 +517,7 @@ def rms_norm_backward(dY, X, W, RSTD, offset, casting_mode, BLOCK_SIZE, num_warp
516
517
  # XPU-specific optimization
517
518
  kernel_args = {}
518
519
  if X.device.type == "xpu":
519
- kernel_args["grf_mode"] = "large"
520
+ set_large_grf_mode(kernel_args)
520
521
 
521
522
  if BLOCK_SIZE > 256 or n_rows < 4096 * 8 or row_mode:
522
523
  _rms_norm_backward_kernel[grid](
@@ -139,3 +139,14 @@ def get_npu_core_count(default: int = 20) -> int:
139
139
  return int(props.get("num_vectorcore", default))
140
140
  except Exception:
141
141
  return default
142
+
143
+
144
+ def set_large_grf_mode(kernel_args: dict):
145
+ """Set large GRF mode for XPU devices."""
146
+ # On XPU triton installed along with pytorch-xpu will be called `pytorch-triton-xpu`,
147
+ # triton XPU installed from source will be called `triton`.
148
+ if compare_version("pytorch-triton-xpu", operator.ge, "3.6.0") or compare_version("triton", operator.ge, "3.6.0"):
149
+ kernel_args["grf_mode"] = "256"
150
+ else:
151
+ # API was changed in https://github.com/intel/intel-xpu-backend-for-triton/pull/5430
152
+ kernel_args["grf_mode"] = "large"
@@ -8,7 +8,6 @@ import torch.nn as nn
8
8
  from transformers.cache_utils import Cache
9
9
  from transformers.utils import logging
10
10
 
11
- from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
12
11
  from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
13
12
  from liger_kernel.transformers.model.loss_utils import unpack_cross_entropy_result
14
13
  from liger_kernel.transformers.model.output_classes import LigerCausalLMOutputWithPast
@@ -268,8 +267,15 @@ def multimodal_forward(
268
267
  shift_hidden_states = shift_hidden_states.view(-1, self.config.text_config.hidden_size)
269
268
  shift_labels = shift_labels.view(-1).to(hidden_device)
270
269
 
271
- lce = LigerFusedLinearCrossEntropyLoss()
272
- result = lce(self.lm_head.weight, shift_hidden_states, shift_labels)
270
+ result = LigerForCausalLMLoss(
271
+ hidden_states=shift_hidden_states,
272
+ lm_head_weight=self.lm_head.weight,
273
+ labels=shift_labels,
274
+ hidden_size=self.config.text_config.hidden_size,
275
+ shift_labels=shift_labels,
276
+ final_logit_softcapping=getattr(self.config.text_config, "final_logit_softcapping", None),
277
+ **lm_kwargs,
278
+ )
273
279
  loss, _, token_accuracy = unpack_cross_entropy_result(result)
274
280
 
275
281
  else:
@@ -1,3 +1,5 @@
1
+ import inspect
2
+
1
3
  from typing import Optional
2
4
  from typing import Tuple
3
5
 
@@ -71,6 +73,10 @@ def LigerForCausalLMLoss(
71
73
  return_token_accuracy: bool = False,
72
74
  **kwargs,
73
75
  ):
76
+ # Filter out inapplicable kwargs to liger_fused_linear_cross_entropy
77
+ applicable_params = inspect.signature(F.liger_fused_linear_cross_entropy).parameters
78
+ kwargs = {k: v for k, v in kwargs.items() if k in applicable_params}
79
+
74
80
  # Skip upcast since intermediate values for the loss are all fp32 in kernel
75
81
  if shift_labels is None:
76
82
  # Shift so that token < n predict n
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.4.dev20260113145602
3
+ Version: 0.6.4.dev20260116105204
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation