liger-kernel-nightly 0.6.0.dev20250709030408__tar.gz → 0.6.0.dev20250709091230__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (273)
  1. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/PKG-INFO +2 -2
  2. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/dev/modal/tests.py +1 -1
  3. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/dev/modal/tests_bwd.py +3 -3
  4. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/pyproject.toml +1 -1
  5. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/setup.py +1 -1
  6. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/gemma3.py +1 -1
  7. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/monkey_patch.py +4 -1
  8. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/PKG-INFO +2 -2
  9. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/requires.txt +1 -1
  10. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/bf16/test_mini_models.py +2 -2
  11. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/fp32/test_mini_models.py +3 -3
  12. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_monkey_patch.py +35 -33
  13. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  14. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  15. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/pull_request_template.md +0 -0
  16. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/amd-ci.yml +0 -0
  17. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/benchmark.yml +0 -0
  18. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/docs.yml +0 -0
  19. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/intel-ci.yml +0 -0
  20. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/nvi-ci.yml +0 -0
  21. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/publish-nightly.yml +0 -0
  22. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/publish-release.yml +0 -0
  23. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.gitignore +0 -0
  24. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/LICENSE +0 -0
  25. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/Makefile +0 -0
  26. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/NOTICE +0 -0
  27. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/README.md +0 -0
  28. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/README.md +0 -0
  29. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/__init__.py +0 -0
  30. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/benchmarks_visualizer.py +0 -0
  31. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/data/all_benchmark_data.csv +0 -0
  32. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/__init__.py +0 -0
  33. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  34. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  35. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  36. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  37. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  38. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_dyt.py +0 -0
  39. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_embedding.py +0 -0
  40. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  41. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  42. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  43. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_geglu.py +0 -0
  44. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_group_norm.py +0 -0
  45. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_jsd.py +0 -0
  46. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_kl_div.py +0 -0
  47. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  48. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  49. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  50. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  51. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  52. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  53. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_rope.py +0 -0
  54. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  55. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_softmax.py +0 -0
  56. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  57. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  58. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_swiglu.py +0 -0
  59. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_tvd.py +0 -0
  60. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/utils.py +0 -0
  61. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/dev/fmt-requirements.txt +0 -0
  62. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/dev/modal/benchmarks.py +0 -0
  63. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/Examples.md +0 -0
  64. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/Getting-Started.md +0 -0
  65. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/High-Level-APIs.md +0 -0
  66. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/Low-Level-APIs.md +0 -0
  67. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/acknowledgement.md +0 -0
  68. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/contributing.md +0 -0
  69. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/banner.GIF +0 -0
  70. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/compose.gif +0 -0
  71. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/e2e-memory.png +0 -0
  72. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/e2e-tps.png +0 -0
  73. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/logo-banner.png +0 -0
  74. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/patch.gif +0 -0
  75. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/post-training.png +0 -0
  76. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/index.md +0 -0
  77. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/license.md +0 -0
  78. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/alignment/accelerate_config.yaml +0 -0
  79. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/alignment/run_orpo.py +0 -0
  80. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/README.md +0 -0
  81. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/callback.py +0 -0
  82. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/config/fsdp_config.json +0 -0
  83. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  84. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  85. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  86. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/llama_tps.png +0 -0
  87. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  88. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/qwen_tps.png +0 -0
  89. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/launch_on_modal.py +0 -0
  90. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/requirements.txt +0 -0
  91. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_benchmarks.sh +0 -0
  92. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_gemma.sh +0 -0
  93. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_llama.sh +0 -0
  94. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_qwen.sh +0 -0
  95. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_qwen2_vl.sh +0 -0
  96. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/training.py +0 -0
  97. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/training_multimodal.py +0 -0
  98. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/lightning/README.md +0 -0
  99. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/lightning/requirements.txt +0 -0
  100. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/lightning/training.py +0 -0
  101. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/README.md +0 -0
  102. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/callback.py +0 -0
  103. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  104. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  105. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  106. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  107. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  108. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  109. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  110. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  111. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  112. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/medusa_util.py +0 -0
  113. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/requirements.txt +0 -0
  114. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  115. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/train.py +0 -0
  116. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-Apache-2.0 +0 -0
  117. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  118. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  119. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-MIT-llmc +0 -0
  120. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-MIT-triton +0 -0
  121. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/mkdocs.yml +0 -0
  122. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/setup.cfg +0 -0
  123. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/__init__.py +0 -0
  124. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/README.md +0 -0
  125. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  126. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  127. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  128. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  129. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/functional.py +0 -0
  130. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  131. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  132. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  133. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  134. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  135. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  136. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  137. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  138. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  139. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/env_report.py +0 -0
  140. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/__init__.py +0 -0
  141. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/cross_entropy.py +0 -0
  142. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/dyt.py +0 -0
  143. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  144. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  145. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  146. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  147. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  148. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/geglu.py +0 -0
  149. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/group_norm.py +0 -0
  150. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/grpo_loss.py +0 -0
  151. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/jsd.py +0 -0
  152. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/kl_div.py +0 -0
  153. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/layer_norm.py +0 -0
  154. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  155. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  156. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/rms_norm.py +0 -0
  157. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/rope.py +0 -0
  158. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/softmax.py +0 -0
  159. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/sparsemax.py +0 -0
  160. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/swiglu.py +0 -0
  161. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/tvd.py +0 -0
  162. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/utils.py +0 -0
  163. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/__init__.py +0 -0
  164. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/auto_model.py +0 -0
  165. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  166. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/dyt.py +0 -0
  167. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  168. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/fsdp.py +0 -0
  169. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/functional.py +0 -0
  170. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  171. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  172. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  173. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/geglu.py +0 -0
  174. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/group_norm.py +0 -0
  175. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  176. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/jsd.py +0 -0
  177. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/kl_div.py +0 -0
  178. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/layer_norm.py +0 -0
  179. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/__init__.py +0 -0
  180. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/gemma.py +0 -0
  181. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  182. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/glm4.py +0 -0
  183. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/llama.py +0 -0
  184. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/llama4.py +0 -0
  185. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/llava.py +0 -0
  186. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  187. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/mistral.py +0 -0
  188. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  189. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/mllama.py +0 -0
  190. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  191. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  192. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/phi3.py +0 -0
  193. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  194. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  195. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  196. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  197. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  198. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  199. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  200. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/rms_norm.py +0 -0
  201. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/rope.py +0 -0
  202. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/softmax.py +0 -0
  203. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/sparsemax.py +0 -0
  204. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/swiglu.py +0 -0
  205. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  206. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  207. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  208. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/tvd.py +0 -0
  209. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/triton/__init__.py +0 -0
  210. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/triton/monkey_patch.py +0 -0
  211. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/utils.py +0 -0
  212. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  213. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  214. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  215. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/__init__.py +0 -0
  216. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/__init__.py +0 -0
  217. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_cosine_loss.py +0 -0
  218. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_cpo_loss.py +0 -0
  219. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_dpo_loss.py +0 -0
  220. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_grpo_loss.py +0 -0
  221. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_jsd_loss.py +0 -0
  222. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_kto_loss.py +0 -0
  223. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_orpo_loss.py +0 -0
  224. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_simpo_loss.py +0 -0
  225. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/conftest.py +0 -0
  226. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/__init__.py +0 -0
  227. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/bf16/__init__.py +0 -0
  228. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  229. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  230. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/fp32/__init__.py +0 -0
  231. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
  232. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  233. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  234. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  235. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  236. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  237. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  238. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  239. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  240. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  241. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  242. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  243. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/tiny_shakespeare.txt +0 -0
  244. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  245. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  246. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  247. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_auto_model.py +0 -0
  248. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_cross_entropy.py +0 -0
  249. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_dyt.py +0 -0
  250. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_embedding.py +0 -0
  251. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_flex_attention.py +0 -0
  252. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  253. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_fused_linear_jsd.py +0 -0
  254. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  255. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_geglu.py +0 -0
  256. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_group_norm.py +0 -0
  257. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_grpo_loss.py +0 -0
  258. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_jsd.py +0 -0
  259. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_kl_div.py +0 -0
  260. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_layer_norm.py +0 -0
  261. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_mm_int8int2.py +0 -0
  262. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_multi_token_attention.py +0 -0
  263. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_qwen2vl_mrope.py +0 -0
  264. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_rms_norm.py +0 -0
  265. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_rope.py +0 -0
  266. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_softmax.py +0 -0
  267. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_sparsemax.py +0 -0
  268. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_swiglu.py +0 -0
  269. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_trainer_integration.py +0 -0
  270. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_transformers.py +0 -0
  271. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_tvd.py +0 -0
  272. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/triton/test_triton_monkey_patch.py +0 -0
  273. {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.0.dev20250709030408
3
+ Version: 0.6.0.dev20250709091230
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -33,7 +33,7 @@ License-File: NOTICE
33
33
  Requires-Dist: torch>=2.1.2
34
34
  Requires-Dist: triton>=2.3.1
35
35
  Provides-Extra: dev
36
- Requires-Dist: transformers>=4.44.2; extra == "dev"
36
+ Requires-Dist: transformers>=4.49.0; extra == "dev"
37
37
  Requires-Dist: matplotlib>=3.7.2; extra == "dev"
38
38
  Requires-Dist: flake8>=4.0.1.1; extra == "dev"
39
39
  Requires-Dist: black>=24.4.2; extra == "dev"
@@ -14,7 +14,7 @@ app = modal.App("liger_tests", image=image)
14
14
  repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
15
15
 
16
16
 
17
- @app.function(gpu="A10G", image=repo, timeout=60 * 45)
17
+ @app.function(gpu="A10G", image=repo, timeout=60 * 60)
18
18
  def liger_tests():
19
19
  import subprocess
20
20
 
@@ -14,7 +14,7 @@ app = modal.App("liger_tests_bwd", image=image)
14
14
  repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
15
15
 
16
16
 
17
- @app.function(gpu="A10G", image=repo, timeout=60 * 30)
17
+ @app.function(gpu="A10G", image=repo, timeout=60 * 60)
18
18
  def liger_bwd_tests():
19
19
  import subprocess
20
20
 
@@ -24,9 +24,9 @@ def liger_bwd_tests():
24
24
  shell=True,
25
25
  cwd=REMOTE_ROOT_PATH,
26
26
  )
27
- # force install transformers==4.44.2
27
+ # force install transformers==4.49.0
28
28
  subprocess.run(
29
- ["uv pip install transformers==4.44.2 --system"],
29
+ ["uv pip install transformers==4.49.0 --system"],
30
30
  check=True,
31
31
  shell=True,
32
32
  cwd=REMOTE_ROOT_PATH,
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.0.dev20250709030408"
7
+ version = "0.6.0.dev20250709091230"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -31,7 +31,7 @@ def get_optional_dependencies():
31
31
  """Get optional dependency groups."""
32
32
  return {
33
33
  "dev": [
34
- "transformers>=4.44.2",
34
+ "transformers>=4.49.0",
35
35
  "matplotlib>=3.7.2",
36
36
  "flake8>=4.0.1.1",
37
37
  "black>=24.4.2",
@@ -255,7 +255,7 @@ def multimodal_forward(
255
255
  shift_labels = shift_labels.view(-1).to(hidden_device)
256
256
 
257
257
  lce = LigerFusedLinearCrossEntropyLoss()
258
- loss = lce(self.language_model.lm_head.weight, shift_hidden_states, shift_labels)
258
+ loss = lce(self.lm_head.weight, shift_hidden_states, shift_labels)
259
259
  else:
260
260
  logits = self.lm_head(kept_hidden_states)
261
261
  if labels is not None:
@@ -537,7 +537,10 @@ def apply_liger_kernel_to_mllama(
537
537
  if isinstance(model, MllamaForConditionalGeneration):
538
538
  language_model: MllamaForCausalLM = model.language_model
539
539
  vision_model: MllamaVisionModel = model.vision_model
540
- text_model: MllamaTextModel = language_model
540
+ if isinstance(language_model, MllamaForCausalLM):
541
+ text_model: MllamaTextModel = language_model.model
542
+ else:
543
+ text_model = language_model
541
544
  elif isinstance(model, MllamaForCausalLM):
542
545
  text_model = model.model
543
546
  vision_model = None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.0.dev20250709030408
3
+ Version: 0.6.0.dev20250709091230
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -33,7 +33,7 @@ License-File: NOTICE
33
33
  Requires-Dist: torch>=2.1.2
34
34
  Requires-Dist: triton>=2.3.1
35
35
  Provides-Extra: dev
36
- Requires-Dist: transformers>=4.44.2; extra == "dev"
36
+ Requires-Dist: transformers>=4.49.0; extra == "dev"
37
37
  Requires-Dist: matplotlib>=3.7.2; extra == "dev"
38
38
  Requires-Dist: flake8>=4.0.1.1; extra == "dev"
39
39
  Requires-Dist: black>=24.4.2; extra == "dev"
@@ -2,7 +2,7 @@ torch>=2.1.2
2
2
  triton>=2.3.1
3
3
 
4
4
  [dev]
5
- transformers>=4.44.2
5
+ transformers>=4.49.0
6
6
  matplotlib>=3.7.2
7
7
  flake8>=4.0.1.1
8
8
  black>=24.4.2
@@ -957,8 +957,8 @@ def run_mini_model(
957
957
  reason="LLaVa not available in this version of transformers",
958
958
  ),
959
959
  pytest.mark.skipif(
960
- version.parse(transformers.__version__) < version.parse("4.49.0"),
961
- reason="Mistral not available in transformers<=4.49.0",
960
+ version.parse(transformers.__version__) < version.parse("4.52.0"),
961
+ reason="LLaVa doesn't materialize logits in transformers<=4.52.0 so we can't test it",
962
962
  ),
963
963
  ],
964
964
  ),
@@ -938,8 +938,8 @@ def run_mini_model(
938
938
  reason="LLaVa not available in this version of transformers",
939
939
  ),
940
940
  pytest.mark.skipif(
941
- version.parse(transformers.__version__) < version.parse("4.49.0"),
942
- reason="Mistral not available in transformers<=4.49.0",
941
+ version.parse(transformers.__version__) < version.parse("4.52.0"),
942
+ reason="LLaVa doesn't materialize logits in transformers<=4.52.0 so we can't test it",
943
943
  ),
944
944
  ],
945
945
  ),
@@ -1103,7 +1103,7 @@ def run_mini_model(
1103
1103
  torch.float32,
1104
1104
  1e-8,
1105
1105
  1e-4,
1106
- 5e-3, # 4e-3
1106
+ 4e-2, # 4e-3
1107
1107
  1e-5, # 1e-5
1108
1108
  5e-3,
1109
1109
  1e-5,
@@ -74,24 +74,6 @@ def is_llama4_available():
74
74
  return False
75
75
 
76
76
 
77
- def is_qwen2_vl_available():
78
- try:
79
- import transformers.models.qwen2_vl # noqa: F401
80
-
81
- return True
82
- except ImportError:
83
- return False
84
-
85
-
86
- def is_qwen2_5_vl_available():
87
- try:
88
- import transformers.models.qwen2_5_vl # noqa: F401
89
-
90
- return True
91
- except ImportError:
92
- return False
93
-
94
-
95
77
  def is_qwen3_available():
96
78
  try:
97
79
  import transformers.models.qwen3 # noqa: F401
@@ -365,6 +347,7 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
365
347
  # Ensure any monkey patching is cleaned up for subsequent tests
366
348
  with patch("transformers.models.mllama.modeling_mllama"):
367
349
  from transformers.models.mllama.modeling_mllama import MllamaForConditionalGeneration
350
+ from transformers.models.mllama.modeling_mllama import MllamaTextModel
368
351
 
369
352
  # Instantiate a dummy model
370
353
  config = transformers.models.mllama.configuration_mllama.MllamaConfig(
@@ -398,10 +381,14 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
398
381
 
399
382
  # Check that model instance variables are not yet patched with Liger modules
400
383
  assert inspect.getsource(dummy_model_instance.forward) != inspect.getsource(mllama_lce_forward)
401
- assert inspect.getsource(dummy_model_instance.language_model.norm.forward) != inspect.getsource(
402
- LigerRMSNorm.forward
403
- )
404
- for layer in dummy_model_instance.language_model.layers:
384
+
385
+ if isinstance(dummy_model_instance.language_model, MllamaTextModel):
386
+ language_model = dummy_model_instance.language_model
387
+ else:
388
+ language_model = dummy_model_instance.language_model.model
389
+
390
+ assert inspect.getsource(language_model.norm.forward) != inspect.getsource(LigerRMSNorm.forward)
391
+ for layer in language_model.layers:
405
392
  assert inspect.getsource(layer.mlp.forward) != inspect.getsource(LigerSwiGLUMLP.forward)
406
393
  assert inspect.getsource(layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
407
394
  assert inspect.getsource(layer.post_attention_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
@@ -428,10 +415,8 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
428
415
 
429
416
  # Check that the model's instance variables were correctly patched with Liger modules
430
417
  assert inspect.getsource(dummy_model_instance.forward) == inspect.getsource(mllama_lce_forward)
431
- assert inspect.getsource(dummy_model_instance.language_model.norm.forward) == inspect.getsource(
432
- LigerRMSNorm.forward
433
- )
434
- for layer in dummy_model_instance.language_model.layers:
418
+ assert inspect.getsource(language_model.norm.forward) == inspect.getsource(LigerRMSNorm.forward)
419
+ for layer in language_model.layers:
435
420
  assert inspect.getsource(layer.mlp.forward) == inspect.getsource(LigerSwiGLUMLP.forward)
436
421
  assert inspect.getsource(layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
437
422
  assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
@@ -452,7 +437,6 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
452
437
  assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(
453
438
  LigerLayerNorm.forward
454
439
  )
455
-
456
440
  try:
457
441
  print(dummy_model_instance)
458
442
  except Exception as e:
@@ -1130,7 +1114,10 @@ def test_apply_liger_kernel_to_instance_for_qwen3_moe():
1130
1114
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1131
1115
 
1132
1116
 
1133
- @pytest.mark.skipif(not is_qwen2_vl_available(), reason="qwen2_vl module not available")
1117
+ @pytest.mark.skipif(
1118
+ transformer_version < version.parse("4.52.4"),
1119
+ reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
1120
+ )
1134
1121
  def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation():
1135
1122
  # Ensure any monkey patching is cleaned up for subsequent tests
1136
1123
  with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
@@ -1196,7 +1183,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation(
1196
1183
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1197
1184
 
1198
1185
 
1199
- @pytest.mark.skipif(not is_qwen2_vl_available(), reason="qwen2_vl module not available")
1186
+ @pytest.mark.skipif(
1187
+ transformer_version < version.parse("4.52.4"),
1188
+ reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
1189
+ )
1200
1190
  def test_apply_liger_kernel_to_instance_for_qwen2_vl():
1201
1191
  # Ensure any monkey patching is cleaned up for subsequent tests
1202
1192
  with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
@@ -1262,7 +1252,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl():
1262
1252
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1263
1253
 
1264
1254
 
1265
- @pytest.mark.skipif(not is_qwen2_vl_available(), reason="qwen2_vl module not available")
1255
+ @pytest.mark.skipif(
1256
+ transformer_version < version.parse("4.52.4"),
1257
+ reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
1258
+ )
1266
1259
  def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
1267
1260
  # Ensure any monkey patching is cleaned up for subsequent tests
1268
1261
  with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
@@ -1310,7 +1303,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
1310
1303
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1311
1304
 
1312
1305
 
1313
- @pytest.mark.skipif(not is_qwen2_5_vl_available(), reason="qwen2_5_vl module not available")
1306
+ @pytest.mark.skipif(
1307
+ transformer_version < version.parse("4.52.4"),
1308
+ reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
1309
+ )
1314
1310
  def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
1315
1311
  # Ensure any monkey patching is cleaned up for subsequent tests
1316
1312
  with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):
@@ -1376,7 +1372,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
1376
1372
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1377
1373
 
1378
1374
 
1379
- @pytest.mark.skipif(not is_qwen2_5_vl_available(), reason="qwen2_5_vl module not available")
1375
+ @pytest.mark.skipif(
1376
+ transformer_version < version.parse("4.52.4"),
1377
+ reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
1378
+ )
1380
1379
  def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generation():
1381
1380
  # Ensure any monkey patching is cleaned up for subsequent tests
1382
1381
  with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):
@@ -1442,7 +1441,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generatio
1442
1441
  pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
1443
1442
 
1444
1443
 
1445
- @pytest.mark.skipif(not is_qwen2_5_vl_available(), reason="qwen2_5_vl module not available")
1444
+ @pytest.mark.skipif(
1445
+ transformer_version < version.parse("4.52.4"),
1446
+ reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
1447
+ )
1446
1448
  def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_text():
1447
1449
  # Ensure any monkey patching is cleaned up for subsequent tests
1448
1450
  with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):