liger-kernel-nightly 0.6.2.dev20251020204513__tar.gz → 0.6.2.dev20251024142419__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294)
  1. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/PKG-INFO +1 -1
  2. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/pyproject.toml +1 -1
  3. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/monkey_patch.py +2 -1
  4. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
  5. liger_kernel_nightly-0.6.2.dev20251024142419/test/conftest.py +11 -0
  6. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/bf16/test_mini_models.py +6 -2
  7. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/bf16/test_mini_models_multimodal.py +3 -0
  8. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/bf16/test_mini_models_with_logits.py +6 -0
  9. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/fp32/test_mini_models.py +7 -2
  10. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/fp32/test_mini_models_multimodal.py +4 -0
  11. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/fp32/test_mini_models_with_logits.py +6 -0
  12. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/utils.py +14 -0
  13. liger_kernel_nightly-0.6.2.dev20251020204513/test/conftest.py +0 -8
  14. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  15. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  16. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/pull_request_template.md +0 -0
  17. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/workflows/amd-ci.yml +0 -0
  18. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/workflows/benchmark.yml +0 -0
  19. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/workflows/docs.yml +0 -0
  20. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/workflows/intel-ci.yml +0 -0
  21. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/workflows/nvi-ci.yml +0 -0
  22. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/workflows/publish-nightly.yml +0 -0
  23. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.github/workflows/publish-release.yml +0 -0
  24. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/.gitignore +0 -0
  25. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/LICENSE +0 -0
  26. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/Makefile +0 -0
  27. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/NOTICE +0 -0
  28. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/README.md +0 -0
  29. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/README.md +0 -0
  30. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/__init__.py +0 -0
  31. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/benchmarks_visualizer.py +0 -0
  32. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/data/all_benchmark_data.csv +0 -0
  33. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/__init__.py +0 -0
  34. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  35. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  36. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  37. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  38. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  39. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_dyt.py +0 -0
  40. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_embedding.py +0 -0
  41. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
  42. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  43. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  44. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  45. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_geglu.py +0 -0
  46. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_group_norm.py +0 -0
  47. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
  48. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_jsd.py +0 -0
  49. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_kl_div.py +0 -0
  50. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  51. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  52. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
  53. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  54. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  55. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_poly_norm.py +0 -0
  56. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  57. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  58. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_rope.py +0 -0
  59. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  60. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_softmax.py +0 -0
  61. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  62. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  63. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_swiglu.py +0 -0
  64. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/benchmark_tvd.py +0 -0
  65. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/benchmark/scripts/utils.py +0 -0
  66. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/dev/fmt-requirements.txt +0 -0
  67. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/dev/modal/benchmarks.py +0 -0
  68. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/dev/modal/tests.py +0 -0
  69. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/dev/modal/tests_bwd.py +0 -0
  70. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/Examples.md +0 -0
  71. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/Getting-Started.md +0 -0
  72. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/High-Level-APIs.md +0 -0
  73. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/Low-Level-APIs.md +0 -0
  74. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/acknowledgement.md +0 -0
  75. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/contributing.md +0 -0
  76. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/images/banner.GIF +0 -0
  77. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/images/compose.gif +0 -0
  78. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/images/e2e-memory.png +0 -0
  79. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/images/e2e-tps.png +0 -0
  80. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/images/logo-banner.png +0 -0
  81. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/images/patch.gif +0 -0
  82. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/images/post-training.png +0 -0
  83. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/index.md +0 -0
  84. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/docs/license.md +0 -0
  85. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/alignment/accelerate_config.yaml +0 -0
  86. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/alignment/run_orpo.py +0 -0
  87. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/README.md +0 -0
  88. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/callback.py +0 -0
  89. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/config/fsdp_config.json +0 -0
  90. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  91. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  92. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  93. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/img/llama_tps.png +0 -0
  94. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  95. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/img/qwen_tps.png +0 -0
  96. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/launch_on_modal.py +0 -0
  97. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/requirements.txt +0 -0
  98. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/run_benchmarks.sh +0 -0
  99. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/run_gemma.sh +0 -0
  100. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/run_llama.sh +0 -0
  101. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/run_qwen.sh +0 -0
  102. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/run_qwen2_vl.sh +0 -0
  103. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/training.py +0 -0
  104. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/huggingface/training_multimodal.py +0 -0
  105. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/lightning/README.md +0 -0
  106. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/lightning/requirements.txt +0 -0
  107. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/lightning/training.py +0 -0
  108. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/README.md +0 -0
  109. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/callback.py +0 -0
  110. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  111. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  112. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  113. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  114. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  115. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  116. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  117. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  118. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  119. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/medusa_util.py +0 -0
  120. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/requirements.txt +0 -0
  121. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  122. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/examples/medusa/train.py +0 -0
  123. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/licenses/LICENSE-Apache-2.0 +0 -0
  124. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  125. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  126. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/licenses/LICENSE-MIT-llmc +0 -0
  127. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/licenses/LICENSE-MIT-triton +0 -0
  128. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/mkdocs.yml +0 -0
  129. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/setup.cfg +0 -0
  130. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/setup.py +0 -0
  131. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/__init__.py +0 -0
  132. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/README.md +0 -0
  133. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  134. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  135. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  136. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  137. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/functional.py +0 -0
  138. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  139. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  140. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  141. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  142. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  143. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  144. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  145. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  146. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  147. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/env_report.py +0 -0
  148. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/__init__.py +0 -0
  149. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/cross_entropy.py +0 -0
  150. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/dyt.py +0 -0
  151. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  152. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  153. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
  154. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  155. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  156. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  157. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/geglu.py +0 -0
  158. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/group_norm.py +0 -0
  159. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/grpo_loss.py +0 -0
  160. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/jsd.py +0 -0
  161. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/kl_div.py +0 -0
  162. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/layer_norm.py +0 -0
  163. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/llama4_rope.py +0 -0
  164. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  165. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/poly_norm.py +0 -0
  166. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  167. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/rms_norm.py +0 -0
  168. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/rope.py +0 -0
  169. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/softmax.py +0 -0
  170. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/sparsemax.py +0 -0
  171. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/swiglu.py +0 -0
  172. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/tvd.py +0 -0
  173. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/ops/utils.py +0 -0
  174. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/__init__.py +0 -0
  175. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/auto_model.py +0 -0
  176. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  177. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/dyt.py +0 -0
  178. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
  179. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  180. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/fsdp.py +0 -0
  181. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/functional.py +0 -0
  182. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
  183. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  184. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  185. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  186. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/geglu.py +0 -0
  187. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/group_norm.py +0 -0
  188. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  189. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/jsd.py +0 -0
  190. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/kl_div.py +0 -0
  191. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/layer_norm.py +0 -0
  192. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/llama4_rope.py +0 -0
  193. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/__init__.py +0 -0
  194. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
  195. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/gemma.py +0 -0
  196. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  197. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  198. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/glm4.py +0 -0
  199. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/glm4v.py +0 -0
  200. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
  201. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/internvl.py +0 -0
  202. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/llama.py +0 -0
  203. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/llama4.py +0 -0
  204. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/llava.py +0 -0
  205. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  206. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/mistral.py +0 -0
  207. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  208. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/mllama.py +0 -0
  209. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  210. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  211. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/phi3.py +0 -0
  212. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  213. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  214. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  215. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  216. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  217. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/qwen3_next.py +0 -0
  218. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/model/smollm3.py +0 -0
  219. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  220. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/poly_norm.py +0 -0
  221. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  222. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/rms_norm.py +0 -0
  223. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/rope.py +0 -0
  224. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/softmax.py +0 -0
  225. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/sparsemax.py +0 -0
  226. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/swiglu.py +0 -0
  227. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  228. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  229. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  230. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/transformers/tvd.py +0 -0
  231. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/triton/__init__.py +0 -0
  232. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/triton/monkey_patch.py +0 -0
  233. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel/utils.py +0 -0
  234. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
  235. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  236. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  237. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  238. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/__init__.py +0 -0
  239. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/__init__.py +0 -0
  240. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/test_cosine_loss.py +0 -0
  241. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/test_cpo_loss.py +0 -0
  242. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/test_dpo_loss.py +0 -0
  243. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/test_grpo_loss.py +0 -0
  244. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/test_jsd_loss.py +0 -0
  245. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/test_kto_loss.py +0 -0
  246. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/test_orpo_loss.py +0 -0
  247. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/chunked_loss/test_simpo_loss.py +0 -0
  248. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/__init__.py +0 -0
  249. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/bf16/__init__.py +0 -0
  250. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/convergence/fp32/__init__.py +0 -0
  251. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  252. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  253. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  254. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  255. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  256. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
  257. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  258. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  259. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  260. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  261. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  262. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/tiny_shakespeare.txt +0 -0
  263. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  264. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  265. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  266. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_auto_model.py +0 -0
  267. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_cross_entropy.py +0 -0
  268. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_dyt.py +0 -0
  269. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_embedding.py +0 -0
  270. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_flex_attention.py +0 -0
  271. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_fused_add_rms_norm.py +0 -0
  272. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  273. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_fused_linear_jsd.py +0 -0
  274. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  275. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_geglu.py +0 -0
  276. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_group_norm.py +0 -0
  277. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_grpo_loss.py +0 -0
  278. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_jsd.py +0 -0
  279. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_kl_div.py +0 -0
  280. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_layer_norm.py +0 -0
  281. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_mm_int8int2.py +0 -0
  282. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_monkey_patch.py +0 -0
  283. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_multi_token_attention.py +0 -0
  284. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_poly_norm.py +0 -0
  285. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_qwen2vl_mrope.py +0 -0
  286. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_rms_norm.py +0 -0
  287. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_rope.py +0 -0
  288. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_softmax.py +0 -0
  289. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_sparsemax.py +0 -0
  290. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_swiglu.py +0 -0
  291. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_trainer_integration.py +0 -0
  292. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_transformers.py +0 -0
  293. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/transformers/test_tvd.py +0 -0
  294. {liger_kernel_nightly-0.6.2.dev20251020204513 → liger_kernel_nightly-0.6.2.dev20251024142419}/test/triton/test_triton_monkey_patch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.2.dev20251020204513
3
+ Version: 0.6.2.dev20251024142419
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.2.dev20251020204513"
7
+ version = "0.6.2.dev20251024142419"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -1971,7 +1971,8 @@ def apply_liger_kernel_to_glm4v_moe(
1971
1971
  if rope:
1972
1972
  raise NotImplementedError("liger_rotary_pos_emb is not available for Glm4 models.")
1973
1973
  if rms_norm:
1974
- modeling_glm4v_moe.Glm4vRMSNorm = LigerRMSNormForGlm4
1974
+ modeling_glm4v_moe.Glm4vMoeRMSNorm = LigerRMSNormForGlm4
1975
+ modeling_glm4v_moe.Glm4vMoeTextRMSNorm = LigerRMSNormForGlm4
1975
1976
  if cross_entropy:
1976
1977
  from transformers.loss.loss_utils import nn
1977
1978
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.2.dev20251020204513
3
+ Version: 0.6.2.dev20251024142419
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -0,0 +1,11 @@
1
+ import pytest
2
+ import torch
3
+
4
+
5
+ @pytest.fixture(autouse=True)
6
+ def clear_gpu_cache():
7
+ yield
8
+ if torch.cuda.is_available():
9
+ torch.cuda.empty_cache()
10
+ elif torch.xpu.is_available():
11
+ torch.xpu.empty_cache()
@@ -1,3 +1,7 @@
1
+ import os
2
+
3
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Ensure deterministic behavior with CuBLAS
4
+
1
5
  import pytest
2
6
  import torch
3
7
 
@@ -47,6 +51,7 @@ from test.utils import MiniModelConfig
47
51
  from test.utils import assert_verbose_allclose
48
52
  from test.utils import get_logprobs
49
53
  from test.utils import get_topk
54
+ from test.utils import require_deterministic
50
55
  from test.utils import revert_liger_kernel_to_falcon_h1
51
56
  from test.utils import revert_liger_kernel_to_gemma
52
57
  from test.utils import revert_liger_kernel_to_gemma2
@@ -1165,6 +1170,7 @@ def create_model(model_name="mini_llama4"):
1165
1170
  return model_class(model_config)
1166
1171
 
1167
1172
 
1173
+ @require_deterministic
1168
1174
  def run_mini_model(
1169
1175
  model_name="mini_llama4",
1170
1176
  num_steps=100,
@@ -1522,7 +1528,6 @@ def run_mini_model(
1522
1528
  not GLM4V_AVAILABLE,
1523
1529
  reason="Glm4v not available in this version of transformers",
1524
1530
  ),
1525
- pytest.mark.skipif(device == "xpu", reason="skip for XPU"),
1526
1531
  ],
1527
1532
  ),
1528
1533
  pytest.param(
@@ -1542,7 +1547,6 @@ def run_mini_model(
1542
1547
  not GLM4V_MOE_AVAILABLE,
1543
1548
  reason="Glm4v_moe not available in this version of transformers",
1544
1549
  ),
1545
- pytest.mark.skipif(device == "xpu", reason="skip for XPU"),
1546
1550
  ],
1547
1551
  ),
1548
1552
  pytest.param(
@@ -1,6 +1,7 @@
1
1
  import functools
2
2
  import os
3
3
 
4
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Ensure deterministic behavior with CuBLAS
4
5
  import pytest
5
6
  import torch
6
7
 
@@ -29,6 +30,7 @@ from test.utils import load_image_processing_config
29
30
  from test.utils import load_processor_config
30
31
  from test.utils import load_tokenizer_config
31
32
  from test.utils import multimodal_collate_fn
33
+ from test.utils import require_deterministic
32
34
  from test.utils import revert_liger_kernel_to_gemma3
33
35
  from test.utils import revert_liger_kernel_to_internvl
34
36
  from test.utils import revert_liger_kernel_to_llama4
@@ -881,6 +883,7 @@ def create_model(model_name):
881
883
  return model_class(model_config)
882
884
 
883
885
 
886
+ @require_deterministic
884
887
  def run_mini_model_multimodal(
885
888
  model_name="mini_qwen2_vl",
886
889
  num_steps=100,
@@ -1,3 +1,7 @@
1
+ import os
2
+
3
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Ensure deterministic behavior with CuBLAS
4
+
1
5
  import pytest
2
6
  import torch
3
7
 
@@ -47,6 +51,7 @@ from test.utils import MiniModelConfig
47
51
  from test.utils import assert_verbose_allclose
48
52
  from test.utils import get_logprobs
49
53
  from test.utils import get_topk
54
+ from test.utils import require_deterministic
50
55
  from test.utils import revert_liger_kernel_to_falcon_h1
51
56
  from test.utils import revert_liger_kernel_to_gemma
52
57
  from test.utils import revert_liger_kernel_to_gemma2
@@ -1164,6 +1169,7 @@ def create_model(model_name="mini_llama3"):
1164
1169
  return model_class(model_config)
1165
1170
 
1166
1171
 
1172
+ @require_deterministic
1167
1173
  def run_mini_model(
1168
1174
  model_name="mini_llama3",
1169
1175
  num_steps=100,
@@ -1,3 +1,7 @@
1
+ import os
2
+
3
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Ensure deterministic behavior with CuBLAS
4
+
1
5
  import pytest
2
6
  import torch
3
7
 
@@ -47,6 +51,7 @@ from test.utils import MiniModelConfig
47
51
  from test.utils import assert_verbose_allclose
48
52
  from test.utils import get_logprobs
49
53
  from test.utils import get_topk
54
+ from test.utils import require_deterministic
50
55
  from test.utils import revert_liger_kernel_to_falcon_h1
51
56
  from test.utils import revert_liger_kernel_to_gemma
52
57
  from test.utils import revert_liger_kernel_to_gemma2
@@ -1160,6 +1165,7 @@ def create_model(model_name="mini_llama3"):
1160
1165
  return model_class(model_config)
1161
1166
 
1162
1167
 
1168
+ @require_deterministic
1163
1169
  def run_mini_model(
1164
1170
  model_name="mini_llama3",
1165
1171
  num_steps=100,
@@ -1436,7 +1442,7 @@ def run_mini_model(
1436
1442
  1e-4,
1437
1443
  torch.float32,
1438
1444
  1e-8,
1439
- 1e-5,
1445
+ 1e-3,
1440
1446
  5e-3,
1441
1447
  1e-5,
1442
1448
  5e-3,
@@ -1446,7 +1452,6 @@ def run_mini_model(
1446
1452
  not GLM4V_MOE_AVAILABLE,
1447
1453
  reason="Glm4v_moe not available in this version of transformers",
1448
1454
  ),
1449
- pytest.mark.skipif(device == "xpu", reason="skip for XPU"),
1450
1455
  ],
1451
1456
  ),
1452
1457
  ("mini_phi3", 32, 1e-4, torch.float32, 1e-8, 1e-5, 5e-3, 1e-5, 5e-3, 1e-5),
@@ -1,6 +1,8 @@
1
1
  import functools
2
2
  import os
3
3
 
4
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Ensure deterministic behavior with CuBLAS
5
+
4
6
  import pytest
5
7
  import torch
6
8
 
@@ -29,6 +31,7 @@ from test.utils import load_image_processing_config
29
31
  from test.utils import load_processor_config
30
32
  from test.utils import load_tokenizer_config
31
33
  from test.utils import multimodal_collate_fn
34
+ from test.utils import require_deterministic
32
35
  from test.utils import revert_liger_kernel_to_gemma3
33
36
  from test.utils import revert_liger_kernel_to_internvl
34
37
  from test.utils import revert_liger_kernel_to_llama4
@@ -878,6 +881,7 @@ def create_model(model_name):
878
881
  return model_class(model_config)
879
882
 
880
883
 
884
+ @require_deterministic
881
885
  def run_mini_model_multimodal(
882
886
  model_name="mini_qwen2_vl",
883
887
  num_steps=100,
@@ -1,3 +1,7 @@
1
+ import os
2
+
3
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" # Ensure deterministic behavior with CuBLAS
4
+
1
5
  import pytest
2
6
  import torch
3
7
 
@@ -47,6 +51,7 @@ from test.utils import MiniModelConfig
47
51
  from test.utils import assert_verbose_allclose
48
52
  from test.utils import get_logprobs
49
53
  from test.utils import get_topk
54
+ from test.utils import require_deterministic
50
55
  from test.utils import revert_liger_kernel_to_falcon_h1
51
56
  from test.utils import revert_liger_kernel_to_gemma
52
57
  from test.utils import revert_liger_kernel_to_gemma2
@@ -1161,6 +1166,7 @@ def create_model(model_name="mini_llama3"):
1161
1166
  return model_class(model_config)
1162
1167
 
1163
1168
 
1169
+ @require_deterministic
1164
1170
  def run_mini_model(
1165
1171
  model_name="mini_llama3",
1166
1172
  num_steps=100,
@@ -5,6 +5,7 @@ import random
5
5
 
6
6
  from abc import abstractmethod
7
7
  from dataclasses import dataclass
8
+ from functools import wraps
8
9
  from typing import Any
9
10
  from typing import Dict
10
11
  from typing import List
@@ -59,6 +60,19 @@ def set_seed(seed=42):
59
60
  os.environ["PYTHONHASHSEED"] = str(seed)
60
61
 
61
62
 
63
+ def require_deterministic(test_case):
64
+ @wraps(test_case)
65
+ def wrapper(*args, **kwargs):
66
+ original_state = torch.are_deterministic_algorithms_enabled()
67
+ try:
68
+ torch.use_deterministic_algorithms(True)
69
+ return test_case(*args, **kwargs)
70
+ finally:
71
+ torch.use_deterministic_algorithms(original_state)
72
+
73
+ return wrapper
74
+
75
+
62
76
  @torch.no_grad
63
77
  def get_logprobs(tensor):
64
78
  return torch.nn.functional.log_softmax(tensor, dim=-1, dtype=torch.float32)
@@ -1,8 +0,0 @@
1
- import pytest
2
- import torch
3
-
4
-
5
- @pytest.fixture(autouse=True)
6
- def clear_cuda_cache():
7
- yield
8
- torch.cuda.empty_cache()