liger-kernel-nightly 0.6.2.dev20251024142419__tar.gz → 0.6.3.dev20251027181634__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/PKG-INFO +1 -1
  2. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/pyproject.toml +1 -1
  3. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/__init__.py +3 -0
  4. liger_kernel_nightly-0.6.3.dev20251027181634/src/liger_kernel/transformers/model/smolvlm.py +158 -0
  5. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/monkey_patch.py +101 -0
  6. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
  7. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel_nightly.egg-info/SOURCES.txt +2 -0
  8. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/bf16/test_mini_models_multimodal.py +105 -0
  9. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/fp32/test_mini_models_multimodal.py +104 -0
  10. liger_kernel_nightly-0.6.3.dev20251027181634/test/resources/fake_configs/HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json +1192 -0
  11. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_monkey_patch.py +85 -0
  12. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/utils.py +17 -0
  13. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  14. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  15. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/pull_request_template.md +0 -0
  16. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/workflows/amd-ci.yml +0 -0
  17. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/workflows/benchmark.yml +0 -0
  18. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/workflows/docs.yml +0 -0
  19. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/workflows/intel-ci.yml +0 -0
  20. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/workflows/nvi-ci.yml +0 -0
  21. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/workflows/publish-nightly.yml +0 -0
  22. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.github/workflows/publish-release.yml +0 -0
  23. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/.gitignore +0 -0
  24. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/LICENSE +0 -0
  25. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/Makefile +0 -0
  26. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/NOTICE +0 -0
  27. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/README.md +0 -0
  28. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/README.md +0 -0
  29. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/__init__.py +0 -0
  30. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/benchmarks_visualizer.py +0 -0
  31. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/data/all_benchmark_data.csv +0 -0
  32. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/__init__.py +0 -0
  33. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  34. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  35. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
  36. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  37. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  38. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_dyt.py +0 -0
  39. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_embedding.py +0 -0
  40. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
  41. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  42. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  43. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
  44. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_geglu.py +0 -0
  45. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_group_norm.py +0 -0
  46. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
  47. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_jsd.py +0 -0
  48. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_kl_div.py +0 -0
  49. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  50. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  51. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
  52. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
  53. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  54. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_poly_norm.py +0 -0
  55. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  56. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  57. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_rope.py +0 -0
  58. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  59. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_softmax.py +0 -0
  60. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
  61. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  62. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_swiglu.py +0 -0
  63. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/benchmark_tvd.py +0 -0
  64. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/benchmark/scripts/utils.py +0 -0
  65. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/dev/fmt-requirements.txt +0 -0
  66. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/dev/modal/benchmarks.py +0 -0
  67. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/dev/modal/tests.py +0 -0
  68. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/dev/modal/tests_bwd.py +0 -0
  69. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/Examples.md +0 -0
  70. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/Getting-Started.md +0 -0
  71. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/High-Level-APIs.md +0 -0
  72. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/Low-Level-APIs.md +0 -0
  73. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/acknowledgement.md +0 -0
  74. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/contributing.md +0 -0
  75. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/images/banner.GIF +0 -0
  76. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/images/compose.gif +0 -0
  77. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/images/e2e-memory.png +0 -0
  78. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/images/e2e-tps.png +0 -0
  79. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/images/logo-banner.png +0 -0
  80. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/images/patch.gif +0 -0
  81. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/images/post-training.png +0 -0
  82. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/index.md +0 -0
  83. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/docs/license.md +0 -0
  84. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/alignment/accelerate_config.yaml +0 -0
  85. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/alignment/run_orpo.py +0 -0
  86. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/README.md +0 -0
  87. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/callback.py +0 -0
  88. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/config/fsdp_config.json +0 -0
  89. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  90. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  91. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  92. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/img/llama_tps.png +0 -0
  93. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  94. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/img/qwen_tps.png +0 -0
  95. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/launch_on_modal.py +0 -0
  96. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/requirements.txt +0 -0
  97. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/run_benchmarks.sh +0 -0
  98. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/run_gemma.sh +0 -0
  99. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/run_llama.sh +0 -0
  100. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/run_qwen.sh +0 -0
  101. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/run_qwen2_vl.sh +0 -0
  102. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/training.py +0 -0
  103. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/huggingface/training_multimodal.py +0 -0
  104. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/lightning/README.md +0 -0
  105. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/lightning/requirements.txt +0 -0
  106. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/lightning/training.py +0 -0
  107. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/README.md +0 -0
  108. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/callback.py +0 -0
  109. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  110. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  111. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  112. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  113. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  114. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  115. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  116. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  117. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  118. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/medusa_util.py +0 -0
  119. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/requirements.txt +0 -0
  120. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  121. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/examples/medusa/train.py +0 -0
  122. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/licenses/LICENSE-Apache-2.0 +0 -0
  123. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  124. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  125. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/licenses/LICENSE-MIT-llmc +0 -0
  126. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/licenses/LICENSE-MIT-triton +0 -0
  127. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/mkdocs.yml +0 -0
  128. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/setup.cfg +0 -0
  129. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/setup.py +0 -0
  130. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/__init__.py +0 -0
  131. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/README.md +0 -0
  132. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  133. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
  134. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  135. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  136. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/functional.py +0 -0
  137. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  138. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  139. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  140. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  141. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  142. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  143. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  144. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  145. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  146. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/env_report.py +0 -0
  147. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/__init__.py +0 -0
  148. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/cross_entropy.py +0 -0
  149. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/dyt.py +0 -0
  150. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  151. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  152. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
  153. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  154. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  155. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
  156. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/geglu.py +0 -0
  157. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/group_norm.py +0 -0
  158. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/grpo_loss.py +0 -0
  159. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/jsd.py +0 -0
  160. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/kl_div.py +0 -0
  161. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/layer_norm.py +0 -0
  162. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/llama4_rope.py +0 -0
  163. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/multi_token_attention.py +0 -0
  164. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/poly_norm.py +0 -0
  165. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  166. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/rms_norm.py +0 -0
  167. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/rope.py +0 -0
  168. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/softmax.py +0 -0
  169. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/sparsemax.py +0 -0
  170. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/swiglu.py +0 -0
  171. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/tvd.py +0 -0
  172. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/ops/utils.py +0 -0
  173. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/auto_model.py +0 -0
  174. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  175. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/dyt.py +0 -0
  176. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
  177. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  178. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/fsdp.py +0 -0
  179. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/functional.py +0 -0
  180. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
  181. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  182. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  183. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
  184. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/geglu.py +0 -0
  185. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/group_norm.py +0 -0
  186. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  187. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/jsd.py +0 -0
  188. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/kl_div.py +0 -0
  189. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/layer_norm.py +0 -0
  190. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/llama4_rope.py +0 -0
  191. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/__init__.py +0 -0
  192. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
  193. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/gemma.py +0 -0
  194. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  195. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  196. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/glm4.py +0 -0
  197. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/glm4v.py +0 -0
  198. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
  199. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/internvl.py +0 -0
  200. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/llama.py +0 -0
  201. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/llama4.py +0 -0
  202. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/llava.py +0 -0
  203. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  204. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/mistral.py +0 -0
  205. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  206. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/mllama.py +0 -0
  207. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  208. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  209. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/phi3.py +0 -0
  210. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  211. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  212. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  213. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  214. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  215. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/qwen3_next.py +0 -0
  216. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/model/smollm3.py +0 -0
  217. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
  218. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/poly_norm.py +0 -0
  219. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  220. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/rms_norm.py +0 -0
  221. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/rope.py +0 -0
  222. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/softmax.py +0 -0
  223. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/sparsemax.py +0 -0
  224. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/swiglu.py +0 -0
  225. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  226. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  227. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  228. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/transformers/tvd.py +0 -0
  229. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/triton/__init__.py +0 -0
  230. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/triton/monkey_patch.py +0 -0
  231. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel/utils.py +0 -0
  232. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  233. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  234. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  235. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/__init__.py +0 -0
  236. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/__init__.py +0 -0
  237. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/test_cosine_loss.py +0 -0
  238. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/test_cpo_loss.py +0 -0
  239. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/test_dpo_loss.py +0 -0
  240. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/test_grpo_loss.py +0 -0
  241. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/test_jsd_loss.py +0 -0
  242. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/test_kto_loss.py +0 -0
  243. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/test_orpo_loss.py +0 -0
  244. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/chunked_loss/test_simpo_loss.py +0 -0
  245. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/conftest.py +0 -0
  246. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/__init__.py +0 -0
  247. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/bf16/__init__.py +0 -0
  248. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/bf16/test_mini_models.py +0 -0
  249. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  250. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/fp32/__init__.py +0 -0
  251. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/fp32/test_mini_models.py +0 -0
  252. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  253. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  254. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  255. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  256. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  257. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  258. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
  259. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  260. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  261. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  262. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
  263. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  264. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/tiny_shakespeare.txt +0 -0
  265. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  266. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  267. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  268. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_auto_model.py +0 -0
  269. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_cross_entropy.py +0 -0
  270. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_dyt.py +0 -0
  271. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_embedding.py +0 -0
  272. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_flex_attention.py +0 -0
  273. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_fused_add_rms_norm.py +0 -0
  274. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  275. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_fused_linear_jsd.py +0 -0
  276. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_fused_neighborhood_attention.py +0 -0
  277. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_geglu.py +0 -0
  278. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_group_norm.py +0 -0
  279. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_grpo_loss.py +0 -0
  280. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_jsd.py +0 -0
  281. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_kl_div.py +0 -0
  282. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_layer_norm.py +0 -0
  283. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_mm_int8int2.py +0 -0
  284. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_multi_token_attention.py +0 -0
  285. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_poly_norm.py +0 -0
  286. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_qwen2vl_mrope.py +0 -0
  287. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_rms_norm.py +0 -0
  288. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_rope.py +0 -0
  289. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_softmax.py +0 -0
  290. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_sparsemax.py +0 -0
  291. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_swiglu.py +0 -0
  292. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_trainer_integration.py +0 -0
  293. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_transformers.py +0 -0
  294. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/transformers/test_tvd.py +0 -0
  295. {liger_kernel_nightly-0.6.2.dev20251024142419 → liger_kernel_nightly-0.6.3.dev20251027181634}/test/triton/test_triton_monkey_patch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.2.dev20251024142419
3
+ Version: 0.6.3.dev20251027181634
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "liger_kernel_nightly"
7
- version = "0.6.2.dev20251024142419"
7
+ version = "0.6.3.dev20251027181634"
8
8
  description = "Efficient Triton kernels for LLM Training"
9
9
  urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
10
10
  readme = { file = "README.md", content-type = "text/markdown" }
@@ -57,6 +57,7 @@ if TYPE_CHECKING:
57
57
  from liger_kernel.transformers.monkey_patch import apply_liger_kernel_to_qwen3_moe # noqa: F401
58
58
  from liger_kernel.transformers.monkey_patch import apply_liger_kernel_to_qwen3_next # noqa: F401
59
59
  from liger_kernel.transformers.monkey_patch import apply_liger_kernel_to_smollm3 # noqa: F401
60
+ from liger_kernel.transformers.monkey_patch import apply_liger_kernel_to_smolvlm # noqa: F401
60
61
 
61
62
 
62
63
  # Check if 'transformers' is installed
@@ -120,6 +121,7 @@ def __getattr__(name: str):
120
121
  "apply_liger_kernel_to_qwen3_moe",
121
122
  "apply_liger_kernel_to_qwen3_next",
122
123
  "apply_liger_kernel_to_smollm3",
124
+ "apply_liger_kernel_to_smolvlm",
123
125
  }
124
126
 
125
127
  if name in monkey_patch_symbols:
@@ -189,5 +191,6 @@ if _TRANSFORMERS_AVAILABLE:
189
191
  "apply_liger_kernel_to_qwen3_moe",
190
192
  "apply_liger_kernel_to_qwen3_next",
191
193
  "apply_liger_kernel_to_smollm3",
194
+ "apply_liger_kernel_to_smolvlm",
192
195
  ]
193
196
  )
@@ -0,0 +1,158 @@
1
+ from typing import TYPE_CHECKING
2
+ from typing import Optional
3
+ from typing import Union
4
+
5
+ import torch
6
+
7
+ from transformers.models.smolvlm.modeling_smolvlm import SmolVLMCausalLMOutputWithPast
8
+ from transformers.processing_utils import Unpack
9
+ from transformers.utils.generic import can_return_tuple
10
+
11
+ from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
12
+
13
+ if TYPE_CHECKING:
14
+ from transformers.cache_utils import Cache
15
+ from transformers.utils.generic import TransformersKwargs
16
+
17
+
18
# Forward adapted to enable fused Linear + CE without materializing logits.
# Mirrors the pattern used for other multimodal models (e.g., InternVL, LLaVA).
@can_return_tuple
def lce_forward(
    self,
    input_ids: Optional[torch.LongTensor] = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_values: Optional["Cache"] = None,
    inputs_embeds: Optional[torch.FloatTensor] = None,
    pixel_values: Optional[torch.FloatTensor] = None,
    pixel_attention_mask: Optional[torch.BoolTensor] = None,
    image_hidden_states: Optional[torch.FloatTensor] = None,
    labels: Optional[torch.LongTensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    cache_position: Optional[torch.LongTensor] = None,
    return_dict: Optional[bool] = None,
    logits_to_keep: Union[int, torch.Tensor] = 0,
    skip_logits: Optional[bool] = None,  # Added argument for liger-kernel
    **lm_kwargs: Unpack["TransformersKwargs"],  # renamed from kwargs
) -> Union[tuple, SmolVLMCausalLMOutputWithPast]:
    r"""
    Liger replacement for the SmolVLM conditional-generation forward pass.

    Runs the wrapped multimodal model, then either computes the loss with the fused
    linear + cross-entropy kernel (logits are never materialized) or projects the hidden
    states through `lm_head` and uses the model's regular `loss_function`.

    pixel_attention_mask (`torch.Tensor` of shape `(batch_size, image_size, image_size)`, *optional*):
        Mask to avoid performing attention on padding pixel indices.
    image_hidden_states (`torch.FloatTensor` of shape `(batch_size, num_channels, image_size, image_size)`):
        The hidden states of the image encoder after modality projection.
    labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
        Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
        config.vocab_size]` or `model.image_token_id`. Tokens with indices set to `model.image_token_id` are
        ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
    logits_to_keep (`int` or `torch.Tensor`, *optional*, defaults to 0):
        If an `int`, only the last `logits_to_keep` positions of the hidden states are used (`0` keeps all
        positions). Otherwise it is used directly as the index along the sequence dimension.
    skip_logits (`bool`, *optional*):
        Liger-specific. When `True`, the loss is computed with the fused linear + cross-entropy path and the
        returned `logits` is `None`. When `None`, it defaults to `True` only if the module is in training
        mode and `labels` or `shift_labels` was supplied.
    shift_labels (via `**lm_kwargs`, *optional*):
        Already-shifted labels. Popped from `lm_kwargs` and forwarded to whichever loss path is taken.

    Raises:
        ValueError: If `skip_logits` is `True` but neither `labels` nor `shift_labels` is provided.

    Example:

    ```python
    >>> import requests
    >>> import torch
    >>> from PIL import Image
    >>> from io import BytesIO

    >>> from transformers import AutoProcessor, AutoModelForImageTextToText
    >>> from transformers.image_utils import load_image

    >>> # Note that passing the image urls (instead of the actual pil images) to the processor is also possible
    >>> image1 = load_image("https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg")
    >>> image2 = load_image("https://cdn.britannica.com/59/94459-050-DBA42467/Skyline-Chicago.jpg")
    >>> image3 = load_image("https://cdn.britannica.com/68/170868-050-8DDE8263/Golden-Gate-Bridge-San-Francisco.jpg")

    >>> processor = AutoProcessor.from_pretrained("HuggingFaceTB/SmolVLM2-2.2B-Instruct")
    >>> model = AutoModelForImageTextToText.from_pretrained("HuggingFaceTB/SmolVLM2-2.2B-Instruct", dtype=torch.bfloat16, device_map="auto")

    >>> # Create inputs
    >>> messages = [
    ...     {
    ...         "role": "user",
    ...         "content": [
    ...             {"type": "video", "path": "path/to/video"},
    ...             {"type": "text", "text": "What is happening in this video?"},
    ...         ]
    ...     }
    ... ]

    >>> inputs = processor.apply_chat_template([messages], add_generation_prompt=True)

    >>> # Generate
    >>> generated_ids = model.generate(**inputs, max_new_tokens=256)
    >>> generated_texts = processor.batch_decode(generated_ids, skip_special_tokens=True)

    >>> print(generated_texts)
    ```"""
    # Fall back to the model config for any output switch the caller left unset.
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (
        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
    )
    return_dict = return_dict if return_dict is not None else self.config.use_return_dict

    # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
    # The inner model is always asked for a dict-style output so the attribute
    # accesses below work regardless of the caller's `return_dict`.
    outputs = self.model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        pixel_values=pixel_values,
        pixel_attention_mask=pixel_attention_mask,
        image_hidden_states=image_hidden_states,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        cache_position=cache_position,
        return_dict=True,
        **lm_kwargs,
    )

    # Copied from llava.py
    hidden_states = outputs[0]
    # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
    slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
    kept_hidden_states = hidden_states[:, slice_indices, :]

    # `shift_labels` is an explicit argument of both loss paths, so remove it from
    # the pass-through kwargs to avoid handing it over twice.
    shift_labels = lm_kwargs.pop("shift_labels", None)
    logits = None
    loss = None

    if skip_logits and labels is None and shift_labels is None:
        raise ValueError("skip_logits is True, but labels and shift_labels are None")

    if skip_logits is None:
        # By default, if in training mode, don't materialize logits
        skip_logits = self.training and (labels is not None or shift_labels is not None)

    if skip_logits:
        # Fused path: lm_head projection and cross entropy are computed together.
        loss = LigerForCausalLMLoss(
            hidden_states=kept_hidden_states,
            lm_head_weight=self.lm_head.weight,
            labels=labels,
            shift_labels=shift_labels,
            hidden_size=self.config.text_config.hidden_size,
            **lm_kwargs,
        )

    else:
        logits = self.lm_head(kept_hidden_states)
        if labels is not None or shift_labels is not None:
            # Forward `shift_labels` too: it was popped from `lm_kwargs` above, and this
            # branch is reachable with only `shift_labels` set (labels=None).
            loss = self.loss_function(
                logits=logits,
                labels=labels,
                shift_labels=shift_labels,
                vocab_size=self.config.text_config.vocab_size,
                **lm_kwargs,
            )

    if not return_dict:
        output = (logits,) + outputs[1:]
        return (loss,) + output if loss is not None else output

    return SmolVLMCausalLMOutputWithPast(
        loss=loss,
        logits=logits,
        past_key_values=outputs.past_key_values,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
        image_hidden_states=outputs.image_hidden_states,
    )
@@ -2112,6 +2112,106 @@ def apply_liger_kernel_to_internvl(
2112
2112
  logger.warning(f"{vision_model_name} is not supported by Liger kernel.")
2113
2113
 
2114
2114
 
2115
def apply_liger_kernel_to_smolvlm(
    cross_entropy: bool = False,
    fused_linear_cross_entropy: bool = True,
    rms_norm: bool = True,
    layer_norm: bool = True,
    model: Optional[PreTrainedModel] = None,
    **kwargs,
) -> None:
    """
    Apply Liger kernels to replace original implementation in HuggingFace SmolVLM models.
    Because SmolVLM wraps a separate language model, an already-loaded `model` must be passed
    for Liger-Kernel's patches to reach that connected text model.
    However, if an LM not supported by Liger-Kernel is connected to SmolVLM, unexpected side effects may occur.
    NOTE: SmolVLM is not available in transformers<4.50.0

    Args:
        cross_entropy (bool): Whether to apply Liger's cross entropy loss. Default is False.
        fused_linear_cross_entropy (bool):
            Whether to apply Liger's fused linear cross entropy loss. Default is True.
            `cross_entropy` and `fused_linear_cross_entropy` cannot both be True.
            If `fused_linear_cross_entropy` is True, the logits are not materialized, which is more
            memory efficient.
        rms_norm (bool): Whether to apply Liger's RMSNorm. Default is True.
        layer_norm (bool): Whether to apply Liger's LayerNorm. Default is True.
        model (PreTrainedModel): The model instance to apply Liger kernels to, if the model has already been
            loaded. Default is None.
        **kwargs: Extra options forwarded to the text model's Liger patch function; entries that
            function does not accept are dropped with a warning.

    Raises:
        AssertionError: If both `cross_entropy` and `fused_linear_cross_entropy` are True.
        TypeError: If `model` is given but is neither `SmolVLMForConditionalGeneration` nor `SmolVLMModel`.
    """
    assert not (cross_entropy and fused_linear_cross_entropy), (
        "cross_entropy and fused_linear_cross_entropy cannot both be True."
    )

    from transformers.models.smolvlm import modeling_smolvlm
    from transformers.models.smolvlm.modeling_smolvlm import SmolVLMEncoderLayer
    from transformers.models.smolvlm.modeling_smolvlm import SmolVLMForConditionalGeneration
    from transformers.models.smolvlm.modeling_smolvlm import SmolVLMModel
    from transformers.models.smolvlm.modeling_smolvlm import SmolVLMVisionTransformer

    from liger_kernel.transformers.model.smolvlm import lce_forward as smolvlm_lce_forward

    # Patch LayerNorm for vision model if model is not provided (pre-initialization)
    # NOTE(review): this assigns on the `nn` object referenced by modeling_smolvlm; if that is
    # the shared `torch.nn` module the replacement is visible process-wide — confirm intended.
    if layer_norm and model is None:
        modeling_smolvlm.nn.LayerNorm = LigerLayerNorm

    if cross_entropy:
        logger.info("Apply liger cross entropy")

        from transformers.loss.loss_utils import nn

        nn.functional.cross_entropy = liger_cross_entropy
    if fused_linear_cross_entropy:
        if model is None:
            modeling_smolvlm.SmolVLMForConditionalGeneration.forward = smolvlm_lce_forward
        elif isinstance(model, SmolVLMForConditionalGeneration):
            model.forward = MethodType(smolvlm_lce_forward, model)
        # A bare SmolVLMModel is deliberately left untouched: the fused forward reads
        # `self.model` and `self.lm_head`, which only the conditional-generation wrapper
        # provides, so binding it there would break the instance's forward pass.
    if rms_norm:
        modeling_smolvlm.SmolVLMRMSNorm = LigerRMSNorm

    if model is not None:
        # The model instance already exists, so we need to additionally patch the
        # instance variables that reference already-instantiated modules
        if isinstance(model, SmolVLMForConditionalGeneration):
            text_model = model.model.text_model
            vision_model: SmolVLMVisionTransformer = model.model.vision_model
        elif isinstance(model, SmolVLMModel):
            text_model = model.text_model
            vision_model: SmolVLMVisionTransformer = model.vision_model
        else:
            raise TypeError(
                f"Unsupported smolvlm model type. `model` must be `SmolVLMForConditionalGeneration`, `SmolVLMModel`. Got: {type(model)}"
            )

        text_model_name = model.config.text_config.model_type
        text_liger_fn = MODEL_TYPE_TO_APPLY_LIGER_FN.get(text_model_name, None)

        # Loss patching is handled at the SmolVLM level, so it defaults to off for the inner
        # text model (caller kwargs may override); `rms_norm` always follows this call's flag.
        kwargs = {"cross_entropy": False, "fused_linear_cross_entropy": False, **kwargs} | {"rms_norm": rms_norm}
        if text_liger_fn:
            # Forward only the options the text model's patch function actually declares.
            accept_params = inspect.signature(text_liger_fn).parameters
            remain_params = set(kwargs) - set(accept_params)
            text_kwargs = {k: v for k, v in kwargs.items() if k in accept_params}

            if remain_params:
                logger.warning(
                    f"These parameters are not supported by {text_model_name}. Enter the remaining {list(text_kwargs.keys())} except for {list(remain_params)}\n"
                    f"Parameters accepted by {text_model_name}: {list(accept_params.keys())}"
                )
            text_kwargs["model"] = text_model
            text_liger_fn(**text_kwargs)
        elif text_model_name not in MODEL_TYPE_TO_APPLY_LIGER_FN:
            logger.warning(f"{text_model_name} is not supported by Liger kernel.")

        # Patch vision model LayerNorm layers
        if layer_norm:
            # Patch post_layernorm
            _patch_layer_norm_module(vision_model.post_layernorm)

            # Patch encoder layers
            for encoder_layer in vision_model.encoder.layers:
                encoder_layer: SmolVLMEncoderLayer
                _patch_layer_norm_module(encoder_layer.layer_norm1)
                _patch_layer_norm_module(encoder_layer.layer_norm2)
2213
+
2214
+
2115
2215
  def apply_liger_kernel_to_falcon_h1(
2116
2216
  rope: bool = True,
2117
2217
  cross_entropy: bool = False,
@@ -2304,6 +2404,7 @@ MODEL_TYPE_TO_APPLY_LIGER_FN = {
2304
2404
  "phi3": apply_liger_kernel_to_phi3,
2305
2405
  "paligemma": apply_liger_kernel_to_paligemma,
2306
2406
  "falcon_h1": apply_liger_kernel_to_falcon_h1,
2407
+ "smolvlm": apply_liger_kernel_to_smolvlm,
2307
2408
  }
2308
2409
 
2309
2410
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.6.2.dev20251024142419
3
+ Version: 0.6.3.dev20251027181634
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -215,6 +215,7 @@ src/liger_kernel/transformers/model/qwen3.py
215
215
  src/liger_kernel/transformers/model/qwen3_moe.py
216
216
  src/liger_kernel/transformers/model/qwen3_next.py
217
217
  src/liger_kernel/transformers/model/smollm3.py
218
+ src/liger_kernel/transformers/model/smolvlm.py
218
219
  src/liger_kernel/transformers/trainer/__init__.py
219
220
  src/liger_kernel/transformers/trainer/orpo_trainer.py
220
221
  src/liger_kernel/triton/__init__.py
@@ -248,6 +249,7 @@ test/convergence/fp32/test_mini_models_with_logits.py
248
249
  test/resources/tiny_shakespeare.txt
249
250
  test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json
250
251
  test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json
252
+ test/resources/fake_configs/HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json
251
253
  test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json
252
254
  test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json
253
255
  test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json
@@ -19,6 +19,7 @@ from liger_kernel.transformers import apply_liger_kernel_to_mllama
19
19
  from liger_kernel.transformers import apply_liger_kernel_to_paligemma
20
20
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2_5_vl
21
21
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl
22
+ from liger_kernel.transformers import apply_liger_kernel_to_smolvlm
22
23
  from test.utils import FAKE_CONFIGS_PATH
23
24
  from test.utils import UNTOKENIZED_DATASET_PATH
24
25
  from test.utils import MiniModelConfig
@@ -39,6 +40,7 @@ from test.utils import revert_liger_kernel_to_mllama
39
40
  from test.utils import revert_liger_kernel_to_Paligemma
40
41
  from test.utils import revert_liger_kernel_to_qwen2_5_vl
41
42
  from test.utils import revert_liger_kernel_to_qwen2_vl
43
+ from test.utils import revert_liger_kernel_to_smolvlm2
42
44
  from test.utils import set_seed
43
45
  from test.utils import supports_bfloat16
44
46
  from test.utils import train_bpe_tokenizer
@@ -159,6 +161,26 @@ try:
159
161
  except ImportError:
160
162
  INTERNVL_AVAILABLE = False
161
163
 
164
+ try:
165
+ # SmolVLM2 is only available in transformers>=4.50.0
166
+ from transformers.models.gpt2.tokenization_gpt2_fast import GPT2TokenizerFast
167
+ from transformers.models.smolvlm.configuration_smolvlm import SmolVLMConfig
168
+ from transformers.models.smolvlm.image_processing_smolvlm import SmolVLMImageProcessor
169
+ from transformers.models.smolvlm.modeling_smolvlm import SmolVLMForConditionalGeneration
170
+ from transformers.models.smolvlm.processing_smolvlm import SmolVLMProcessor
171
+ from transformers.models.smolvlm.video_processing_smolvlm import SmolVLMVideoProcessor
172
+
173
+ SMOLVLM2_AVAILABLE = True
174
+ except ImportError:
175
+ SMOLVLM2_AVAILABLE = False
176
+
177
+ try:
178
+ from num2words import num2words # noqa: F401
179
+
180
+ NUM2WORDS_AVAILABLE = True
181
+ except ImportError:
182
+ NUM2WORDS_AVAILABLE = False
183
+
162
184
  from liger_kernel.utils import infer_device
163
185
 
164
186
  device = infer_device()
@@ -566,6 +588,44 @@ if INTERNVL_AVAILABLE:
566
588
  ),
567
589
  )
568
590
 
591
+ if SMOLVLM2_AVAILABLE:
592
+ MINI_MODEL_SETUPS["mini_smolvlm2"] = MiniModelConfig(
593
+ liger_kernel_patch_func=apply_liger_kernel_to_smolvlm,
594
+ liger_kernel_patch_revert_func=revert_liger_kernel_to_smolvlm2,
595
+ model_class=SmolVLMForConditionalGeneration,
596
+ mini_model_config=SmolVLMConfig(
597
+ text_config=LlamaConfig(
598
+ attention_bias=False,
599
+ attention_dropout=0.0,
600
+ bos_token_id=1,
601
+ eos_token_id=2,
602
+ pad_token_id=2,
603
+ hidden_act="silu",
604
+ hidden_size=576, # 576 for 256M model
605
+ initializer_range=0.041666666666666664,
606
+ intermediate_size=1536, # 1536 for 256M model
607
+ max_position_embeddings=8192,
608
+ num_attention_heads=9, # 9 for 256M model
609
+ num_hidden_layers=4, # 30 -> reduced to 4 for testing
610
+ num_key_value_heads=3, # 3 for 256M model
611
+ rms_norm_eps=1e-5,
612
+ rope_theta=100000,
613
+ tie_word_embeddings=False,
614
+ vocab_size=49280,
615
+ ),
616
+ vision_config={
617
+ "hidden_size": 768,
618
+ "intermediate_size": 3072,
619
+ "num_hidden_layers": 4, # 12 -> reduced to 4 for testing
620
+ "num_attention_heads": 12,
621
+ "image_size": 512,
622
+ "patch_size": 16,
623
+ },
624
+ image_token_id=49190,
625
+ attn_implementation="sdpa", # default value, pytorch native attention
626
+ ),
627
+ )
628
+
569
629
  if QWEN2_5_VL_AVAILABLE:
570
630
  MINI_MODEL_SETUPS["mini_qwen2_5_vl"] = MiniModelConfig(
571
631
  liger_kernel_patch_func=functools.partial(apply_liger_kernel_to_qwen2_5_vl, fused_linear_cross_entropy=False),
@@ -718,6 +778,28 @@ def create_processor(model_name: str):
718
778
  image_processor=image_processor, tokenizer=qwen_tokenizer, video_processor=video_processor
719
779
  )
720
780
 
781
+ elif model_name == "mini_smolvlm2":
782
+ tokenizer_config = load_tokenizer_config(
783
+ os.path.join(FAKE_CONFIGS_PATH, "HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json")
784
+ )
785
+ tokenizer_base = train_bpe_tokenizer(
786
+ [
787
+ token.content
788
+ for key, token in sorted(
789
+ tokenizer_config["added_tokens_decoder"].items(),
790
+ key=lambda x: int(x[0]),
791
+ )
792
+ ]
793
+ )
794
+ gpt2_tokenizer = GPT2TokenizerFast(tokenizer_object=tokenizer_base, **tokenizer_config)
795
+ image_processor = SmolVLMImageProcessor(size={"longest_edge": 512})
796
+ video_processor = SmolVLMVideoProcessor()
797
+
798
+ # Return proper SmolVLM processor
799
+ return SmolVLMProcessor(
800
+ image_processor=image_processor, tokenizer=gpt2_tokenizer, video_processor=video_processor
801
+ )
802
+
721
803
  elif model_name.startswith("mini_llama4"):
722
804
  tokenizer_config = load_tokenizer_config(
723
805
  os.path.join(
@@ -1032,6 +1114,29 @@ def run_mini_model_multimodal(
1032
1114
  ),
1033
1115
  ],
1034
1116
  ),
1117
+ pytest.param(
1118
+ "mini_smolvlm2",
1119
+ 32,
1120
+ 1e-5,
1121
+ torch.bfloat16,
1122
+ 5e-2,
1123
+ 5e-2,
1124
+ 1e-1,
1125
+ 1e-2,
1126
+ 1e-2,
1127
+ 1e-2,
1128
+ marks=[
1129
+ pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
1130
+ pytest.mark.skipif(
1131
+ not SMOLVLM2_AVAILABLE,
1132
+ reason="SmolVLM2 not available in this version of transformers",
1133
+ ),
1134
+ pytest.mark.skipif(
1135
+ not NUM2WORDS_AVAILABLE,
1136
+ reason="num2words must be present to run SmolVLMProcessor",
1137
+ ),
1138
+ ],
1139
+ ),
1035
1140
  pytest.param(
1036
1141
  "mini_qwen2_5_vl",
1037
1142
  32,
@@ -20,6 +20,7 @@ from liger_kernel.transformers import apply_liger_kernel_to_mllama
20
20
  from liger_kernel.transformers import apply_liger_kernel_to_paligemma
21
21
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2_5_vl
22
22
  from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl
23
+ from liger_kernel.transformers import apply_liger_kernel_to_smolvlm
23
24
  from test.utils import FAKE_CONFIGS_PATH
24
25
  from test.utils import UNTOKENIZED_DATASET_PATH
25
26
  from test.utils import MiniModelConfig
@@ -40,6 +41,7 @@ from test.utils import revert_liger_kernel_to_mllama
40
41
  from test.utils import revert_liger_kernel_to_Paligemma
41
42
  from test.utils import revert_liger_kernel_to_qwen2_5_vl
42
43
  from test.utils import revert_liger_kernel_to_qwen2_vl
44
+ from test.utils import revert_liger_kernel_to_smolvlm2
43
45
  from test.utils import set_seed
44
46
  from test.utils import train_bpe_tokenizer
45
47
 
@@ -155,6 +157,26 @@ try:
155
157
  except ImportError:
156
158
  INTERNVL_AVAILABLE = False
157
159
 
160
+ try:
161
+ # SmolVLM2 is only available in transformers>=4.50.0
162
+ from transformers.models.gpt2.tokenization_gpt2_fast import GPT2TokenizerFast
163
+ from transformers.models.smolvlm.configuration_smolvlm import SmolVLMConfig
164
+ from transformers.models.smolvlm.image_processing_smolvlm import SmolVLMImageProcessor
165
+ from transformers.models.smolvlm.modeling_smolvlm import SmolVLMForConditionalGeneration
166
+ from transformers.models.smolvlm.processing_smolvlm import SmolVLMProcessor
167
+ from transformers.models.smolvlm.video_processing_smolvlm import SmolVLMVideoProcessor
168
+
169
+ SMOLVLM2_AVAILABLE = True
170
+ except ImportError:
171
+ SMOLVLM2_AVAILABLE = False
172
+
173
+ try:
174
+ from num2words import num2words # noqa: F401
175
+
176
+ NUM2WORDS_AVAILABLE = True
177
+ except ImportError:
178
+ NUM2WORDS_AVAILABLE = False
179
+
158
180
  from liger_kernel.utils import infer_device
159
181
 
160
182
  device = infer_device()
@@ -564,6 +586,44 @@ if INTERNVL_AVAILABLE:
564
586
  ),
565
587
  )
566
588
 
589
+ if SMOLVLM2_AVAILABLE:
590
+ MINI_MODEL_SETUPS["mini_smolvlm2"] = MiniModelConfig(
591
+ liger_kernel_patch_func=apply_liger_kernel_to_smolvlm,
592
+ liger_kernel_patch_revert_func=revert_liger_kernel_to_smolvlm2,
593
+ model_class=SmolVLMForConditionalGeneration,
594
+ mini_model_config=SmolVLMConfig(
595
+ text_config=LlamaConfig(
596
+ attention_bias=False,
597
+ attention_dropout=0.0,
598
+ bos_token_id=1,
599
+ eos_token_id=2,
600
+ pad_token_id=2,
601
+ hidden_act="silu",
602
+ hidden_size=576, # 576 for 256M model
603
+ initializer_range=0.041666666666666664,
604
+ intermediate_size=1536, # 1536 for 256M model
605
+ max_position_embeddings=8192,
606
+ num_attention_heads=9, # 9 for 256M model
607
+ num_hidden_layers=4, # 30 -> reduced to 4 for testing
608
+ num_key_value_heads=3, # 3 for 256M model
609
+ rms_norm_eps=1e-5,
610
+ rope_theta=100000,
611
+ tie_word_embeddings=False,
612
+ vocab_size=49280,
613
+ ),
614
+ vision_config={
615
+ "hidden_size": 768,
616
+ "intermediate_size": 3072,
617
+ "num_hidden_layers": 4, # 12 -> reduced to 4 for testing
618
+ "num_attention_heads": 12,
619
+ "image_size": 512,
620
+ "patch_size": 16,
621
+ },
622
+ image_token_id=49190,
623
+ attn_implementation="sdpa", # default value, pytorch native attention
624
+ ),
625
+ )
626
+
567
627
  if QWEN2_5_VL_AVAILABLE:
568
628
  MINI_MODEL_SETUPS["mini_qwen2_5_vl"] = MiniModelConfig(
569
629
  liger_kernel_patch_func=functools.partial(apply_liger_kernel_to_qwen2_5_vl, fused_linear_cross_entropy=False),
@@ -716,6 +776,28 @@ def create_processor(model_name: str):
716
776
  image_processor=image_processor, tokenizer=qwen_tokenizer, video_processor=video_processor
717
777
  )
718
778
 
779
+ elif model_name == "mini_smolvlm2":
780
+ tokenizer_config = load_tokenizer_config(
781
+ os.path.join(FAKE_CONFIGS_PATH, "HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json")
782
+ )
783
+ tokenizer_base = train_bpe_tokenizer(
784
+ [
785
+ token.content
786
+ for key, token in sorted(
787
+ tokenizer_config["added_tokens_decoder"].items(),
788
+ key=lambda x: int(x[0]),
789
+ )
790
+ ]
791
+ )
792
+ gpt2_tokenizer = GPT2TokenizerFast(tokenizer_object=tokenizer_base, **tokenizer_config)
793
+ image_processor = SmolVLMImageProcessor(size={"longest_edge": 512})
794
+ video_processor = SmolVLMVideoProcessor()
795
+
796
+ # Return proper SmolVLM processor
797
+ return SmolVLMProcessor(
798
+ image_processor=image_processor, tokenizer=gpt2_tokenizer, video_processor=video_processor
799
+ )
800
+
719
801
  elif model_name.startswith("mini_llama4"):
720
802
  tokenizer_config = load_tokenizer_config(
721
803
  os.path.join(
@@ -1035,6 +1117,28 @@ def run_mini_model_multimodal(
1035
1117
  reason="InternVL not available in this version of transformers",
1036
1118
  ),
1037
1119
  ),
1120
+ pytest.param(
1121
+ "mini_smolvlm2",
1122
+ 32,
1123
+ 1e-4,
1124
+ torch.float32,
1125
+ 1e-8,
1126
+ 1e-5,
1127
+ 5e-3,
1128
+ 1e-5,
1129
+ 5e-3,
1130
+ 1e-5,
1131
+ marks=[
1132
+ pytest.mark.skipif(
1133
+ not SMOLVLM2_AVAILABLE,
1134
+ reason="SmolVLM2 not available in this version of transformers",
1135
+ ),
1136
+ pytest.mark.skipif(
1137
+ not NUM2WORDS_AVAILABLE,
1138
+ reason="num2words must be present to run SmolVLMProcessor",
1139
+ ),
1140
+ ],
1141
+ ),
1038
1142
  pytest.param(
1039
1143
  "mini_qwen2_5_vl",
1040
1144
  32,