liger-kernel 0.5.9__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel-0.6.0/.github/workflows/benchmark.yml +93 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/workflows/docs.yml +4 -2
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.gitignore +4 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/Makefile +8 -2
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/PKG-INFO +41 -21
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/README.md +38 -19
- liger_kernel-0.6.0/benchmark/README.md +48 -0
- liger_kernel-0.6.0/benchmark/benchmarks_visualizer.py +299 -0
- liger_kernel-0.6.0/benchmark/data/all_benchmark_data.csv +1495 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_cpo_loss.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_cross_entropy.py +1 -1
- liger_kernel-0.6.0/benchmark/scripts/benchmark_distill_cosine_loss.py +266 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_distill_jsd_loss.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_dpo_loss.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_dyt.py +37 -34
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_embedding.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_fused_linear_jsd.py +1 -1
- liger_kernel-0.6.0/benchmark/scripts/benchmark_fused_neighborhood_attention.py +367 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_jsd.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_kl_div.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_kto_loss.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_layer_norm.py +1 -1
- liger_kernel-0.6.0/benchmark/scripts/benchmark_multi_token_attention.py +218 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_orpo_loss.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_simpo_loss.py +1 -1
- liger_kernel-0.6.0/benchmark/scripts/benchmark_softmax.py +140 -0
- liger_kernel-0.6.0/benchmark/scripts/benchmark_sparse_multi_token_attention.py +254 -0
- liger_kernel-0.6.0/benchmark/scripts/benchmark_sparsemax.py +172 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_swiglu.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_tvd.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/utils.py +8 -1
- liger_kernel-0.6.0/dev/modal/benchmarks.py +73 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/dev/modal/tests.py +2 -2
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/dev/modal/tests_bwd.py +4 -4
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/Low-Level-APIs.md +24 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/training_multimodal.py +1 -1
- liger_kernel-0.6.0/examples/medusa/requirements.txt +3 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/scripts/llama3_8b_medusa.sh +2 -5
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/train.py +37 -39
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/mkdocs.yml +2 -2
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/pyproject.toml +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/setup.py +25 -4
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/__init__.py +1 -0
- liger_kernel-0.6.0/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +127 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/dpo_loss.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/functional.py +2 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/jsd_loss.py +2 -2
- liger_kernel-0.6.0/src/liger_kernel/ops/dyt.py +157 -0
- liger_kernel-0.6.0/src/liger_kernel/ops/fused_neighborhood_attention.py +1022 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/geglu.py +1 -1
- liger_kernel-0.6.0/src/liger_kernel/ops/grpo_loss.py +310 -0
- liger_kernel-0.6.0/src/liger_kernel/ops/multi_token_attention.py +207 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/rms_norm.py +265 -54
- liger_kernel-0.6.0/src/liger_kernel/ops/softmax.py +201 -0
- liger_kernel-0.6.0/src/liger_kernel/ops/sparsemax.py +179 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/swiglu.py +1 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/__init__.py +8 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/dyt.py +5 -3
- liger_kernel-0.6.0/src/liger_kernel/transformers/fsdp.py +55 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/functional.py +70 -0
- liger_kernel-0.6.0/src/liger_kernel/transformers/fused_neighborhood_attention.py +234 -0
- liger_kernel-0.6.0/src/liger_kernel/transformers/grpo_loss.py +98 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/gemma.py +25 -16
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/gemma2.py +27 -14
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/gemma3.py +62 -106
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/glm4.py +16 -13
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/llama.py +81 -18
- liger_kernel-0.6.0/src/liger_kernel/transformers/model/llama4.py +108 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/llava.py +95 -132
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/mistral.py +13 -14
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/mixtral.py +16 -15
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/mllama.py +16 -14
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/olmo2.py +16 -13
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/paligemma.py +8 -9
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/phi3.py +25 -16
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/qwen2.py +24 -15
- liger_kernel-0.6.0/src/liger_kernel/transformers/model/qwen2_5_vl.py +150 -0
- liger_kernel-0.6.0/src/liger_kernel/transformers/model/qwen2_vl.py +142 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/qwen3.py +11 -9
- liger_kernel-0.6.0/src/liger_kernel/transformers/model/qwen3_moe.py +132 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/monkey_patch.py +424 -81
- liger_kernel-0.6.0/src/liger_kernel/transformers/multi_token_attention.py +64 -0
- liger_kernel-0.6.0/src/liger_kernel/transformers/rms_norm.py +79 -0
- liger_kernel-0.6.0/src/liger_kernel/transformers/softmax.py +12 -0
- liger_kernel-0.6.0/src/liger_kernel/transformers/sparsemax.py +16 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/swiglu.py +21 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/trainer/orpo_trainer.py +1 -53
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/utils.py +11 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel.egg-info/PKG-INFO +41 -21
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel.egg-info/SOURCES.txt +29 -1
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel.egg-info/requires.txt +2 -1
- liger_kernel-0.6.0/test/chunked_loss/test_cosine_loss.py +320 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/chunked_loss/test_dpo_loss.py +2 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/bf16/test_mini_models.py +216 -61
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/bf16/test_mini_models_multimodal.py +204 -50
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/bf16/test_mini_models_with_logits.py +187 -50
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/fp32/test_mini_models.py +195 -34
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/fp32/test_mini_models_multimodal.py +185 -26
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/fp32/test_mini_models_with_logits.py +150 -20
- liger_kernel-0.6.0/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +98 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_dyt.py +50 -26
- liger_kernel-0.6.0/test/transformers/test_fused_neighborhood_attention.py +572 -0
- liger_kernel-0.6.0/test/transformers/test_grpo_loss.py +190 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_monkey_patch.py +595 -38
- liger_kernel-0.6.0/test/transformers/test_multi_token_attention.py +327 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_qwen2vl_mrope.py +3 -3
- liger_kernel-0.6.0/test/transformers/test_softmax.py +103 -0
- liger_kernel-0.6.0/test/transformers/test_sparsemax.py +111 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/utils.py +56 -2
- liger_kernel-0.5.9/benchmark/README.md +0 -30
- liger_kernel-0.5.9/benchmark/benchmarks_visualizer.py +0 -164
- liger_kernel-0.5.9/benchmark/data/all_benchmark_data.csv +0 -807
- liger_kernel-0.5.9/examples/medusa/requirements.txt +0 -3
- liger_kernel-0.5.9/src/liger_kernel/ops/dyt.py +0 -225
- liger_kernel-0.5.9/src/liger_kernel/transformers/gema3_rms.py +0 -8
- liger_kernel-0.5.9/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -206
- liger_kernel-0.5.9/src/liger_kernel/transformers/model/qwen2_vl.py +0 -210
- liger_kernel-0.5.9/src/liger_kernel/transformers/rms_norm.py +0 -43
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/pull_request_template.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/LICENSE +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/NOTICE +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/dev/fmt-requirements.txt +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/Examples.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/Getting-Started.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/High-Level-APIs.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/acknowledgement.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/contributing.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/images/banner.GIF +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/images/compose.gif +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/images/e2e-memory.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/images/e2e-tps.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/images/logo-banner.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/images/patch.gif +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/images/post-training.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/index.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/docs/license.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/README.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/callback.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/huggingface/training.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/lightning/README.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/lightning/requirements.txt +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/lightning/training.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/README.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/callback.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/setup.cfg +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel.egg-info/dependency_links.txt +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/src/liger_kernel.egg-info/top_level.txt +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/conftest.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_embedding.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_geglu.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_jsd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_rope.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_transformers.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/transformers/test_tvd.py +0 -0
- {liger_kernel-0.5.9 → liger_kernel-0.6.0}/test/triton/test_triton_monkey_patch.py +0 -0
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
name: Benchmarks
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
schedule:
|
|
5
|
+
# Runs at 00:00 UTC every Friday
|
|
6
|
+
- cron: '0 0 * * 5'
|
|
7
|
+
workflow_dispatch: # Enables manual trigger
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: write
|
|
11
|
+
|
|
12
|
+
concurrency:
|
|
13
|
+
# This causes it to cancel previous in-progress actions on the same PR / branch,
|
|
14
|
+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
|
|
15
|
+
cancel-in-progress: true
|
|
16
|
+
|
|
17
|
+
jobs:
|
|
18
|
+
benchmarks:
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
env:
|
|
21
|
+
MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
|
|
22
|
+
MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
|
|
23
|
+
GITHUB_USERNAME: linkedin
|
|
24
|
+
REPO_NAME: Liger-Kernel
|
|
25
|
+
OUTPUT_DIR: benchmarks
|
|
26
|
+
OUTPUT_FILENAME: benchmark.csv
|
|
27
|
+
GENERATED_CSV: benchmark/data/all_benchmark_data.csv
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
steps:
|
|
31
|
+
- name: Checkout code
|
|
32
|
+
uses: actions/checkout@v3
|
|
33
|
+
|
|
34
|
+
# Get the short commit hash of the checked-out HEAD
|
|
35
|
+
- name: Get commit hash
|
|
36
|
+
id: get_hash
|
|
37
|
+
run: echo "hash=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
|
|
38
|
+
|
|
39
|
+
- name: Set up Python
|
|
40
|
+
uses: actions/setup-python@v3
|
|
41
|
+
with:
|
|
42
|
+
python-version: '3.10'
|
|
43
|
+
|
|
44
|
+
# Install dependencies
|
|
45
|
+
- name: Install dependencies
|
|
46
|
+
run: |
|
|
47
|
+
python -m pip install --upgrade pip
|
|
48
|
+
pip install modal
|
|
49
|
+
pip install pandas
|
|
50
|
+
|
|
51
|
+
# Delete previous benchmark results.
|
|
52
|
+
- name: Remove previous benchmark data
|
|
53
|
+
run: |
|
|
54
|
+
rm -f benchmark/data/all_benchmark_data.csv
|
|
55
|
+
|
|
56
|
+
- name: Run benchmarks on GPU
|
|
57
|
+
run: |
|
|
58
|
+
modal run dev.modal.benchmarks
|
|
59
|
+
|
|
60
|
+
# Step 5: Checkout gh-pages branch in a subfolder
|
|
61
|
+
- name: Checkout gh-pages
|
|
62
|
+
uses: actions/checkout@v3
|
|
63
|
+
with:
|
|
64
|
+
ref: gh-pages
|
|
65
|
+
path: gh-pages
|
|
66
|
+
|
|
67
|
+
# Step 6: Copy benchmark CSV to gh-pages directory
|
|
68
|
+
- name: Copy generated benchmark to gh-pages
|
|
69
|
+
run: |
|
|
70
|
+
mkdir -p gh-pages/${OUTPUT_DIR}/${{ steps.get_hash.outputs.hash }}
|
|
71
|
+
cp ${GENERATED_CSV} gh-pages/${OUTPUT_DIR}/${{ steps.get_hash.outputs.hash }}/${OUTPUT_FILENAME}
|
|
72
|
+
# Step 7: Append commit hash to commits.txt if not already present
|
|
73
|
+
- name: Update commits.txt
|
|
74
|
+
run: |
|
|
75
|
+
cd gh-pages
|
|
76
|
+
echo "commits.txt file path: ${OUTPUT_DIR}/commits.txt"
|
|
77
|
+
# Create file if it doesn't exist
|
|
78
|
+
mkdir -p ${OUTPUT_DIR}
|
|
79
|
+
touch ${OUTPUT_DIR}/commits.txt
|
|
80
|
+
# Append only if not already present
|
|
81
|
+
if ! grep -q "${{ steps.get_hash.outputs.hash }}" ${OUTPUT_DIR}/commits.txt; then
|
|
82
|
+
echo "${{ steps.get_hash.outputs.hash }}" >> ${OUTPUT_DIR}/commits.txt
|
|
83
|
+
fi
|
|
84
|
+
# Step 8: Commit and push
|
|
85
|
+
- name: Commit and push to gh-pages
|
|
86
|
+
run: |
|
|
87
|
+
cd gh-pages
|
|
88
|
+
git config user.name github-actions[bot]
|
|
89
|
+
git config user.email 41898282+github-actions[bot]@users.noreply.github.com
|
|
90
|
+
git add .
|
|
91
|
+
git commit -m "Add benchmark for commit ${{ steps.get_hash.outputs.hash }}" || echo "No changes to commit"
|
|
92
|
+
git push origin gh-pages
|
|
93
|
+
|
|
@@ -2,11 +2,13 @@ name: Publish documentation
|
|
|
2
2
|
on:
|
|
3
3
|
push:
|
|
4
4
|
branches:
|
|
5
|
-
-
|
|
5
|
+
- main
|
|
6
|
+
|
|
6
7
|
permissions:
|
|
7
8
|
contents: write
|
|
8
9
|
jobs:
|
|
9
10
|
deploy:
|
|
11
|
+
if: False
|
|
10
12
|
runs-on: ubuntu-latest
|
|
11
13
|
steps:
|
|
12
14
|
- uses: actions/checkout@v4
|
|
@@ -25,4 +27,4 @@ jobs:
|
|
|
25
27
|
restore-keys: |
|
|
26
28
|
mkdocs-material-
|
|
27
29
|
- run: pip install mkdocs-material
|
|
28
|
-
- run: mkdocs gh-deploy --force
|
|
30
|
+
- run: mkdocs gh-deploy --force
|
|
@@ -48,13 +48,19 @@ run-benchmarks:
|
|
|
48
48
|
# MkDocs Configuration
|
|
49
49
|
MKDOCS = mkdocs
|
|
50
50
|
CONFIG_FILE = mkdocs.yml
|
|
51
|
+
SITE_DIR = site
|
|
51
52
|
|
|
52
53
|
# MkDocs targets
|
|
54
|
+
|
|
55
|
+
# Serve the documentation
|
|
53
56
|
serve:
|
|
54
57
|
$(MKDOCS) serve -f $(CONFIG_FILE)
|
|
55
58
|
|
|
59
|
+
# Build the documentation into the specified site directory
|
|
56
60
|
build:
|
|
57
|
-
$(MKDOCS) build -f $(CONFIG_FILE)
|
|
61
|
+
$(MKDOCS) build -f $(CONFIG_FILE) --site-dir $(SITE_DIR)
|
|
58
62
|
|
|
63
|
+
# Clean the output directory
|
|
59
64
|
clean:
|
|
60
|
-
rm -rf
|
|
65
|
+
rm -rf $(SITE_DIR)/
|
|
66
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: liger_kernel
|
|
3
|
-
Version: 0.5.9
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -33,7 +33,7 @@ License-File: NOTICE
|
|
|
33
33
|
Requires-Dist: torch>=2.1.2
|
|
34
34
|
Requires-Dist: triton>=2.3.1
|
|
35
35
|
Provides-Extra: dev
|
|
36
|
-
Requires-Dist: transformers>=4.
|
|
36
|
+
Requires-Dist: transformers>=4.49.0; extra == "dev"
|
|
37
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
|
38
38
|
Requires-Dist: flake8>=4.0.1.1; extra == "dev"
|
|
39
39
|
Requires-Dist: black>=24.4.2; extra == "dev"
|
|
@@ -45,6 +45,7 @@ Requires-Dist: datasets>=2.19.2; extra == "dev"
|
|
|
45
45
|
Requires-Dist: seaborn; extra == "dev"
|
|
46
46
|
Requires-Dist: mkdocs; extra == "dev"
|
|
47
47
|
Requires-Dist: mkdocs-material; extra == "dev"
|
|
48
|
+
Requires-Dist: torchvision>=0.20; extra == "dev"
|
|
48
49
|
Dynamic: license-file
|
|
49
50
|
Dynamic: provides-extra
|
|
50
51
|
Dynamic: requires-dist
|
|
@@ -59,7 +60,6 @@ Dynamic: requires-dist
|
|
|
59
60
|
<th style="padding: 10px;" colspan="2">Stable</th>
|
|
60
61
|
<th style="padding: 10px;" colspan="2">Nightly</th>
|
|
61
62
|
<th style="padding: 10px;">Discord</th>
|
|
62
|
-
<th style="padding: 10px;">Build</th>
|
|
63
63
|
</tr>
|
|
64
64
|
<tr>
|
|
65
65
|
<td style="padding: 10px;">
|
|
@@ -87,23 +87,6 @@ Dynamic: requires-dist
|
|
|
87
87
|
<img src="https://dcbadge.vercel.app/api/server/gpumode?style=flat" alt="Join Our Discord">
|
|
88
88
|
</a>
|
|
89
89
|
</td>
|
|
90
|
-
<td style="padding: 10px;">
|
|
91
|
-
<div style="display: block;">
|
|
92
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
|
|
93
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
94
|
-
</a>
|
|
95
|
-
</div>
|
|
96
|
-
<div style="display: block;">
|
|
97
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
98
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
99
|
-
</a>
|
|
100
|
-
</div>
|
|
101
|
-
<div style="display: block;">
|
|
102
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
103
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
104
|
-
</a>
|
|
105
|
-
</div>
|
|
106
|
-
</td>
|
|
107
90
|
</tr>
|
|
108
91
|
</table>
|
|
109
92
|
|
|
@@ -132,6 +115,8 @@ Dynamic: requires-dist
|
|
|
132
115
|
|
|
133
116
|
We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, KTO, JSD, and many more. Check out [how we optimize the memory](https://x.com/hsu_byron/status/1866577403918917655).
|
|
134
117
|
|
|
118
|
+
You can view the documentation site for additional installation instructions, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
|
|
119
|
+
|
|
135
120
|
## Supercharge Your Model with Liger Kernel
|
|
136
121
|
|
|
137
122
|

|
|
@@ -308,6 +293,7 @@ loss.backward()
|
|
|
308
293
|
|
|
309
294
|
| **Model** | **API** | **Supported Operations** |
|
|
310
295
|
|-------------|--------------------------------------------------------------|-------------------------------------------------------------------------|
|
|
296
|
+
| Llama4 (Text) & (Multimodal) | `liger_kernel.transformers.apply_liger_kernel_to_llama4` | RMSNorm, LayerNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
311
297
|
| LLaMA 2 & 3 | `liger_kernel.transformers.apply_liger_kernel_to_llama` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
312
298
|
| LLaMA 3.2-Vision | `liger_kernel.transformers.apply_liger_kernel_to_mllama` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
313
299
|
| Mistral | `liger_kernel.transformers.apply_liger_kernel_to_mistral` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
@@ -321,6 +307,7 @@ loss.backward()
|
|
|
321
307
|
| Qwen2-VL, & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
322
308
|
| Qwen2.5-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_5_vl` | RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
323
309
|
| Qwen3 | `liger_kernel.transformers.apply_liger_kernel_to_qwen3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
310
|
+
| Qwen3 MoE | `liger_kernel.transformers.apply_liger_kernel_to_qwen3_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
324
311
|
| Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
325
312
|
| Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
|
|
326
313
|
| OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
@@ -342,7 +329,10 @@ loss.backward()
|
|
|
342
329
|
| SwiGLU | `liger_kernel.transformers.LigerSwiGLUMLP` |
|
|
343
330
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
|
344
331
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
|
345
|
-
| Fused Linear CrossEntropy
|
|
332
|
+
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
|
333
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
|
334
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
|
335
|
+
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
|
346
336
|
|
|
347
337
|
|
|
348
338
|
### Alignment Kernels
|
|
@@ -390,6 +380,36 @@ loss.backward()
|
|
|
390
380
|
- [Axolotl](https://axolotl.ai/): Integrating Liger Kernel into Axolotl.
|
|
391
381
|
- [Llama-Factory](https://github.com/hiyouga/LLaMA-Factory): Integrating Liger Kernel into Llama-Factory.
|
|
392
382
|
|
|
383
|
+
|
|
384
|
+
## CI status
|
|
385
|
+
|
|
386
|
+
<table style="width: 100%; text-align: center; border-collapse: collapse;">
|
|
387
|
+
<tr>
|
|
388
|
+
<th style="padding: 10px;">Build</th>
|
|
389
|
+
</tr>
|
|
390
|
+
<tr>
|
|
391
|
+
<td style="padding: 10px;">
|
|
392
|
+
<div style="display: block;">
|
|
393
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
|
|
394
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
395
|
+
</a>
|
|
396
|
+
</div>
|
|
397
|
+
<div style="display: block;">
|
|
398
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
399
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
400
|
+
</a>
|
|
401
|
+
</div>
|
|
402
|
+
<div style="display: block;">
|
|
403
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml">
|
|
404
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
405
|
+
</a>
|
|
406
|
+
</div>
|
|
407
|
+
</td>
|
|
408
|
+
</tr>
|
|
409
|
+
</table>
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
|
|
393
413
|
## Contact
|
|
394
414
|
|
|
395
415
|
- For issues, create a GitHub ticket in this repository
|
|
@@ -8,7 +8,6 @@
|
|
|
8
8
|
<th style="padding: 10px;" colspan="2">Stable</th>
|
|
9
9
|
<th style="padding: 10px;" colspan="2">Nightly</th>
|
|
10
10
|
<th style="padding: 10px;">Discord</th>
|
|
11
|
-
<th style="padding: 10px;">Build</th>
|
|
12
11
|
</tr>
|
|
13
12
|
<tr>
|
|
14
13
|
<td style="padding: 10px;">
|
|
@@ -36,23 +35,6 @@
|
|
|
36
35
|
<img src="https://dcbadge.vercel.app/api/server/gpumode?style=flat" alt="Join Our Discord">
|
|
37
36
|
</a>
|
|
38
37
|
</td>
|
|
39
|
-
<td style="padding: 10px;">
|
|
40
|
-
<div style="display: block;">
|
|
41
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
|
|
42
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
43
|
-
</a>
|
|
44
|
-
</div>
|
|
45
|
-
<div style="display: block;">
|
|
46
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
47
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
48
|
-
</a>
|
|
49
|
-
</div>
|
|
50
|
-
<div style="display: block;">
|
|
51
|
-
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
52
|
-
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
53
|
-
</a>
|
|
54
|
-
</div>
|
|
55
|
-
</td>
|
|
56
38
|
</tr>
|
|
57
39
|
</table>
|
|
58
40
|
|
|
@@ -81,6 +63,8 @@
|
|
|
81
63
|
|
|
82
64
|
We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, KTO, JSD, and many more. Check out [how we optimize the memory](https://x.com/hsu_byron/status/1866577403918917655).
|
|
83
65
|
|
|
66
|
+
You can view the documentation site for additional installation instructions, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
|
|
67
|
+
|
|
84
68
|
## Supercharge Your Model with Liger Kernel
|
|
85
69
|
|
|
86
70
|

|
|
@@ -257,6 +241,7 @@ loss.backward()
|
|
|
257
241
|
|
|
258
242
|
| **Model** | **API** | **Supported Operations** |
|
|
259
243
|
|-------------|--------------------------------------------------------------|-------------------------------------------------------------------------|
|
|
244
|
+
| Llama4 (Text) & (Multimodal) | `liger_kernel.transformers.apply_liger_kernel_to_llama4` | RMSNorm, LayerNorm, GeGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
260
245
|
| LLaMA 2 & 3 | `liger_kernel.transformers.apply_liger_kernel_to_llama` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
261
246
|
| LLaMA 3.2-Vision | `liger_kernel.transformers.apply_liger_kernel_to_mllama` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
262
247
|
| Mistral | `liger_kernel.transformers.apply_liger_kernel_to_mistral` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
@@ -270,6 +255,7 @@ loss.backward()
|
|
|
270
255
|
| Qwen2-VL, & QVQ | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_vl` | RMSNorm, LayerNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
271
256
|
| Qwen2.5-VL | `liger_kernel.transformers.apply_liger_kernel_to_qwen2_5_vl` | RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
272
257
|
| Qwen3 | `liger_kernel.transformers.apply_liger_kernel_to_qwen3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
258
|
+
| Qwen3 MoE | `liger_kernel.transformers.apply_liger_kernel_to_qwen3_moe` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
273
259
|
| Phi3 & Phi3.5 | `liger_kernel.transformers.apply_liger_kernel_to_phi3` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
274
260
|
| Granite 3.0 & 3.1 | `liger_kernel.transformers.apply_liger_kernel_to_granite` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss |
|
|
275
261
|
| OLMo2 | `liger_kernel.transformers.apply_liger_kernel_to_olmo2` | RoPE, RMSNorm, SwiGLU, CrossEntropyLoss, FusedLinearCrossEntropy |
|
|
@@ -291,7 +277,10 @@ loss.backward()
|
|
|
291
277
|
| SwiGLU | `liger_kernel.transformers.LigerSwiGLUMLP` |
|
|
292
278
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
|
293
279
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
|
294
|
-
| Fused Linear CrossEntropy
|
|
280
|
+
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
|
281
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
|
282
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
|
283
|
+
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
|
295
284
|
|
|
296
285
|
|
|
297
286
|
### Alignment Kernels
|
|
@@ -339,6 +328,36 @@ loss.backward()
|
|
|
339
328
|
- [Axolotl](https://axolotl.ai/): Integrating Liger Kernel into Axolotl.
|
|
340
329
|
- [Llama-Factory](https://github.com/hiyouga/LLaMA-Factory): Integrating Liger Kernel into Llama-Factory.
|
|
341
330
|
|
|
331
|
+
|
|
332
|
+
## CI status
|
|
333
|
+
|
|
334
|
+
<table style="width: 100%; text-align: center; border-collapse: collapse;">
|
|
335
|
+
<tr>
|
|
336
|
+
<th style="padding: 10px;">Build</th>
|
|
337
|
+
</tr>
|
|
338
|
+
<tr>
|
|
339
|
+
<td style="padding: 10px;">
|
|
340
|
+
<div style="display: block;">
|
|
341
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml">
|
|
342
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/nvi-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
343
|
+
</a>
|
|
344
|
+
</div>
|
|
345
|
+
<div style="display: block;">
|
|
346
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml">
|
|
347
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/amd-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
348
|
+
</a>
|
|
349
|
+
</div>
|
|
350
|
+
<div style="display: block;">
|
|
351
|
+
<a href="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml">
|
|
352
|
+
<img src="https://github.com/linkedin/Liger-Kernel/actions/workflows/intel-ci.yml/badge.svg?event=schedule" alt="Build">
|
|
353
|
+
</a>
|
|
354
|
+
</div>
|
|
355
|
+
</td>
|
|
356
|
+
</tr>
|
|
357
|
+
</table>
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
|
|
342
361
|
## Contact
|
|
343
362
|
|
|
344
363
|
- For issues, create a GitHub ticket in this repository
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
## Benchmarking Liger Kernels
|
|
2
|
+
|
|
3
|
+
Follow these steps to benchmark and visualize kernel performance:
|
|
4
|
+
|
|
5
|
+
1. Create a benchmark script
|
|
6
|
+
- Add your script under `benchmark/scripts/`
|
|
7
|
+
- Name it according to the kernel (e.g., `benchmark_<kernel_name>.py`)
|
|
8
|
+
|
|
9
|
+
2. Run the benchmark
|
|
10
|
+
- Results will be saved to `benchmark/data/all_benchmark_data.csv`
|
|
11
|
+
|
|
12
|
+
Example: Benchmarking KTO Loss
|
|
13
|
+
```bash
|
|
14
|
+
cd benchmark
|
|
15
|
+
python scripts/benchmark_kto_loss.py
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
3. Visualize results
|
|
19
|
+
- Use the visualization script with optional modes:
|
|
20
|
+
|
|
21
|
+
* To target specific mode(s), pass one or more values to `--kernel-operation-mode`.
|
|
22
|
+
* If you omit `--kernel-operation-mode`, the script will:
|
|
23
|
+
- For `speed` metrics: generate plots for all available modes (forward/backward/full).
|
|
24
|
+
- For `memory` metrics: generate only the `full` plot.
|
|
25
|
+
|
|
26
|
+
Examples:
|
|
27
|
+
1. Specific modes (speed):
|
|
28
|
+
```bash
|
|
29
|
+
python benchmarks_visualizer.py \
|
|
30
|
+
--kernel-name kto_loss \
|
|
31
|
+
--metric-name speed \
|
|
32
|
+
--kernel-operation-mode forward backward
|
|
33
|
+
```
|
|
34
|
+
2. All modes (speed):
|
|
35
|
+
```bash
|
|
36
|
+
python benchmarks_visualizer.py \
|
|
37
|
+
--kernel-name kto_loss \
|
|
38
|
+
--metric-name speed
|
|
39
|
+
```
|
|
40
|
+
3. Memory (always full):
|
|
41
|
+
```bash
|
|
42
|
+
python benchmarks_visualizer.py \
|
|
43
|
+
--kernel-name kto_loss \
|
|
44
|
+
--metric-name memory
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
4. View results
|
|
48
|
+
- Generated plots will be saved in `benchmark/visualizations/`
|