liger-kernel-nightly 0.5.5.dev20250324181221__tar.gz → 0.5.5.dev20250326012054__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/monkey_patch.py +24 -12
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/Makefile +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/README.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/setup.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/fused_linear_rlhf.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/test/utils.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "liger_kernel_nightly"
|
|
7
|
-
version = "0.5.5.dev20250324181221"
|
|
7
|
+
version = "0.5.5.dev20250326012054"
|
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -52,6 +52,7 @@ def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", i
|
|
|
52
52
|
module.in_place = in_place
|
|
53
53
|
_bind_method_to_module(module, "forward", LigerRMSNorm.forward)
|
|
54
54
|
_bind_method_to_module(module, "extra_repr", LigerRMSNorm.extra_repr)
|
|
55
|
+
module.__class__.__name__ = LigerRMSNorm.__name__
|
|
55
56
|
|
|
56
57
|
|
|
57
58
|
def _patch_layer_norm_module(module, eps=1e-6):
|
|
@@ -59,6 +60,17 @@ def _patch_layer_norm_module(module, eps=1e-6):
|
|
|
59
60
|
module.hidden_size = module.normalized_shape
|
|
60
61
|
_bind_method_to_module(module, "forward", LigerLayerNorm.forward)
|
|
61
62
|
_bind_method_to_module(module, "extra_repr", LigerLayerNorm.extra_repr)
|
|
63
|
+
module.__class__.__name__ = LigerLayerNorm.__name__
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _patch_swiglu_module(module, liger_module):
|
|
67
|
+
_bind_method_to_module(module, "forward", liger_module.forward)
|
|
68
|
+
module.__class__.__name__ = liger_module.__name__
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _patch_geglu_module(module):
|
|
72
|
+
_bind_method_to_module(module, "forward", LigerGEGLUMLP.forward)
|
|
73
|
+
module.__class__.__name__ = LigerGEGLUMLP.__name__
|
|
62
74
|
|
|
63
75
|
|
|
64
76
|
def apply_liger_kernel_to_granite(
|
|
@@ -134,7 +146,7 @@ def apply_liger_kernel_to_granite(
|
|
|
134
146
|
|
|
135
147
|
for decoder_layer in base_model.layers:
|
|
136
148
|
if swiglu:
|
|
137
|
-
|
|
149
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
|
|
138
150
|
if rms_norm:
|
|
139
151
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
140
152
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -206,7 +218,7 @@ def apply_liger_kernel_to_llama(
|
|
|
206
218
|
|
|
207
219
|
for decoder_layer in base_model.layers:
|
|
208
220
|
if swiglu:
|
|
209
|
-
|
|
221
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
|
|
210
222
|
if rms_norm:
|
|
211
223
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
212
224
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -296,7 +308,7 @@ def apply_liger_kernel_to_mllama(
|
|
|
296
308
|
_patch_rms_norm_module(text_model.norm)
|
|
297
309
|
for decoder_layer in text_model.layers:
|
|
298
310
|
if swiglu:
|
|
299
|
-
|
|
311
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
|
|
300
312
|
if rms_norm:
|
|
301
313
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
302
314
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -370,7 +382,7 @@ def apply_liger_kernel_to_mistral(
|
|
|
370
382
|
|
|
371
383
|
for decoder_layer in base_model.layers:
|
|
372
384
|
if swiglu:
|
|
373
|
-
|
|
385
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
|
|
374
386
|
if rms_norm:
|
|
375
387
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
376
388
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -442,7 +454,7 @@ def apply_liger_kernel_to_mixtral(
|
|
|
442
454
|
for decoder_layer in base_model.layers:
|
|
443
455
|
if swiglu:
|
|
444
456
|
for expert in decoder_layer.block_sparse_moe.experts:
|
|
445
|
-
|
|
457
|
+
_patch_swiglu_module(expert, LigerBlockSparseTop2MLP)
|
|
446
458
|
if rms_norm:
|
|
447
459
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
448
460
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -516,7 +528,7 @@ def apply_liger_kernel_to_gemma(
|
|
|
516
528
|
|
|
517
529
|
for decoder_layer in base_model.layers:
|
|
518
530
|
if geglu:
|
|
519
|
-
|
|
531
|
+
_patch_geglu_module(decoder_layer.mlp)
|
|
520
532
|
if rms_norm:
|
|
521
533
|
_patch_rms_norm_module_for_gemma(decoder_layer.input_layernorm)
|
|
522
534
|
_patch_rms_norm_module_for_gemma(decoder_layer.post_attention_layernorm)
|
|
@@ -592,7 +604,7 @@ def apply_liger_kernel_to_gemma2(
|
|
|
592
604
|
|
|
593
605
|
for decoder_layer in base_model.layers:
|
|
594
606
|
if geglu:
|
|
595
|
-
|
|
607
|
+
_patch_geglu_module(decoder_layer.mlp)
|
|
596
608
|
if rms_norm:
|
|
597
609
|
_patch_rms_norm_module_for_gemma2(decoder_layer.input_layernorm)
|
|
598
610
|
_patch_rms_norm_module_for_gemma2(decoder_layer.post_attention_layernorm)
|
|
@@ -776,7 +788,7 @@ def apply_liger_kernel_to_qwen2(
|
|
|
776
788
|
|
|
777
789
|
for decoder_layer in base_model.layers:
|
|
778
790
|
if swiglu:
|
|
779
|
-
|
|
791
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
|
|
780
792
|
if rms_norm:
|
|
781
793
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
782
794
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -849,7 +861,7 @@ def apply_liger_kernel_to_qwen2_vl(
|
|
|
849
861
|
_patch_rms_norm_module(base_model.norm)
|
|
850
862
|
for decoder_layer in base_model.layers:
|
|
851
863
|
if swiglu:
|
|
852
|
-
|
|
864
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
|
|
853
865
|
if rms_norm:
|
|
854
866
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
855
867
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -916,7 +928,7 @@ def apply_liger_kernel_to_qwen2_5_vl(
|
|
|
916
928
|
_patch_rms_norm_module(base_model.norm)
|
|
917
929
|
for decoder_layer in base_model.layers:
|
|
918
930
|
if swiglu:
|
|
919
|
-
|
|
931
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
|
|
920
932
|
if rms_norm:
|
|
921
933
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
922
934
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -985,7 +997,7 @@ def apply_liger_kernel_to_phi3(
|
|
|
985
997
|
|
|
986
998
|
for decoder_layer in base_model.layers:
|
|
987
999
|
if swiglu:
|
|
988
|
-
|
|
1000
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerPhi3SwiGLUMLP)
|
|
989
1001
|
if rms_norm:
|
|
990
1002
|
_patch_rms_norm_module(decoder_layer.input_layernorm)
|
|
991
1003
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
@@ -1048,7 +1060,7 @@ def apply_liger_kernel_to_olmo2(
|
|
|
1048
1060
|
|
|
1049
1061
|
for decoder_layer in base_model.layers:
|
|
1050
1062
|
if swiglu:
|
|
1051
|
-
|
|
1063
|
+
_patch_swiglu_module(decoder_layer.mlp, LigerSwiGLUMLP)
|
|
1052
1064
|
if rms_norm:
|
|
1053
1065
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm, in_place=False)
|
|
1054
1066
|
_patch_rms_norm_module(decoder_layer.post_feedforward_layernorm, in_place=False)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{liger_kernel_nightly-0.5.5.dev20250324181221 → liger_kernel_nightly-0.5.5.dev20250326012054}/NOTICE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|