liger-kernel-nightly 0.5.10.dev20250527002824__tar.gz → 0.5.10.dev20250528223524__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/rms_norm.py +243 -45
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/monkey_patch.py +3 -4
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/rms_norm.py +4 -1
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_monkey_patch.py +6 -6
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250527002824 → liger_kernel_nightly-0.5.10.dev20250528223524}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250527002824"
|
7
|
+
version = "0.5.10.dev20250528223524"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -193,6 +193,153 @@ def _rms_norm_backward_kernel(
|
|
193
193
|
|
194
194
|
tl.store(dW_ptr + row_block_id * dW_row_stride + col_offsets, dW_row, mask=mask)
|
195
195
|
|
196
|
+
@triton.jit
|
197
|
+
def _block_rms_norm_forward_kernel(
|
198
|
+
Y_ptr,
|
199
|
+
Y_row_stride,
|
200
|
+
X_ptr,
|
201
|
+
X_row_stride,
|
202
|
+
W_ptr,
|
203
|
+
W_row_stride,
|
204
|
+
RSTD_ptr,
|
205
|
+
RSTD_row_stride,
|
206
|
+
n_rows,
|
207
|
+
n_cols,
|
208
|
+
eps,
|
209
|
+
offset,
|
210
|
+
casting_mode: tl.constexpr, # constexpr so the `if` blocks can be optimized out
|
211
|
+
BLOCK_SIZE: tl.constexpr,
|
212
|
+
BLOCK_ROW: tl.constexpr,
|
213
|
+
):
|
214
|
+
"""
|
215
|
+
y_i = (x_i / (RMS)) * (offset + w_i), RMS = sqrt(sum(x_i^2) / N)
|
216
|
+
|
217
|
+
Reference:
|
218
|
+
1. https://triton-lang.org/main/getting-started/tutorials/05-layer-norm.html
|
219
|
+
2. https://github.com/unslothai/unsloth/blob/fd753fed99ed5f10ef8a9b7139588d9de9ddecfb/unsloth/kernels/rms_layernorm.py#L22
|
220
|
+
3. https://arxiv.org/pdf/1910.07467
|
221
|
+
"""
|
222
|
+
|
223
|
+
row_idx = tl.program_id(0) * BLOCK_ROW + tl.arange(0, BLOCK_ROW)
|
224
|
+
col_offsets = tl.arange(0, BLOCK_SIZE)
|
225
|
+
row_mask = row_idx < n_rows
|
226
|
+
col_mask = col_offsets < n_cols
|
227
|
+
|
228
|
+
|
229
|
+
X_row = tl.load(X_ptr + row_idx[:, None] * X_row_stride + col_offsets[None, :], mask=row_mask[:, None] & col_mask[None, :] , other=0)
|
230
|
+
X_row_dtype = X_row.dtype
|
231
|
+
W_row = tl.load(W_ptr + col_offsets, mask=col_mask, other=0)
|
232
|
+
|
233
|
+
# On Llama, only rstd is computed on fp32
|
234
|
+
if casting_mode == _CASTING_MODE_LLAMA:
|
235
|
+
X_row = X_row.to(tl.float32)
|
236
|
+
|
237
|
+
# Gemma computes everything on fp32, and then casts back the output to the original dtype
|
238
|
+
if casting_mode == _CASTING_MODE_GEMMA:
|
239
|
+
W_row = W_row.to(tl.float32)
|
240
|
+
X_row = X_row.to(tl.float32)
|
241
|
+
|
242
|
+
if casting_mode == _CASTING_MODE_NONE:
|
243
|
+
eps = eps.to(X_row_dtype)
|
244
|
+
offset = offset.to(X_row_dtype)
|
245
|
+
|
246
|
+
mean_square = tl.sum(X_row * X_row, axis=1) / n_cols
|
247
|
+
rstd = rsqrt(mean_square + eps)
|
248
|
+
|
249
|
+
# We can save time by caching rms with minimal memory overhead
|
250
|
+
# because rms is much smaller compared to X_row, as rms is for each row.
|
251
|
+
# However, on the computation side, it can save 4 operations (*, sum, /, sqrt).
|
252
|
+
tl.store(RSTD_ptr + row_idx * RSTD_row_stride, rstd, row_mask)
|
253
|
+
|
254
|
+
X_row = X_row * rstd[:, None]
|
255
|
+
|
256
|
+
# On Llama, the multiplication with the weight is done on the original dtype
|
257
|
+
if casting_mode == _CASTING_MODE_LLAMA:
|
258
|
+
X_row = X_row.to(X_row_dtype)
|
259
|
+
|
260
|
+
Y_row = X_row * (offset + W_row)[None, :]
|
261
|
+
|
262
|
+
if casting_mode == _CASTING_MODE_GEMMA:
|
263
|
+
Y_row = Y_row.to(X_row_dtype)
|
264
|
+
|
265
|
+
tl.store(Y_ptr + row_idx[:, None] * Y_row_stride + col_offsets[None, :], Y_row, mask=row_mask[:, None] & col_mask[None, :])
|
266
|
+
|
267
|
+
@triton.jit
|
268
|
+
def _block_rms_norm_backward_kernel(
|
269
|
+
dY_ptr,
|
270
|
+
dY_row_stride,
|
271
|
+
dX_ptr,
|
272
|
+
dX_row_stride,
|
273
|
+
X_ptr,
|
274
|
+
X_row_stride,
|
275
|
+
X_dtype: tl.constexpr,
|
276
|
+
W_ptr,
|
277
|
+
W_row_stride,
|
278
|
+
RSTD_ptr,
|
279
|
+
RSTD_row_stride,
|
280
|
+
dW_ptr,
|
281
|
+
dW_row_stride,
|
282
|
+
n_rows,
|
283
|
+
n_cols,
|
284
|
+
offset,
|
285
|
+
rows_per_program: tl.constexpr,
|
286
|
+
casting_mode: tl.constexpr,
|
287
|
+
BLOCK_SIZE: tl.constexpr,
|
288
|
+
BLOCK_ROW: tl.constexpr,
|
289
|
+
):
|
290
|
+
"""
|
291
|
+
dx = (1 / RMS) * [dy * (w + offset) - (1 / N) * (1 / RMS^2) * ((dy * (w + offset)) dot x) * x]. * means element-wise multiplication, whereas dot means dot product
|
292
|
+
dw = sum(dy * (x / RMS)). summation over BxT dimension
|
293
|
+
"""
|
294
|
+
|
295
|
+
pid = tl.program_id(0).cast(tl.int64)
|
296
|
+
NUM_SMS = tl.num_programs(0)
|
297
|
+
|
298
|
+
col_offsets = tl.arange(0, BLOCK_SIZE)
|
299
|
+
col_mask = col_offsets < n_cols
|
300
|
+
|
301
|
+
dW_row = tl.zeros((BLOCK_SIZE,), dtype=tl.float32)
|
302
|
+
|
303
|
+
W_row = tl.load(W_ptr + col_offsets, mask=col_mask, other=0.0)
|
304
|
+
W_row = W_row + offset
|
305
|
+
|
306
|
+
for start in range(pid * BLOCK_ROW, n_rows, NUM_SMS * BLOCK_ROW):
|
307
|
+
row_idx = start + tl.arange(0, BLOCK_ROW)
|
308
|
+
row_mask = row_idx < n_rows
|
309
|
+
dY_row = tl.load(dY_ptr + row_idx[:, None] * dY_row_stride + col_offsets[None, :], mask=row_mask[:, None] & col_mask[None, :], other=0.0)
|
310
|
+
X_row = tl.load(X_ptr + row_idx[:, None] * X_row_stride + col_offsets[None, :], mask=row_mask[:, None] & col_mask[None, :], other=0.0)
|
311
|
+
|
312
|
+
# Get cached rms
|
313
|
+
rstd_row = tl.load(RSTD_ptr + row_idx * RSTD_row_stride, row_mask)
|
314
|
+
|
315
|
+
X_row = X_row.to(tl.float32)
|
316
|
+
|
317
|
+
# Different backward graphs for different casting modes
|
318
|
+
if casting_mode == _CASTING_MODE_LLAMA:
|
319
|
+
m = (dY_row * W_row[None, :]).to(tl.float32)
|
320
|
+
|
321
|
+
elif casting_mode == _CASTING_MODE_GEMMA:
|
322
|
+
dY_row = dY_row.to(tl.float32)
|
323
|
+
m = dY_row * W_row[None, :]
|
324
|
+
else:
|
325
|
+
m = dY_row * W_row[None, :]
|
326
|
+
|
327
|
+
dX_row = rstd_row[:, None] * m
|
328
|
+
|
329
|
+
dX_row += (rstd_row[:, None]) * (-(1 / n_cols) * (rstd_row * rstd_row * tl.sum(m * X_row, axis=1))[:, None] * X_row)
|
330
|
+
|
331
|
+
# calculate the gradient of W
|
332
|
+
if casting_mode == _CASTING_MODE_LLAMA:
|
333
|
+
dW_row += tl.sum(dY_row * (X_row * rstd_row[:, None]).to(X_dtype), 0)
|
334
|
+
else:
|
335
|
+
# here X_row is already in fp32 (see previous if block)
|
336
|
+
dW_row += tl.sum(dY_row * (X_row * rstd_row[:, None]), 0)
|
337
|
+
|
338
|
+
tl.store(dX_ptr + row_idx[:, None] * dX_row_stride + col_offsets[None, :], dX_row, mask=row_mask[:, None] & col_mask[None, :])
|
339
|
+
|
340
|
+
|
341
|
+
tl.store(dW_ptr + pid * dW_row_stride + col_offsets, dW_row, mask=col_mask)
|
342
|
+
|
196
343
|
|
197
344
|
_str_to_casting_mode = {
|
198
345
|
"llama": _CASTING_MODE_LLAMA.value,
|
@@ -201,7 +348,7 @@ _str_to_casting_mode = {
|
|
201
348
|
}
|
202
349
|
|
203
350
|
|
204
|
-
def rms_norm_forward(X, W, eps, offset, casting_mode):
|
351
|
+
def rms_norm_forward(X, W, eps, offset, casting_mode, row_mode):
|
205
352
|
if not isinstance(casting_mode, int):
|
206
353
|
assert casting_mode in _str_to_casting_mode, f"Invalid casting mode: {casting_mode}"
|
207
354
|
casting_mode = _str_to_casting_mode[casting_mode]
|
@@ -227,27 +374,49 @@ def rms_norm_forward(X, W, eps, offset, casting_mode):
|
|
227
374
|
kernel_args = {}
|
228
375
|
if X.device.type == "xpu":
|
229
376
|
kernel_args["grf_mode"] = "large"
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
377
|
+
if BLOCK_SIZE > 256 or n_rows < 4096 * 8 or row_mode:
|
378
|
+
_rms_norm_forward_kernel[(n_rows,)](
|
379
|
+
Y,
|
380
|
+
Y.stride(0),
|
381
|
+
X,
|
382
|
+
X.stride(0),
|
383
|
+
W,
|
384
|
+
W.stride(0),
|
385
|
+
RSTD,
|
386
|
+
RSTD.stride(0),
|
387
|
+
n_cols,
|
388
|
+
eps,
|
389
|
+
offset,
|
390
|
+
casting_mode,
|
391
|
+
BLOCK_SIZE=BLOCK_SIZE,
|
392
|
+
num_warps=num_warps,
|
393
|
+
**kernel_args, # XPU-specific optimization
|
394
|
+
)
|
395
|
+
else:
|
396
|
+
BLOCK_ROW = 16
|
397
|
+
kernel_args["BLOCK_ROW"] = BLOCK_ROW
|
398
|
+
_block_rms_norm_forward_kernel[(triton.cdiv(n_rows, BLOCK_ROW),)](
|
399
|
+
Y,
|
400
|
+
Y.stride(0),
|
401
|
+
X,
|
402
|
+
X.stride(0),
|
403
|
+
W,
|
404
|
+
W.stride(0),
|
405
|
+
RSTD,
|
406
|
+
RSTD.stride(0),
|
407
|
+
n_rows,
|
408
|
+
n_cols,
|
409
|
+
eps,
|
410
|
+
offset,
|
411
|
+
casting_mode,
|
412
|
+
BLOCK_SIZE=BLOCK_SIZE,
|
413
|
+
num_warps=num_warps,
|
414
|
+
**kernel_args, # XPU-specific optimization
|
415
|
+
)
|
247
416
|
return Y.view(*shape), X, RSTD, BLOCK_SIZE, num_warps, casting_mode
|
248
417
|
|
249
418
|
|
250
|
-
def rms_norm_backward(dY, X, W, RSTD, offset, casting_mode, BLOCK_SIZE, num_warps, in_place):
|
419
|
+
def rms_norm_backward(dY, X, W, RSTD, offset, casting_mode, BLOCK_SIZE, num_warps, in_place, row_mode):
|
251
420
|
shape = dY.shape
|
252
421
|
dim = shape[-1]
|
253
422
|
dY = dY.view(-1, dim)
|
@@ -277,29 +446,56 @@ def rms_norm_backward(dY, X, W, RSTD, offset, casting_mode, BLOCK_SIZE, num_warp
|
|
277
446
|
if X.device.type == "xpu":
|
278
447
|
kernel_args["grf_mode"] = "large"
|
279
448
|
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
449
|
+
if BLOCK_SIZE > 256 or n_rows < 4096 * 8 or row_mode:
|
450
|
+
_rms_norm_backward_kernel[grid](
|
451
|
+
dY,
|
452
|
+
dY.stride(0),
|
453
|
+
dX,
|
454
|
+
dX.stride(0),
|
455
|
+
X,
|
456
|
+
X.stride(0),
|
457
|
+
torch_to_triton_dtype[X.dtype],
|
458
|
+
W,
|
459
|
+
W.stride(0),
|
460
|
+
RSTD,
|
461
|
+
RSTD.stride(0),
|
462
|
+
_dW,
|
463
|
+
_dW.stride(0),
|
464
|
+
n_rows,
|
465
|
+
n_cols,
|
466
|
+
offset,
|
467
|
+
rows_per_program,
|
468
|
+
casting_mode,
|
469
|
+
BLOCK_SIZE=BLOCK_SIZE,
|
470
|
+
num_warps=num_warps,
|
471
|
+
**kernel_args, # XPU-specific optimization
|
472
|
+
)
|
473
|
+
else:
|
474
|
+
BLOCK_ROW = 16
|
475
|
+
kernel_args["BLOCK_ROW"] = BLOCK_ROW
|
476
|
+
_block_rms_norm_backward_kernel[grid](
|
477
|
+
dY,
|
478
|
+
dY.stride(0),
|
479
|
+
dX,
|
480
|
+
dX.stride(0),
|
481
|
+
X,
|
482
|
+
X.stride(0),
|
483
|
+
torch_to_triton_dtype[X.dtype],
|
484
|
+
W,
|
485
|
+
W.stride(0),
|
486
|
+
RSTD,
|
487
|
+
RSTD.stride(0),
|
488
|
+
_dW,
|
489
|
+
_dW.stride(0),
|
490
|
+
n_rows,
|
491
|
+
n_cols,
|
492
|
+
offset,
|
493
|
+
rows_per_program,
|
494
|
+
casting_mode,
|
495
|
+
BLOCK_SIZE=BLOCK_SIZE,
|
496
|
+
num_warps=num_warps,
|
497
|
+
**kernel_args, # XPU-specific optimization
|
498
|
+
)
|
303
499
|
dX = dX.view(*shape)
|
304
500
|
dW = _dW.sum(dim=0).to(W.dtype)
|
305
501
|
|
@@ -330,15 +526,16 @@ class LigerRMSNormFunction(torch.autograd.Function):
|
|
330
526
|
|
331
527
|
@staticmethod
|
332
528
|
@ensure_contiguous
|
333
|
-
def forward(ctx, X, W, eps, offset=0.0, casting_mode="llama", in_place=True):
|
529
|
+
def forward(ctx, X, W, eps, offset=0.0, casting_mode="llama", in_place=True, row_mode=None):
|
334
530
|
"""
|
335
531
|
X: (B, T, H) or (BxT, H)
|
336
532
|
W: (H,)
|
337
533
|
"""
|
338
|
-
Y, X, RSTD, BLOCK_SIZE, num_warps, casting_mode = rms_norm_forward(X, W, eps, offset, casting_mode)
|
534
|
+
Y, X, RSTD, BLOCK_SIZE, num_warps, casting_mode = rms_norm_forward(X, W, eps, offset, casting_mode, row_mode)
|
339
535
|
ctx.offset = offset
|
340
536
|
ctx.casting_mode = casting_mode
|
341
537
|
ctx.in_place = in_place
|
538
|
+
ctx.row_mode = row_mode
|
342
539
|
ctx.BLOCK_SIZE = BLOCK_SIZE
|
343
540
|
ctx.num_warps = num_warps
|
344
541
|
ctx.save_for_backward(X, W, RSTD)
|
@@ -361,5 +558,6 @@ class LigerRMSNormFunction(torch.autograd.Function):
|
|
361
558
|
ctx.BLOCK_SIZE,
|
362
559
|
ctx.num_warps,
|
363
560
|
ctx.in_place,
|
561
|
+
ctx.row_mode
|
364
562
|
)
|
365
|
-
return dX, dW, None, None, None, None
|
563
|
+
return dX, dW, None, None, None, None, None
|
@@ -776,7 +776,7 @@ def apply_liger_kernel_to_gemma3_text(
|
|
776
776
|
|
777
777
|
from transformers.models.gemma3 import modeling_gemma3
|
778
778
|
from transformers.models.gemma3.modeling_gemma3 import Gemma3DecoderLayer
|
779
|
-
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
779
|
+
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM, Gemma3TextModel
|
780
780
|
|
781
781
|
from liger_kernel.transformers.gema3_rms import LigerRMSNormForGemma3
|
782
782
|
from liger_kernel.transformers.model.gemma3 import causal_forward
|
@@ -807,9 +807,9 @@ def apply_liger_kernel_to_gemma3_text(
|
|
807
807
|
# The model instance already exists, so we need to additionally patch the
|
808
808
|
# instance variables that reference already-instantiated modules
|
809
809
|
|
810
|
-
if isinstance(model, Gemma3ForCausalLM):
|
810
|
+
if isinstance(model, Gemma3ForCausalLM) or isinstance(model, Gemma3TextModel):
|
811
811
|
# get the base model from the model instance
|
812
|
-
base_model = model.model
|
812
|
+
base_model = model.model if isinstance(model, Gemma3ForCausalLM) else model
|
813
813
|
|
814
814
|
if rms_norm:
|
815
815
|
_patch_rms_norm_module_for_gemma3(base_model.norm)
|
@@ -1625,7 +1625,6 @@ def _apply_liger_kernel_to_instance(model: PreTrainedModel, **kwargs) -> None:
|
|
1625
1625
|
return
|
1626
1626
|
|
1627
1627
|
apply_fn = MODEL_TYPE_TO_APPLY_LIGER_FN[model_type]
|
1628
|
-
|
1629
1628
|
apply_fn_signature = inspect.signature(apply_fn)
|
1630
1629
|
|
1631
1630
|
# Filter out the keyword arguments that are not supported by the apply function
|
@@ -13,6 +13,7 @@ class LigerRMSNorm(nn.Module):
|
|
13
13
|
casting_mode="llama",
|
14
14
|
init_fn="ones",
|
15
15
|
in_place=True,
|
16
|
+
row_mode=None,
|
16
17
|
):
|
17
18
|
super().__init__()
|
18
19
|
assert init_fn in [
|
@@ -20,11 +21,12 @@ class LigerRMSNorm(nn.Module):
|
|
20
21
|
"zeros",
|
21
22
|
], f"init_fn must be either 'ones' or 'zeros', got {init_fn}"
|
22
23
|
self.weight = nn.Parameter(torch.ones(hidden_size) if init_fn == "ones" else torch.zeros(hidden_size))
|
23
|
-
self.variance_epsilon, self.offset, self.casting_mode, self.in_place = (
|
24
|
+
self.variance_epsilon, self.offset, self.casting_mode, self.in_place, self.row_mode = (
|
24
25
|
eps,
|
25
26
|
offset,
|
26
27
|
casting_mode,
|
27
28
|
in_place,
|
29
|
+
row_mode,
|
28
30
|
)
|
29
31
|
|
30
32
|
def forward(self, hidden_states):
|
@@ -35,6 +37,7 @@ class LigerRMSNorm(nn.Module):
|
|
35
37
|
self.offset,
|
36
38
|
self.casting_mode,
|
37
39
|
self.in_place,
|
40
|
+
self.row_mode
|
38
41
|
)
|
39
42
|
|
40
43
|
def extra_repr(self):
|
@@ -667,7 +667,7 @@ def test_apply_liger_kernel_to_instance_for_gemma3_text():
|
|
667
667
|
|
668
668
|
|
669
669
|
@pytest.mark.skipif(not is_gemma3_available(), reason="gemma3 module not available")
|
670
|
-
def test_apply_liger_kernel_to_instance_for_gemma3():
|
670
|
+
def test_apply_liger_kernel_to_instance_for_gemma3_conditional_generation():
|
671
671
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
672
672
|
|
673
673
|
with patch("transformers.models.gemma3.modeling_gemma3"):
|
@@ -687,8 +687,8 @@ def test_apply_liger_kernel_to_instance_for_gemma3():
|
|
687
687
|
intermediate_size=64,
|
688
688
|
)
|
689
689
|
config = transformers.models.gemma3.configuration_gemma3.Gemma3Config(text_config, vision_config)
|
690
|
-
dummy_model_instance = Gemma3ForConditionalGeneration._from_config(config)
|
691
690
|
|
691
|
+
dummy_model_instance = Gemma3ForConditionalGeneration._from_config(config)
|
692
692
|
assert isinstance(dummy_model_instance, Gemma3ForConditionalGeneration)
|
693
693
|
|
694
694
|
# Check that model instance variables are not yet patched with Liger modules
|
@@ -704,11 +704,11 @@ def test_apply_liger_kernel_to_instance_for_gemma3():
|
|
704
704
|
dummy_model_instance.multi_modal_projector.mm_soft_emb_norm.forward
|
705
705
|
) != inspect.getsource(LigerRMSNorm.forward)
|
706
706
|
|
707
|
-
assert inspect.getsource(dummy_model_instance.language_model.model.norm.forward) != inspect.getsource(
|
707
|
+
assert inspect.getsource(dummy_model_instance.language_model.norm.forward) != inspect.getsource(
|
708
708
|
LigerRMSNorm.forward
|
709
709
|
)
|
710
710
|
|
711
|
-
for layer in dummy_model_instance.language_model.model.layers:
|
711
|
+
for layer in dummy_model_instance.language_model.layers:
|
712
712
|
assert inspect.getsource(layer.mlp.forward) != inspect.getsource(LigerGEGLUMLP.forward)
|
713
713
|
assert inspect.getsource(layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
714
714
|
assert inspect.getsource(layer.post_attention_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
@@ -736,10 +736,10 @@ def test_apply_liger_kernel_to_instance_for_gemma3():
|
|
736
736
|
dummy_model_instance.multi_modal_projector.mm_soft_emb_norm.forward
|
737
737
|
) == inspect.getsource(LigerRMSNorm.forward)
|
738
738
|
|
739
|
-
assert inspect.getsource(dummy_model_instance.language_model.model.norm.forward) == inspect.getsource(
|
739
|
+
assert inspect.getsource(dummy_model_instance.language_model.norm.forward) == inspect.getsource(
|
740
740
|
LigerRMSNorm.forward
|
741
741
|
)
|
742
|
-
for layer in dummy_model_instance.language_model.model.layers:
|
742
|
+
for layer in dummy_model_instance.language_model.layers:
|
743
743
|
assert inspect.getsource(layer.mlp.forward) == inspect.getsource(LigerGEGLUMLP.forward)
|
744
744
|
assert inspect.getsource(layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
745
745
|
assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|