liger-kernel-nightly 0.5.10.dev20250601024230__tar.gz → 0.5.10.dev20250602134913__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/monkey_patch.py +11 -7
- liger_kernel_nightly-0.5.10.dev20250602134913/src/liger_kernel/transformers/rms_norm.py +81 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -1
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_monkey_patch.py +4 -4
- liger_kernel_nightly-0.5.10.dev20250601024230/src/liger_kernel/transformers/gema3_rms.py +0 -8
- liger_kernel_nightly-0.5.10.dev20250601024230/src/liger_kernel/transformers/rms_norm.py +0 -46
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250601024230 → liger_kernel_nightly-0.5.10.dev20250602134913}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250601024230"
|
7
|
+
version = "0.5.10.dev20250602134913"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -428,13 +428,14 @@ def apply_liger_kernel_to_mllama(
|
|
428
428
|
if isinstance(model, MllamaForConditionalGeneration):
|
429
429
|
language_model: MllamaForCausalLM = model.language_model
|
430
430
|
vision_model: MllamaVisionModel = model.vision_model
|
431
|
-
text_model: MllamaTextModel = language_model.model
|
431
|
+
text_model: MllamaTextModel = language_model
|
432
432
|
elif isinstance(model, MllamaForCausalLM):
|
433
433
|
text_model = model.model
|
434
434
|
vision_model = None
|
435
435
|
elif isinstance(model, MllamaTextModel):
|
436
436
|
text_model = model
|
437
437
|
vision_model = None
|
438
|
+
|
438
439
|
else:
|
439
440
|
raise ValueError(f"Unsupported Mllama model type: {type(model)}")
|
440
441
|
|
@@ -626,8 +627,8 @@ def apply_liger_kernel_to_gemma(
|
|
626
627
|
from transformers.models.gemma import modeling_gemma
|
627
628
|
from transformers.models.gemma.modeling_gemma import GemmaModel
|
628
629
|
|
629
|
-
|
630
|
-
|
630
|
+
from liger_kernel.transformers.rms_norm import LigerRMSNormForGemma
|
631
|
+
|
631
632
|
_patch_rms_norm_module_for_gemma = partial(_patch_rms_norm_module, casting_mode="gemma", offset=1.0)
|
632
633
|
|
633
634
|
if rope:
|
@@ -700,7 +701,8 @@ def apply_liger_kernel_to_gemma2(
|
|
700
701
|
from transformers.models.gemma2 import modeling_gemma2
|
701
702
|
from transformers.models.gemma2.modeling_gemma2 import Gemma2Model
|
702
703
|
|
703
|
-
|
704
|
+
from liger_kernel.transformers.rms_norm import LigerRMSNormForGemma2
|
705
|
+
|
704
706
|
_patch_rms_norm_module_for_gemma2 = partial(
|
705
707
|
_patch_rms_norm_module, offset=1.0, casting_mode="gemma", in_place=False
|
706
708
|
)
|
@@ -779,8 +781,8 @@ def apply_liger_kernel_to_gemma3_text(
|
|
779
781
|
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
780
782
|
from transformers.models.gemma3.modeling_gemma3 import Gemma3TextModel
|
781
783
|
|
782
|
-
from liger_kernel.transformers.gema3_rms import LigerRMSNormForGemma3
|
783
784
|
from liger_kernel.transformers.model.gemma3 import causal_forward
|
785
|
+
from liger_kernel.transformers.rms_norm import LigerRMSNormForGemma3
|
784
786
|
|
785
787
|
_patch_rms_norm_module_for_gemma3 = partial(
|
786
788
|
_patch_rms_norm_module, offset=1.0, casting_mode="gemma", in_place=False
|
@@ -1450,11 +1452,12 @@ def apply_liger_kernel_to_olmo2(
|
|
1450
1452
|
from transformers.models.olmo2.modeling_olmo2 import Olmo2Model
|
1451
1453
|
|
1452
1454
|
from liger_kernel.transformers.model.olmo2 import lce_forward as olmo2_lce_forward
|
1455
|
+
from liger_kernel.transformers.rms_norm import LigerRMSNormForOlmo2
|
1453
1456
|
|
1454
1457
|
if rope:
|
1455
1458
|
modeling_olmo2.apply_rotary_pos_emb = liger_rotary_pos_emb
|
1456
1459
|
if rms_norm:
|
1457
|
-
modeling_olmo2.Olmo2RMSNorm = partial(LigerRMSNorm, in_place=False)
|
1460
|
+
modeling_olmo2.Olmo2RMSNorm = LigerRMSNormForOlmo2
|
1458
1461
|
if swiglu:
|
1459
1462
|
modeling_olmo2.Olmo2MLP = LigerSwiGLUMLP
|
1460
1463
|
if cross_entropy:
|
@@ -1513,11 +1516,12 @@ def apply_liger_kernel_to_glm4(
|
|
1513
1516
|
from transformers.models.glm4.modeling_glm4 import Glm4Model
|
1514
1517
|
|
1515
1518
|
from liger_kernel.transformers.model.glm4 import lce_forward as glm4_lce_forward
|
1519
|
+
from liger_kernel.transformers.rms_norm import LigerRMSNormForGlm4
|
1516
1520
|
|
1517
1521
|
if rope:
|
1518
1522
|
raise NotImplementedError("liger_rotary_pos_emb is not available for Glm4 models.")
|
1519
1523
|
if rms_norm:
|
1520
|
-
modeling_glm4.Glm4RMSNorm = partial(LigerRMSNorm, in_place=False)
|
1524
|
+
modeling_glm4.Glm4RMSNorm = LigerRMSNormForGlm4
|
1521
1525
|
if swiglu:
|
1522
1526
|
modeling_glm4.Glm4MLP = LigerPhi3SwiGLUMLP
|
1523
1527
|
if cross_entropy:
|
@@ -0,0 +1,81 @@
|
|
1
|
+
import torch
|
2
|
+
import torch.nn as nn
|
3
|
+
|
4
|
+
from liger_kernel.ops.rms_norm import LigerRMSNormFunction
|
5
|
+
|
6
|
+
|
7
|
+
class LigerRMSNorm(nn.Module):
|
8
|
+
def __init__(
|
9
|
+
self,
|
10
|
+
hidden_size,
|
11
|
+
eps=1e-6,
|
12
|
+
offset=0.0,
|
13
|
+
casting_mode="llama",
|
14
|
+
init_fn="ones",
|
15
|
+
in_place=True,
|
16
|
+
row_mode=None,
|
17
|
+
):
|
18
|
+
super().__init__()
|
19
|
+
assert init_fn in [
|
20
|
+
"ones",
|
21
|
+
"zeros",
|
22
|
+
], f"init_fn must be either 'ones' or 'zeros', got {init_fn}"
|
23
|
+
self.weight = nn.Parameter(torch.ones(hidden_size) if init_fn == "ones" else torch.zeros(hidden_size))
|
24
|
+
self.variance_epsilon, self.offset, self.casting_mode, self.in_place, self.row_mode = (
|
25
|
+
eps,
|
26
|
+
offset,
|
27
|
+
casting_mode,
|
28
|
+
in_place,
|
29
|
+
row_mode,
|
30
|
+
)
|
31
|
+
|
32
|
+
def forward(self, hidden_states):
|
33
|
+
return LigerRMSNormFunction.apply(
|
34
|
+
hidden_states,
|
35
|
+
self.weight,
|
36
|
+
self.variance_epsilon,
|
37
|
+
self.offset,
|
38
|
+
self.casting_mode,
|
39
|
+
self.in_place,
|
40
|
+
self.row_mode,
|
41
|
+
)
|
42
|
+
|
43
|
+
def extra_repr(self):
|
44
|
+
return (
|
45
|
+
f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}, offset={self.offset}, in_place={self.in_place}"
|
46
|
+
)
|
47
|
+
|
48
|
+
|
49
|
+
class LigerRMSNormForGemma(LigerRMSNorm):
|
50
|
+
def __init__(
|
51
|
+
self, hidden_size, eps=1e-6, offset=1.0, casting_mode="gemma", init_fn="zeros", in_place=True, row_mode=None
|
52
|
+
):
|
53
|
+
super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
|
54
|
+
|
55
|
+
|
56
|
+
class LigerRMSNormForGemma2(LigerRMSNorm):
|
57
|
+
def __init__(
|
58
|
+
self, hidden_size, eps=1e-6, offset=1.0, casting_mode="gemma", init_fn="zeros", in_place=False, row_mode=None
|
59
|
+
):
|
60
|
+
super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
|
61
|
+
|
62
|
+
|
63
|
+
class LigerRMSNormForGemma3(LigerRMSNorm):
|
64
|
+
"""Gemma3RMSNorm has a dim argument not hidden_size used in q_norm and k_norm."""
|
65
|
+
|
66
|
+
def __init__(self, dim, eps=0.000001, offset=1.0, casting_mode="gemma", init_fn="zeros", in_place=False):
|
67
|
+
super().__init__(dim, eps, offset, casting_mode, init_fn, in_place)
|
68
|
+
|
69
|
+
|
70
|
+
class LigerRMSNormForOlmo2(LigerRMSNorm):
|
71
|
+
def __init__(
|
72
|
+
self, hidden_size, eps=1e-6, offset=0.0, casting_mode="llama", init_fn="ones", in_place=False, row_mode=None
|
73
|
+
):
|
74
|
+
super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
|
75
|
+
|
76
|
+
|
77
|
+
class LigerRMSNormForGlm4(LigerRMSNorm):
|
78
|
+
def __init__(
|
79
|
+
self, hidden_size, eps=1e-6, offset=0.0, casting_mode="llama", init_fn="ones", in_place=False, row_mode=None
|
80
|
+
):
|
81
|
+
super().__init__(hidden_size, eps, offset, casting_mode, init_fn, in_place, row_mode)
|
@@ -156,7 +156,6 @@ src/liger_kernel/transformers/functional.py
|
|
156
156
|
src/liger_kernel/transformers/fused_linear_cross_entropy.py
|
157
157
|
src/liger_kernel/transformers/fused_linear_jsd.py
|
158
158
|
src/liger_kernel/transformers/geglu.py
|
159
|
-
src/liger_kernel/transformers/gema3_rms.py
|
160
159
|
src/liger_kernel/transformers/group_norm.py
|
161
160
|
src/liger_kernel/transformers/grpo_loss.py
|
162
161
|
src/liger_kernel/transformers/jsd.py
|
@@ -348,10 +348,10 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
348
348
|
assert isinstance(dummy_model_instance, MllamaForConditionalGeneration)
|
349
349
|
|
350
350
|
# Check that model instance variables are not yet patched with Liger modules
|
351
|
-
assert inspect.getsource(dummy_model_instance.language_model.model.norm.forward) != inspect.getsource(
|
351
|
+
assert inspect.getsource(dummy_model_instance.language_model.norm.forward) != inspect.getsource(
|
352
352
|
LigerRMSNorm.forward
|
353
353
|
)
|
354
|
-
for layer in dummy_model_instance.language_model.model.layers:
|
354
|
+
for layer in dummy_model_instance.language_model.layers:
|
355
355
|
assert inspect.getsource(layer.mlp.forward) != inspect.getsource(LigerSwiGLUMLP.forward)
|
356
356
|
assert inspect.getsource(layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
357
357
|
assert inspect.getsource(layer.post_attention_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
@@ -377,10 +377,10 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
377
377
|
_apply_liger_kernel_to_instance(model=dummy_model_instance)
|
378
378
|
|
379
379
|
# Check that the model's instance variables were correctly patched with Liger modules
|
380
|
-
assert inspect.getsource(dummy_model_instance.language_model.model.norm.forward) == inspect.getsource(
|
380
|
+
assert inspect.getsource(dummy_model_instance.language_model.norm.forward) == inspect.getsource(
|
381
381
|
LigerRMSNorm.forward
|
382
382
|
)
|
383
|
-
for layer in dummy_model_instance.language_model.model.layers:
|
383
|
+
for layer in dummy_model_instance.language_model.layers:
|
384
384
|
assert inspect.getsource(layer.mlp.forward) == inspect.getsource(LigerSwiGLUMLP.forward)
|
385
385
|
assert inspect.getsource(layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
386
386
|
assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
@@ -1,8 +0,0 @@
|
|
1
|
-
from .rms_norm import LigerRMSNorm
|
2
|
-
|
3
|
-
|
4
|
-
class LigerRMSNormForGemma3(LigerRMSNorm):
|
5
|
-
"""Gemma3RMSNorm has a dim argument not hidden_size used in q_norm and k_norm."""
|
6
|
-
|
7
|
-
def __init__(self, dim, eps=0.000001, offset=1.0, casting_mode="gemma", init_fn="zeros", in_place=False):
|
8
|
-
super().__init__(dim, eps, offset, casting_mode, init_fn, in_place)
|
@@ -1,46 +0,0 @@
|
|
1
|
-
import torch
|
2
|
-
import torch.nn as nn
|
3
|
-
|
4
|
-
from liger_kernel.ops.rms_norm import LigerRMSNormFunction
|
5
|
-
|
6
|
-
|
7
|
-
class LigerRMSNorm(nn.Module):
|
8
|
-
def __init__(
|
9
|
-
self,
|
10
|
-
hidden_size,
|
11
|
-
eps=1e-6,
|
12
|
-
offset=0.0,
|
13
|
-
casting_mode="llama",
|
14
|
-
init_fn="ones",
|
15
|
-
in_place=True,
|
16
|
-
row_mode=None,
|
17
|
-
):
|
18
|
-
super().__init__()
|
19
|
-
assert init_fn in [
|
20
|
-
"ones",
|
21
|
-
"zeros",
|
22
|
-
], f"init_fn must be either 'ones' or 'zeros', got {init_fn}"
|
23
|
-
self.weight = nn.Parameter(torch.ones(hidden_size) if init_fn == "ones" else torch.zeros(hidden_size))
|
24
|
-
self.variance_epsilon, self.offset, self.casting_mode, self.in_place, self.row_mode = (
|
25
|
-
eps,
|
26
|
-
offset,
|
27
|
-
casting_mode,
|
28
|
-
in_place,
|
29
|
-
row_mode,
|
30
|
-
)
|
31
|
-
|
32
|
-
def forward(self, hidden_states):
|
33
|
-
return LigerRMSNormFunction.apply(
|
34
|
-
hidden_states,
|
35
|
-
self.weight,
|
36
|
-
self.variance_epsilon,
|
37
|
-
self.offset,
|
38
|
-
self.casting_mode,
|
39
|
-
self.in_place,
|
40
|
-
self.row_mode,
|
41
|
-
)
|
42
|
-
|
43
|
-
def extra_repr(self):
|
44
|
-
return (
|
45
|
-
f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}, offset={self.offset}, in_place={self.in_place}"
|
46
|
-
)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|