liger-kernel-nightly 0.5.10.dev20250531184114__tar.gz → 0.5.10.dev20250602014906__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/rms_norm.py +31 -18
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/monkey_patch.py +4 -2
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/rms_norm.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_monkey_patch.py +4 -4
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250602014906}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250531184114"
|
7
|
+
version = "0.5.10.dev20250602014906"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -193,6 +193,7 @@ def _rms_norm_backward_kernel(
|
|
193
193
|
|
194
194
|
tl.store(dW_ptr + row_block_id * dW_row_stride + col_offsets, dW_row, mask=mask)
|
195
195
|
|
196
|
+
|
196
197
|
@triton.jit
|
197
198
|
def _block_rms_norm_forward_kernel(
|
198
199
|
Y_ptr,
|
@@ -225,8 +226,11 @@ def _block_rms_norm_forward_kernel(
|
|
225
226
|
row_mask = row_idx < n_rows
|
226
227
|
col_mask = col_offsets < n_cols
|
227
228
|
|
228
|
-
|
229
|
-
|
229
|
+
X_row = tl.load(
|
230
|
+
X_ptr + row_idx[:, None] * X_row_stride + col_offsets[None, :],
|
231
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
232
|
+
other=0,
|
233
|
+
)
|
230
234
|
X_row_dtype = X_row.dtype
|
231
235
|
W_row = tl.load(W_ptr + col_offsets, mask=col_mask, other=0)
|
232
236
|
|
@@ -262,7 +266,12 @@ def _block_rms_norm_forward_kernel(
|
|
262
266
|
if casting_mode == _CASTING_MODE_GEMMA:
|
263
267
|
Y_row = Y_row.to(X_row_dtype)
|
264
268
|
|
265
|
-
tl.store(
|
269
|
+
tl.store(
|
270
|
+
Y_ptr + row_idx[:, None] * Y_row_stride + col_offsets[None, :],
|
271
|
+
Y_row,
|
272
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
273
|
+
)
|
274
|
+
|
266
275
|
|
267
276
|
@triton.jit
|
268
277
|
def _block_rms_norm_backward_kernel(
|
@@ -306,8 +315,16 @@ def _block_rms_norm_backward_kernel(
|
|
306
315
|
for start in range(pid * BLOCK_ROW, n_rows, NUM_SMS * BLOCK_ROW):
|
307
316
|
row_idx = start + tl.arange(0, BLOCK_ROW)
|
308
317
|
row_mask = row_idx < n_rows
|
309
|
-
dY_row = tl.load(
|
310
|
-
|
318
|
+
dY_row = tl.load(
|
319
|
+
dY_ptr + row_idx[:, None] * dY_row_stride + col_offsets[None, :],
|
320
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
321
|
+
other=0.0,
|
322
|
+
)
|
323
|
+
X_row = tl.load(
|
324
|
+
X_ptr + row_idx[:, None] * X_row_stride + col_offsets[None, :],
|
325
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
326
|
+
other=0.0,
|
327
|
+
)
|
311
328
|
|
312
329
|
# Get cached rms
|
313
330
|
rstd_row = tl.load(RSTD_ptr + row_idx * RSTD_row_stride, row_mask)
|
@@ -326,7 +343,9 @@ def _block_rms_norm_backward_kernel(
|
|
326
343
|
|
327
344
|
dX_row = rstd_row[:, None] * m
|
328
345
|
|
329
|
-
dX_row += (rstd_row[:, None]) * (
|
346
|
+
dX_row += (rstd_row[:, None]) * (
|
347
|
+
-(1 / n_cols) * (rstd_row * rstd_row * tl.sum(m * X_row, axis=1))[:, None] * X_row
|
348
|
+
)
|
330
349
|
|
331
350
|
# calculate the gradient of W
|
332
351
|
if casting_mode == _CASTING_MODE_LLAMA:
|
@@ -335,8 +354,11 @@ def _block_rms_norm_backward_kernel(
|
|
335
354
|
# here X_row is already in fp32 (see previous if block)
|
336
355
|
dW_row += tl.sum(dY_row * (X_row * rstd_row[:, None]), 0)
|
337
356
|
|
338
|
-
tl.store(
|
339
|
-
|
357
|
+
tl.store(
|
358
|
+
dX_ptr + row_idx[:, None] * dX_row_stride + col_offsets[None, :],
|
359
|
+
dX_row,
|
360
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
361
|
+
)
|
340
362
|
|
341
363
|
tl.store(dW_ptr + pid * dW_row_stride + col_offsets, dW_row, mask=col_mask)
|
342
364
|
|
@@ -549,15 +571,6 @@ class LigerRMSNormFunction(torch.autograd.Function):
|
|
549
571
|
"""
|
550
572
|
X, W, RSTD = ctx.saved_tensors
|
551
573
|
dX, dW = rms_norm_backward(
|
552
|
-
dY,
|
553
|
-
X,
|
554
|
-
W,
|
555
|
-
RSTD,
|
556
|
-
ctx.offset,
|
557
|
-
ctx.casting_mode,
|
558
|
-
ctx.BLOCK_SIZE,
|
559
|
-
ctx.num_warps,
|
560
|
-
ctx.in_place,
|
561
|
-
ctx.row_mode
|
574
|
+
dY, X, W, RSTD, ctx.offset, ctx.casting_mode, ctx.BLOCK_SIZE, ctx.num_warps, ctx.in_place, ctx.row_mode
|
562
575
|
)
|
563
576
|
return dX, dW, None, None, None, None, None
|
@@ -428,13 +428,14 @@ def apply_liger_kernel_to_mllama(
|
|
428
428
|
if isinstance(model, MllamaForConditionalGeneration):
|
429
429
|
language_model: MllamaForCausalLM = model.language_model
|
430
430
|
vision_model: MllamaVisionModel = model.vision_model
|
431
|
-
text_model: MllamaTextModel = language_model.model
|
431
|
+
text_model: MllamaTextModel = language_model
|
432
432
|
elif isinstance(model, MllamaForCausalLM):
|
433
433
|
text_model = model.model
|
434
434
|
vision_model = None
|
435
435
|
elif isinstance(model, MllamaTextModel):
|
436
436
|
text_model = model
|
437
437
|
vision_model = None
|
438
|
+
|
438
439
|
else:
|
439
440
|
raise ValueError(f"Unsupported Mllama model type: {type(model)}")
|
440
441
|
|
@@ -776,7 +777,8 @@ def apply_liger_kernel_to_gemma3_text(
|
|
776
777
|
|
777
778
|
from transformers.models.gemma3 import modeling_gemma3
|
778
779
|
from transformers.models.gemma3.modeling_gemma3 import Gemma3DecoderLayer
|
779
|
-
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
780
|
+
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
781
|
+
from transformers.models.gemma3.modeling_gemma3 import Gemma3TextModel
|
780
782
|
|
781
783
|
from liger_kernel.transformers.gema3_rms import LigerRMSNormForGemma3
|
782
784
|
from liger_kernel.transformers.model.gemma3 import causal_forward
|
@@ -348,10 +348,10 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
348
348
|
assert isinstance(dummy_model_instance, MllamaForConditionalGeneration)
|
349
349
|
|
350
350
|
# Check that model instance variables are not yet patched with Liger modules
|
351
|
-
assert inspect.getsource(dummy_model_instance.language_model.model.norm.forward) != inspect.getsource(
|
351
|
+
assert inspect.getsource(dummy_model_instance.language_model.norm.forward) != inspect.getsource(
|
352
352
|
LigerRMSNorm.forward
|
353
353
|
)
|
354
|
-
for layer in dummy_model_instance.language_model.model.layers:
|
354
|
+
for layer in dummy_model_instance.language_model.layers:
|
355
355
|
assert inspect.getsource(layer.mlp.forward) != inspect.getsource(LigerSwiGLUMLP.forward)
|
356
356
|
assert inspect.getsource(layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
357
357
|
assert inspect.getsource(layer.post_attention_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
@@ -377,10 +377,10 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
377
377
|
_apply_liger_kernel_to_instance(model=dummy_model_instance)
|
378
378
|
|
379
379
|
# Check that the model's instance variables were correctly patched with Liger modules
|
380
|
-
assert inspect.getsource(dummy_model_instance.language_model.model.norm.forward) == inspect.getsource(
|
380
|
+
assert inspect.getsource(dummy_model_instance.language_model.norm.forward) == inspect.getsource(
|
381
381
|
LigerRMSNorm.forward
|
382
382
|
)
|
383
|
-
for layer in dummy_model_instance.language_model.model.layers:
|
383
|
+
for layer in dummy_model_instance.language_model.layers:
|
384
384
|
assert inspect.getsource(layer.mlp.forward) == inspect.getsource(LigerSwiGLUMLP.forward)
|
385
385
|
assert inspect.getsource(layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
386
386
|
assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|