liger-kernel-nightly 0.5.10.dev20250531184114__tar.gz → 0.5.10.dev20250601024230__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/rms_norm.py +31 -18
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/monkey_patch.py +2 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/rms_norm.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250531184114 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250531184114"
|
7
|
+
version = "0.5.10.dev20250601024230"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -193,6 +193,7 @@ def _rms_norm_backward_kernel(
|
|
193
193
|
|
194
194
|
tl.store(dW_ptr + row_block_id * dW_row_stride + col_offsets, dW_row, mask=mask)
|
195
195
|
|
196
|
+
|
196
197
|
@triton.jit
|
197
198
|
def _block_rms_norm_forward_kernel(
|
198
199
|
Y_ptr,
|
@@ -225,8 +226,11 @@ def _block_rms_norm_forward_kernel(
|
|
225
226
|
row_mask = row_idx < n_rows
|
226
227
|
col_mask = col_offsets < n_cols
|
227
228
|
|
228
|
-
|
229
|
-
|
229
|
+
X_row = tl.load(
|
230
|
+
X_ptr + row_idx[:, None] * X_row_stride + col_offsets[None, :],
|
231
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
232
|
+
other=0,
|
233
|
+
)
|
230
234
|
X_row_dtype = X_row.dtype
|
231
235
|
W_row = tl.load(W_ptr + col_offsets, mask=col_mask, other=0)
|
232
236
|
|
@@ -262,7 +266,12 @@ def _block_rms_norm_forward_kernel(
|
|
262
266
|
if casting_mode == _CASTING_MODE_GEMMA:
|
263
267
|
Y_row = Y_row.to(X_row_dtype)
|
264
268
|
|
265
|
-
tl.store(
|
269
|
+
tl.store(
|
270
|
+
Y_ptr + row_idx[:, None] * Y_row_stride + col_offsets[None, :],
|
271
|
+
Y_row,
|
272
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
273
|
+
)
|
274
|
+
|
266
275
|
|
267
276
|
@triton.jit
|
268
277
|
def _block_rms_norm_backward_kernel(
|
@@ -306,8 +315,16 @@ def _block_rms_norm_backward_kernel(
|
|
306
315
|
for start in range(pid * BLOCK_ROW, n_rows, NUM_SMS * BLOCK_ROW):
|
307
316
|
row_idx = start + tl.arange(0, BLOCK_ROW)
|
308
317
|
row_mask = row_idx < n_rows
|
309
|
-
dY_row = tl.load(
|
310
|
-
|
318
|
+
dY_row = tl.load(
|
319
|
+
dY_ptr + row_idx[:, None] * dY_row_stride + col_offsets[None, :],
|
320
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
321
|
+
other=0.0,
|
322
|
+
)
|
323
|
+
X_row = tl.load(
|
324
|
+
X_ptr + row_idx[:, None] * X_row_stride + col_offsets[None, :],
|
325
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
326
|
+
other=0.0,
|
327
|
+
)
|
311
328
|
|
312
329
|
# Get cached rms
|
313
330
|
rstd_row = tl.load(RSTD_ptr + row_idx * RSTD_row_stride, row_mask)
|
@@ -326,7 +343,9 @@ def _block_rms_norm_backward_kernel(
|
|
326
343
|
|
327
344
|
dX_row = rstd_row[:, None] * m
|
328
345
|
|
329
|
-
dX_row += (rstd_row[:, None]) * (
|
346
|
+
dX_row += (rstd_row[:, None]) * (
|
347
|
+
-(1 / n_cols) * (rstd_row * rstd_row * tl.sum(m * X_row, axis=1))[:, None] * X_row
|
348
|
+
)
|
330
349
|
|
331
350
|
# calculate the gradient of W
|
332
351
|
if casting_mode == _CASTING_MODE_LLAMA:
|
@@ -335,8 +354,11 @@ def _block_rms_norm_backward_kernel(
|
|
335
354
|
# here X_row is already in fp32 (see previous if block)
|
336
355
|
dW_row += tl.sum(dY_row * (X_row * rstd_row[:, None]), 0)
|
337
356
|
|
338
|
-
tl.store(
|
339
|
-
|
357
|
+
tl.store(
|
358
|
+
dX_ptr + row_idx[:, None] * dX_row_stride + col_offsets[None, :],
|
359
|
+
dX_row,
|
360
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
361
|
+
)
|
340
362
|
|
341
363
|
tl.store(dW_ptr + pid * dW_row_stride + col_offsets, dW_row, mask=col_mask)
|
342
364
|
|
@@ -549,15 +571,6 @@ class LigerRMSNormFunction(torch.autograd.Function):
|
|
549
571
|
"""
|
550
572
|
X, W, RSTD = ctx.saved_tensors
|
551
573
|
dX, dW = rms_norm_backward(
|
552
|
-
dY,
|
553
|
-
X,
|
554
|
-
W,
|
555
|
-
RSTD,
|
556
|
-
ctx.offset,
|
557
|
-
ctx.casting_mode,
|
558
|
-
ctx.BLOCK_SIZE,
|
559
|
-
ctx.num_warps,
|
560
|
-
ctx.in_place,
|
561
|
-
ctx.row_mode
|
574
|
+
dY, X, W, RSTD, ctx.offset, ctx.casting_mode, ctx.BLOCK_SIZE, ctx.num_warps, ctx.in_place, ctx.row_mode
|
562
575
|
)
|
563
576
|
return dX, dW, None, None, None, None, None
|
@@ -776,7 +776,8 @@ def apply_liger_kernel_to_gemma3_text(
|
|
776
776
|
|
777
777
|
from transformers.models.gemma3 import modeling_gemma3
|
778
778
|
from transformers.models.gemma3.modeling_gemma3 import Gemma3DecoderLayer
|
779
|
-
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
779
|
+
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
780
|
+
from transformers.models.gemma3.modeling_gemma3 import Gemma3TextModel
|
780
781
|
|
781
782
|
from liger_kernel.transformers.gema3_rms import LigerRMSNormForGemma3
|
782
783
|
from liger_kernel.transformers.model.gemma3 import causal_forward
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|