liger-kernel-nightly 0.5.10.dev20250528223524__tar.gz → 0.5.10.dev20250601024230__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/PKG-INFO +3 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/README.md +2 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_softmax.py +3 -3
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/Low-Level-APIs.md +15 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/rms_norm.py +31 -18
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/monkey_patch.py +2 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/rms_norm.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/softmax.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/PKG-INFO +3 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_softmax.py +2 -2
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250601024230}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: liger_kernel_nightly
|
3
|
-
Version: 0.5.10.dev20250528223524
|
3
|
+
Version: 0.5.10.dev20250601024230
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
@@ -325,6 +325,8 @@ loss.backward()
|
|
325
325
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
326
326
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
327
327
|
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
328
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
329
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
328
330
|
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
329
331
|
|
330
332
|
|
@@ -277,6 +277,8 @@ loss.backward()
|
|
277
277
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
278
278
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
279
279
|
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
280
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
281
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
280
282
|
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
281
283
|
|
282
284
|
|
@@ -8,7 +8,7 @@ from utils import _test_memory
|
|
8
8
|
from utils import parse_benchmark_script_args
|
9
9
|
from utils import run_benchmarks
|
10
10
|
|
11
|
-
from liger_kernel.transformers.softmax import
|
11
|
+
from liger_kernel.transformers.softmax import LigerSoftmax
|
12
12
|
from liger_kernel.utils import infer_device
|
13
13
|
|
14
14
|
device = infer_device()
|
@@ -23,7 +23,7 @@ def bench_speed_softmax(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOut
|
|
23
23
|
dtype = extra_benchmark_config["dtype"]
|
24
24
|
|
25
25
|
x_shape = (M, N)
|
26
|
-
liger_softmax =
|
26
|
+
liger_softmax = LigerSoftmax().to(device).to(dtype)
|
27
27
|
torch_softmax = torch.nn.Softmax(dim=-1).to(device).to(dtype)
|
28
28
|
|
29
29
|
x = torch.randn(x_shape, dtype=dtype, device=device)
|
@@ -72,7 +72,7 @@ def bench_memory_softmax(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOu
|
|
72
72
|
dtype = extra_benchmark_config.get("dtype", torch.float32)
|
73
73
|
|
74
74
|
torch_softmax = torch.nn.Softmax(dim=-1)
|
75
|
-
liger_softmax =
|
75
|
+
liger_softmax = LigerSoftmax().to(device).to(dtype)
|
76
76
|
|
77
77
|
x = torch.randn(shape, device=device, dtype=dtype, requires_grad=True)
|
78
78
|
|
@@ -9,6 +9,8 @@
|
|
9
9
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
10
10
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
11
11
|
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
12
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
13
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
12
14
|
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
13
15
|
|
14
16
|
|
@@ -51,6 +53,19 @@ This kernel combines linear transformations with cross-entropy loss calculations
|
|
51
53
|
!!! Example "Try it out"
|
52
54
|
You can experiment as shown in this example [here](https://colab.research.google.com/drive/1Z2QtvaIiLm5MWOs7X6ZPS1MN3hcIJFbj?usp=sharing)
|
53
55
|
|
56
|
+
### Multi Token Attention
|
57
|
+
|
58
|
+
The Multi Token Attention kernel implementation provides an optimized fused implementation of multi-token attention over the implemented PyTorch model baseline. Multi-token attention is a new attention mechanism, introduced by Meta Research, that can operate on multiple Q and K inputs.
|
59
|
+
|
60
|
+
Paper: https://arxiv.org/abs/2504.00927
|
61
|
+
|
62
|
+
### Softmax
|
63
|
+
|
64
|
+
The Softmax kernel implementation provides an optimized implementation of the softmax operation, which is a fundamental component in neural networks for converting raw scores into probability distributions.
|
65
|
+
|
66
|
+
The implementation shows notable speedups compared to the Softmax PyTorch implementation.
|
67
|
+
|
68
|
+
|
54
69
|
### Sparsemax
|
55
70
|
|
56
71
|
Sparsemax is a sparse alternative to softmax that produces sparse probability distributions. This kernel implements an efficient version of the sparsemax operation that can be used as a drop-in replacement for softmax in attention mechanisms or classification tasks.
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250528223524"
|
7
|
+
version = "0.5.10.dev20250601024230"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -193,6 +193,7 @@ def _rms_norm_backward_kernel(
|
|
193
193
|
|
194
194
|
tl.store(dW_ptr + row_block_id * dW_row_stride + col_offsets, dW_row, mask=mask)
|
195
195
|
|
196
|
+
|
196
197
|
@triton.jit
|
197
198
|
def _block_rms_norm_forward_kernel(
|
198
199
|
Y_ptr,
|
@@ -225,8 +226,11 @@ def _block_rms_norm_forward_kernel(
|
|
225
226
|
row_mask = row_idx < n_rows
|
226
227
|
col_mask = col_offsets < n_cols
|
227
228
|
|
228
|
-
|
229
|
-
|
229
|
+
X_row = tl.load(
|
230
|
+
X_ptr + row_idx[:, None] * X_row_stride + col_offsets[None, :],
|
231
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
232
|
+
other=0,
|
233
|
+
)
|
230
234
|
X_row_dtype = X_row.dtype
|
231
235
|
W_row = tl.load(W_ptr + col_offsets, mask=col_mask, other=0)
|
232
236
|
|
@@ -262,7 +266,12 @@ def _block_rms_norm_forward_kernel(
|
|
262
266
|
if casting_mode == _CASTING_MODE_GEMMA:
|
263
267
|
Y_row = Y_row.to(X_row_dtype)
|
264
268
|
|
265
|
-
tl.store(
|
269
|
+
tl.store(
|
270
|
+
Y_ptr + row_idx[:, None] * Y_row_stride + col_offsets[None, :],
|
271
|
+
Y_row,
|
272
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
273
|
+
)
|
274
|
+
|
266
275
|
|
267
276
|
@triton.jit
|
268
277
|
def _block_rms_norm_backward_kernel(
|
@@ -306,8 +315,16 @@ def _block_rms_norm_backward_kernel(
|
|
306
315
|
for start in range(pid * BLOCK_ROW, n_rows, NUM_SMS * BLOCK_ROW):
|
307
316
|
row_idx = start + tl.arange(0, BLOCK_ROW)
|
308
317
|
row_mask = row_idx < n_rows
|
309
|
-
dY_row = tl.load(
|
310
|
-
|
318
|
+
dY_row = tl.load(
|
319
|
+
dY_ptr + row_idx[:, None] * dY_row_stride + col_offsets[None, :],
|
320
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
321
|
+
other=0.0,
|
322
|
+
)
|
323
|
+
X_row = tl.load(
|
324
|
+
X_ptr + row_idx[:, None] * X_row_stride + col_offsets[None, :],
|
325
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
326
|
+
other=0.0,
|
327
|
+
)
|
311
328
|
|
312
329
|
# Get cached rms
|
313
330
|
rstd_row = tl.load(RSTD_ptr + row_idx * RSTD_row_stride, row_mask)
|
@@ -326,7 +343,9 @@ def _block_rms_norm_backward_kernel(
|
|
326
343
|
|
327
344
|
dX_row = rstd_row[:, None] * m
|
328
345
|
|
329
|
-
dX_row += (rstd_row[:, None]) * (
|
346
|
+
dX_row += (rstd_row[:, None]) * (
|
347
|
+
-(1 / n_cols) * (rstd_row * rstd_row * tl.sum(m * X_row, axis=1))[:, None] * X_row
|
348
|
+
)
|
330
349
|
|
331
350
|
# calculate the gradient of W
|
332
351
|
if casting_mode == _CASTING_MODE_LLAMA:
|
@@ -335,8 +354,11 @@ def _block_rms_norm_backward_kernel(
|
|
335
354
|
# here X_row is already in fp32 (see previous if block)
|
336
355
|
dW_row += tl.sum(dY_row * (X_row * rstd_row[:, None]), 0)
|
337
356
|
|
338
|
-
tl.store(
|
339
|
-
|
357
|
+
tl.store(
|
358
|
+
dX_ptr + row_idx[:, None] * dX_row_stride + col_offsets[None, :],
|
359
|
+
dX_row,
|
360
|
+
mask=row_mask[:, None] & col_mask[None, :],
|
361
|
+
)
|
340
362
|
|
341
363
|
tl.store(dW_ptr + pid * dW_row_stride + col_offsets, dW_row, mask=col_mask)
|
342
364
|
|
@@ -549,15 +571,6 @@ class LigerRMSNormFunction(torch.autograd.Function):
|
|
549
571
|
"""
|
550
572
|
X, W, RSTD = ctx.saved_tensors
|
551
573
|
dX, dW = rms_norm_backward(
|
552
|
-
dY,
|
553
|
-
X,
|
554
|
-
W,
|
555
|
-
RSTD,
|
556
|
-
ctx.offset,
|
557
|
-
ctx.casting_mode,
|
558
|
-
ctx.BLOCK_SIZE,
|
559
|
-
ctx.num_warps,
|
560
|
-
ctx.in_place,
|
561
|
-
ctx.row_mode
|
574
|
+
dY, X, W, RSTD, ctx.offset, ctx.casting_mode, ctx.BLOCK_SIZE, ctx.num_warps, ctx.in_place, ctx.row_mode
|
562
575
|
)
|
563
576
|
return dX, dW, None, None, None, None, None
|
@@ -776,7 +776,8 @@ def apply_liger_kernel_to_gemma3_text(
|
|
776
776
|
|
777
777
|
from transformers.models.gemma3 import modeling_gemma3
|
778
778
|
from transformers.models.gemma3.modeling_gemma3 import Gemma3DecoderLayer
|
779
|
-
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
779
|
+
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
780
|
+
from transformers.models.gemma3.modeling_gemma3 import Gemma3TextModel
|
780
781
|
|
781
782
|
from liger_kernel.transformers.gema3_rms import LigerRMSNormForGemma3
|
782
783
|
from liger_kernel.transformers.model.gemma3 import causal_forward
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: liger_kernel_nightly
|
3
|
-
Version: 0.5.10.dev20250528223524
|
3
|
+
Version: 0.5.10.dev20250601024230
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
@@ -325,6 +325,8 @@ loss.backward()
|
|
325
325
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
326
326
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
327
327
|
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
328
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
329
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
328
330
|
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
329
331
|
|
330
332
|
|
@@ -6,7 +6,7 @@ from test.utils import set_seed
|
|
6
6
|
from test.utils import supports_bfloat16
|
7
7
|
|
8
8
|
from liger_kernel.transformers.functional import liger_softmax
|
9
|
-
from liger_kernel.transformers.softmax import
|
9
|
+
from liger_kernel.transformers.softmax import LigerSoftmax
|
10
10
|
from liger_kernel.utils import infer_device
|
11
11
|
|
12
12
|
device = infer_device()
|
@@ -47,7 +47,7 @@ def test_liger_softmax(shape, dtype, atol, rtol):
|
|
47
47
|
|
48
48
|
torch_softmax = torch.nn.Softmax(dim=-1)
|
49
49
|
ref_out = torch_softmax(x1)
|
50
|
-
liger_softmax =
|
50
|
+
liger_softmax = LigerSoftmax().to(device).to(dtype)
|
51
51
|
liger_out = liger_softmax(x2)
|
52
52
|
|
53
53
|
assert_verbose_allclose(ref_out, liger_out, atol=atol, rtol=rtol)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|