liger-kernel-nightly 0.5.10.dev20250528223524__tar.gz → 0.5.10.dev20250531184114__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/PKG-INFO +3 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/README.md +2 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_softmax.py +3 -3
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/Low-Level-APIs.md +15 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/softmax.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel_nightly.egg-info/PKG-INFO +3 -1
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_softmax.py +2 -2
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250528223524 → liger_kernel_nightly-0.5.10.dev20250531184114}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: liger_kernel_nightly
|
3
|
-
Version: 0.5.10.dev20250528223524
|
3
|
+
Version: 0.5.10.dev20250531184114
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
@@ -325,6 +325,8 @@ loss.backward()
|
|
325
325
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
326
326
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
327
327
|
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
328
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
329
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
328
330
|
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
329
331
|
|
330
332
|
|
@@ -277,6 +277,8 @@ loss.backward()
|
|
277
277
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
278
278
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
279
279
|
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
280
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
281
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
280
282
|
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
281
283
|
|
282
284
|
|
@@ -8,7 +8,7 @@ from utils import _test_memory
|
|
8
8
|
from utils import parse_benchmark_script_args
|
9
9
|
from utils import run_benchmarks
|
10
10
|
|
11
|
-
from liger_kernel.transformers.softmax import
|
11
|
+
from liger_kernel.transformers.softmax import LigerSoftmax
|
12
12
|
from liger_kernel.utils import infer_device
|
13
13
|
|
14
14
|
device = infer_device()
|
@@ -23,7 +23,7 @@ def bench_speed_softmax(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOut
|
|
23
23
|
dtype = extra_benchmark_config["dtype"]
|
24
24
|
|
25
25
|
x_shape = (M, N)
|
26
|
-
liger_softmax =
|
26
|
+
liger_softmax = LigerSoftmax().to(device).to(dtype)
|
27
27
|
torch_softmax = torch.nn.Softmax(dim=-1).to(device).to(dtype)
|
28
28
|
|
29
29
|
x = torch.randn(x_shape, dtype=dtype, device=device)
|
@@ -72,7 +72,7 @@ def bench_memory_softmax(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOu
|
|
72
72
|
dtype = extra_benchmark_config.get("dtype", torch.float32)
|
73
73
|
|
74
74
|
torch_softmax = torch.nn.Softmax(dim=-1)
|
75
|
-
liger_softmax =
|
75
|
+
liger_softmax = LigerSoftmax().to(device).to(dtype)
|
76
76
|
|
77
77
|
x = torch.randn(shape, device=device, dtype=dtype, requires_grad=True)
|
78
78
|
|
@@ -9,6 +9,8 @@
|
|
9
9
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
10
10
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
11
11
|
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
12
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
13
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
12
14
|
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
13
15
|
|
14
16
|
|
@@ -51,6 +53,19 @@ This kernel combines linear transformations with cross-entropy loss calculations
|
|
51
53
|
!!! Example "Try it out"
|
52
54
|
You can experiment as shown in this example [here](https://colab.research.google.com/drive/1Z2QtvaIiLm5MWOs7X6ZPS1MN3hcIJFbj?usp=sharing)
|
53
55
|
|
56
|
+
### Multi Token Attention
|
57
|
+
|
58
|
+
The Multi Token Attention kernel implementation provides an optimized fused implementation of multi-token attention over the implemented PyTorch model baseline. This is a new attention mechanism, introduced by Meta Research, that can operate on multiple Q and K inputs.
|
59
|
+
|
60
|
+
Paper: https://arxiv.org/abs/2504.00927
|
61
|
+
|
62
|
+
### Softmax
|
63
|
+
|
64
|
+
The Softmax kernel implementation provides an optimized implementation of the softmax operation, which is a fundamental component in neural networks for converting raw scores into probability distributions.
|
65
|
+
|
66
|
+
The implementation shows notable speedups compared to the PyTorch Softmax implementation.
|
67
|
+
|
68
|
+
|
54
69
|
### Sparsemax
|
55
70
|
|
56
71
|
Sparsemax is a sparse alternative to softmax that produces sparse probability distributions. This kernel implements an efficient version of the sparsemax operation that can be used as a drop-in replacement for softmax in attention mechanisms or classification tasks.
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250528223524"
|
7
|
+
version = "0.5.10.dev20250531184114"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: liger_kernel_nightly
|
3
|
-
Version: 0.5.10.dev20250528223524
|
3
|
+
Version: 0.5.10.dev20250531184114
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
@@ -325,6 +325,8 @@ loss.backward()
|
|
325
325
|
| GeGLU | `liger_kernel.transformers.LigerGEGLUMLP` |
|
326
326
|
| CrossEntropy | `liger_kernel.transformers.LigerCrossEntropyLoss` |
|
327
327
|
| Fused Linear CrossEntropy | `liger_kernel.transformers.LigerFusedLinearCrossEntropyLoss`|
|
328
|
+
| Multi Token Attention | `liger_kernel.transformers.LigerMultiTokenAttention` |
|
329
|
+
| Softmax | `liger_kernel.transformers.LigerSoftmax` |
|
328
330
|
| Sparsemax | `liger_kernel.transformers.LigerSparsemax` |
|
329
331
|
|
330
332
|
|
@@ -6,7 +6,7 @@ from test.utils import set_seed
|
|
6
6
|
from test.utils import supports_bfloat16
|
7
7
|
|
8
8
|
from liger_kernel.transformers.functional import liger_softmax
|
9
|
-
from liger_kernel.transformers.softmax import
|
9
|
+
from liger_kernel.transformers.softmax import LigerSoftmax
|
10
10
|
from liger_kernel.utils import infer_device
|
11
11
|
|
12
12
|
device = infer_device()
|
@@ -47,7 +47,7 @@ def test_liger_softmax(shape, dtype, atol, rtol):
|
|
47
47
|
|
48
48
|
torch_softmax = torch.nn.Softmax(dim=-1)
|
49
49
|
ref_out = torch_softmax(x1)
|
50
|
-
liger_softmax =
|
50
|
+
liger_softmax = LigerSoftmax().to(device).to(dtype)
|
51
51
|
liger_out = liger_softmax(x2)
|
52
52
|
|
53
53
|
assert_verbose_allclose(ref_out, liger_out, atol=atol, rtol=rtol)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|