liger-kernel-nightly 0.6.1.dev20250730201330__tar.gz → 0.6.1.dev20250805235740__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +11 -3
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/fused_linear_cross_entropy.py +21 -13
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/functional.py +2 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +3 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/phi3.py +0 -14
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/fp32/test_mini_models_multimodal.py +1 -1
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_fused_linear_cross_entropy.py +12 -5
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/Makefile +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/README.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/setup.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/test/utils.py +0 -0
@@ -34,10 +34,12 @@ class TorchLMHeadCE(torch.nn.Module):
|
|
34
34
|
|
35
35
|
|
36
36
|
class LigerLMHeadCE(torch.nn.Module):
|
37
|
-
def __init__(self, H: int, V: int, dtype: torch.dtype, ignore_index: int = -100):
|
37
|
+
def __init__(self, H: int, V: int, dtype: torch.dtype, ignore_index: int = -100, accum_dtype=None):
|
38
38
|
super().__init__()
|
39
39
|
self.lin = torch.nn.Linear(in_features=H, out_features=V, bias=False, dtype=dtype)
|
40
|
-
self.ce_loss = LigerFusedLinearCrossEntropyLoss(
|
40
|
+
self.ce_loss = LigerFusedLinearCrossEntropyLoss(
|
41
|
+
ignore_index=ignore_index, reduction="mean", accum_dtype=accum_dtype
|
42
|
+
)
|
41
43
|
|
42
44
|
def forward(self, x, y):
|
43
45
|
return self.ce_loss(self.lin.weight, x, y)
|
@@ -59,6 +61,7 @@ def bench_memory_fused_linear_cross_entropy(
|
|
59
61
|
|
60
62
|
torch_lm_head_ce = TorchLMHeadCE(H=H, V=V, dtype=dtype).to(device)
|
61
63
|
liger_lm_head_ce = LigerLMHeadCE(H=H, V=V, dtype=dtype).to(device)
|
64
|
+
liger_lm_head_ce_fp32_accum = LigerLMHeadCE(H=H, V=V, dtype=dtype, accum_dtype=torch.float32).to(device)
|
62
65
|
|
63
66
|
_input = torch.randn(BT, H, requires_grad=True, dtype=dtype, device=device)
|
64
67
|
target = torch.randint(V, (BT, 1), dtype=torch.long, device=device).squeeze(1)
|
@@ -66,6 +69,8 @@ def bench_memory_fused_linear_cross_entropy(
|
|
66
69
|
def fwd():
|
67
70
|
if provider == "liger":
|
68
71
|
return liger_lm_head_ce(_input, target)
|
72
|
+
elif provider == "liger-fp32-accum":
|
73
|
+
return liger_lm_head_ce_fp32_accum(_input, target)
|
69
74
|
elif provider == "huggingface":
|
70
75
|
return torch_lm_head_ce(_input, target)
|
71
76
|
|
@@ -98,6 +103,7 @@ def bench_speed_fused_linear_cross_entropy(
|
|
98
103
|
|
99
104
|
torch_lm_head_ce = TorchLMHeadCE(H=H, V=V, dtype=dtype).to(device)
|
100
105
|
liger_lm_head_ce = LigerLMHeadCE(H=H, V=V, dtype=dtype).to(device)
|
106
|
+
liger_lm_head_ce_fp32_accum = LigerLMHeadCE(H=H, V=V, dtype=dtype, accum_dtype=torch.float32).to(device)
|
101
107
|
|
102
108
|
_input = torch.randn(BT, H, requires_grad=True, dtype=dtype, device=device)
|
103
109
|
target = torch.randint(V, (BT, 1), dtype=torch.long, device=device).squeeze(1)
|
@@ -105,6 +111,8 @@ def bench_speed_fused_linear_cross_entropy(
|
|
105
111
|
def fwd():
|
106
112
|
if provider == "liger":
|
107
113
|
return liger_lm_head_ce(_input, target)
|
114
|
+
elif provider == "liger-fp32-accum":
|
115
|
+
return liger_lm_head_ce_fp32_accum(_input, target)
|
108
116
|
elif provider == "huggingface":
|
109
117
|
return torch_lm_head_ce(_input, target)
|
110
118
|
|
@@ -149,7 +157,7 @@ if __name__ == "__main__":
|
|
149
157
|
"x_name": "BT",
|
150
158
|
"x_label": "B x T",
|
151
159
|
"x_values": [2**i for i in range(12, 16)],
|
152
|
-
"kernel_providers": ["liger", "huggingface"],
|
160
|
+
"kernel_providers": ["liger", "liger-fp32-accum", "huggingface"],
|
153
161
|
"extra_benchmark_configs": [{"H": 4096, "V": 128256, "mode": "forward", "dtype": torch.bfloat16}],
|
154
162
|
"overwrite": args.overwrite,
|
155
163
|
}
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.6.1.dev20250730201330"
|
7
|
+
version = "0.6.1.dev20250805235740"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -25,6 +25,7 @@ def fused_linear_cross_entropy_forward(
|
|
25
25
|
reduction="mean",
|
26
26
|
softcap=None,
|
27
27
|
return_z_loss=False,
|
28
|
+
accum_dtype=None,
|
28
29
|
):
|
29
30
|
assert isinstance(return_z_loss, bool), f"return_z_loss must be True or False. Got: {return_z_loss}"
|
30
31
|
device = _input.device
|
@@ -44,10 +45,16 @@ def fused_linear_cross_entropy_forward(
|
|
44
45
|
chunk_size = triton.next_power_of_2(triton.cdiv(BT, inc_factor)) # (BT + inc_factor - 1) // inc_factor
|
45
46
|
num_chunks = triton.cdiv(BT, chunk_size) # (BT + chunk_size - 1) // chunk_size
|
46
47
|
|
47
|
-
grad_weight = torch.zeros_like(weight, device=device) if weight.requires_grad else None
|
48
48
|
grad_input = torch.zeros_like(_input, device=device)
|
49
|
-
|
50
|
-
# we use fp32 for loss accumulator
|
49
|
+
|
50
|
+
# we use fp32 for loss and gradients accumulator
|
51
|
+
if accum_dtype is None:
|
52
|
+
grad_weight = torch.zeros_like(weight, device=device) if weight.requires_grad else None
|
53
|
+
grad_bias = torch.zeros_like(bias, device=device) if bias is not None else None
|
54
|
+
else:
|
55
|
+
grad_weight = torch.zeros_like(weight, dtype=accum_dtype, device=device) if weight.requires_grad else None
|
56
|
+
grad_bias = torch.zeros_like(bias, dtype=accum_dtype, device=device) if bias is not None else None
|
57
|
+
|
51
58
|
loss_1d = torch.zeros(BT, dtype=torch.float32, device=device)
|
52
59
|
z_loss_1d = torch.zeros(BT, dtype=_input.dtype, device=_input.device) if return_z_loss else None
|
53
60
|
|
@@ -124,16 +131,7 @@ def fused_linear_cross_entropy_forward(
|
|
124
131
|
grad_input[start_idx:end_idx] = grad_logits_chunk @ weight
|
125
132
|
|
126
133
|
if grad_weight is not None:
|
127
|
-
torch.addmm(
|
128
|
-
input=grad_weight,
|
129
|
-
mat1=logits_chunk.t().to(
|
130
|
-
_input_chunk.dtype
|
131
|
-
), # In an autocast scenario without bias, differing logits_chunk data types will cause an addmm operation error.
|
132
|
-
mat2=_input_chunk,
|
133
|
-
out=grad_weight,
|
134
|
-
alpha=1.0,
|
135
|
-
beta=1.0,
|
136
|
-
)
|
134
|
+
grad_weight += torch.mm(grad_logits_chunk.t(), _input_chunk).float()
|
137
135
|
|
138
136
|
if bias is not None:
|
139
137
|
torch.add(
|
@@ -151,6 +149,11 @@ def fused_linear_cross_entropy_forward(
|
|
151
149
|
else:
|
152
150
|
loss = torch.sum(loss_1d)
|
153
151
|
z_loss = torch.sum(z_loss_1d) if return_z_loss else None
|
152
|
+
|
153
|
+
# Cast back to original dtype
|
154
|
+
grad_weight = grad_weight.to(weight.dtype) if grad_weight is not None else None
|
155
|
+
grad_bias = grad_bias.to(bias.dtype) if grad_bias is not None else None
|
156
|
+
|
154
157
|
return loss, z_loss, grad_input, grad_weight, grad_bias
|
155
158
|
|
156
159
|
|
@@ -217,6 +220,7 @@ class LigerFusedLinearCrossEntropyFunction(torch.autograd.Function):
|
|
217
220
|
reduction="mean",
|
218
221
|
softcap=None,
|
219
222
|
return_z_loss: bool = False,
|
223
|
+
accum_dtype=None,
|
220
224
|
):
|
221
225
|
"""
|
222
226
|
Fusing the last linear layer with cross-entropy loss
|
@@ -235,6 +239,8 @@ class LigerFusedLinearCrossEntropyFunction(torch.autograd.Function):
|
|
235
239
|
ignore_index: the index to ignore in the target
|
236
240
|
label_smoothing (float): The amount of smoothing when computing the loss, where 0.0 means no smoothing.
|
237
241
|
reduction: reduction to apply
|
242
|
+
accum_dtype (torch.dtype): the dtype of intermediate result buffers for weight and bias gradient accumulations.
|
243
|
+
Recommended to set `accum_dtype` to higher precision, e.g. `torch.float32`, if the training is unstable with original dtype. Default: `None`, performing accumulations in original dtype
|
238
244
|
"""
|
239
245
|
|
240
246
|
loss, z_loss, grad_input, grad_weight, grad_bias = fused_linear_cross_entropy_forward(
|
@@ -249,6 +255,7 @@ class LigerFusedLinearCrossEntropyFunction(torch.autograd.Function):
|
|
249
255
|
reduction=reduction,
|
250
256
|
softcap=softcap,
|
251
257
|
return_z_loss=return_z_loss,
|
258
|
+
accum_dtype=accum_dtype,
|
252
259
|
)
|
253
260
|
# downcast to dtype and store for backward
|
254
261
|
ctx.save_for_backward(
|
@@ -280,4 +287,5 @@ class LigerFusedLinearCrossEntropyFunction(torch.autograd.Function):
|
|
280
287
|
None,
|
281
288
|
None,
|
282
289
|
None,
|
290
|
+
None,
|
283
291
|
)
|
@@ -64,6 +64,7 @@ def liger_fused_linear_cross_entropy(
|
|
64
64
|
reduction: str = "mean",
|
65
65
|
softcap: Optional[float] = None,
|
66
66
|
return_z_loss: bool = False,
|
67
|
+
accum_dtype=None,
|
67
68
|
):
|
68
69
|
loss, z_loss = LigerFusedLinearCrossEntropyFunction.apply(
|
69
70
|
input,
|
@@ -77,6 +78,7 @@ def liger_fused_linear_cross_entropy(
|
|
77
78
|
reduction,
|
78
79
|
softcap,
|
79
80
|
return_z_loss,
|
81
|
+
accum_dtype,
|
80
82
|
)
|
81
83
|
if not return_z_loss:
|
82
84
|
return loss
|
@@ -15,6 +15,7 @@ class LigerFusedLinearCrossEntropyLoss(torch.nn.Module):
|
|
15
15
|
reduction: str = "mean",
|
16
16
|
softcap: Optional[float] = None,
|
17
17
|
return_z_loss: bool = False,
|
18
|
+
accum_dtype: Optional[torch.dtype] = None,
|
18
19
|
):
|
19
20
|
super().__init__()
|
20
21
|
assert (label_smoothing >= 0) and (label_smoothing <= 1), (
|
@@ -32,6 +33,7 @@ class LigerFusedLinearCrossEntropyLoss(torch.nn.Module):
|
|
32
33
|
self.reduction = reduction
|
33
34
|
self.softcap = softcap
|
34
35
|
self.return_z_loss = return_z_loss
|
36
|
+
self.accum_dtype = accum_dtype
|
35
37
|
|
36
38
|
def forward(self, lin_weight, _input, target, bias=None):
|
37
39
|
loss, z_loss = LigerFusedLinearCrossEntropyFunction.apply(
|
@@ -46,6 +48,7 @@ class LigerFusedLinearCrossEntropyLoss(torch.nn.Module):
|
|
46
48
|
self.reduction,
|
47
49
|
self.softcap,
|
48
50
|
self.return_z_loss,
|
51
|
+
self.accum_dtype,
|
49
52
|
)
|
50
53
|
if not self.return_z_loss:
|
51
54
|
return loss
|
@@ -180,20 +180,6 @@ def lce_forward(
|
|
180
180
|
'This is an example script .\n Certainly! Below is a sample script that demonstrates a simple task, such as calculating the sum'
|
181
181
|
```"""
|
182
182
|
|
183
|
-
from transformers.models.phi3.modeling_phi3 import logging
|
184
|
-
|
185
|
-
logger = logging.get_logger(__name__)
|
186
|
-
|
187
|
-
if (
|
188
|
-
use_cache
|
189
|
-
and self.config.rope_scaling
|
190
|
-
and cache_position is not None
|
191
|
-
and cache_position[0] == self.config.original_max_position_embeddings
|
192
|
-
):
|
193
|
-
logger.warning(
|
194
|
-
f"If you are not using the generate method, you may encounter nonsensical outputs after the {self.config.original_max_position_embeddings}th token, as the KV cache needs to be recomputed."
|
195
|
-
)
|
196
|
-
|
197
183
|
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
|
198
184
|
output_hidden_states = (
|
199
185
|
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
|
@@ -80,6 +80,7 @@ class LigerLMHeadCE(torch.nn.Module):
|
|
80
80
|
reduction: str = "mean",
|
81
81
|
softcap: Optional[float] = None,
|
82
82
|
return_z_loss: bool = False,
|
83
|
+
accum_dtype=None,
|
83
84
|
):
|
84
85
|
super().__init__()
|
85
86
|
self.lin = torch.nn.Linear(in_features=H, out_features=V, bias=bias, dtype=dtype)
|
@@ -91,6 +92,7 @@ class LigerLMHeadCE(torch.nn.Module):
|
|
91
92
|
reduction=reduction,
|
92
93
|
softcap=softcap,
|
93
94
|
return_z_loss=return_z_loss,
|
95
|
+
accum_dtype=accum_dtype,
|
94
96
|
)
|
95
97
|
|
96
98
|
def forward(self, x, y):
|
@@ -120,11 +122,11 @@ class LigerLMHeadCE(torch.nn.Module):
|
|
120
122
|
)
|
121
123
|
@pytest.mark.parametrize("bias", [True, False])
|
122
124
|
@pytest.mark.parametrize(
|
123
|
-
"has_ce_weight, label_smoothing, ignore_index, lse_square_scale, softcap, return_z_loss",
|
125
|
+
"has_ce_weight, label_smoothing, ignore_index, lse_square_scale, softcap, return_z_loss, accum_dtype",
|
124
126
|
[
|
125
|
-
(False, 0, -100, 0, None, False),
|
127
|
+
(False, 0, -100, 0, None, False, None),
|
126
128
|
# Pass non-default values once to ensure all params work along
|
127
|
-
(True, 0.1, 42, 1e-4, 30.0, True),
|
129
|
+
(True, 0.1, 42, 1e-4, 30.0, True, torch.float32),
|
128
130
|
],
|
129
131
|
)
|
130
132
|
def test_correctness(
|
@@ -142,6 +144,7 @@ def test_correctness(
|
|
142
144
|
reduction,
|
143
145
|
softcap,
|
144
146
|
return_z_loss,
|
147
|
+
accum_dtype,
|
145
148
|
atol,
|
146
149
|
rtol,
|
147
150
|
):
|
@@ -174,6 +177,7 @@ def test_correctness(
|
|
174
177
|
softcap=softcap,
|
175
178
|
return_z_loss=return_z_loss,
|
176
179
|
dtype=dtype,
|
180
|
+
accum_dtype=accum_dtype,
|
177
181
|
).to(device)
|
178
182
|
|
179
183
|
# init the linear in all CEs with the same weights
|
@@ -267,9 +271,10 @@ def test_correctness_functional(B, T, H, V, scalar, dtype, bias, ce_weight, atol
|
|
267
271
|
reduction="mean",
|
268
272
|
softcap=30.0,
|
269
273
|
return_z_loss=True,
|
274
|
+
accum_dtype=torch.float32,
|
270
275
|
)
|
271
276
|
y2, z2 = LigerFusedLinearCrossEntropyFunction.apply(
|
272
|
-
x2, weight, target, bias, ce_weight, -100, 1e-4, 0.1, "mean", 30.0, True
|
277
|
+
x2, weight, target, bias, ce_weight, -100, 1e-4, 0.1, "mean", 30.0, True, torch.float32
|
273
278
|
)
|
274
279
|
|
275
280
|
assert torch.allclose(y1, y2, atol=atol, rtol=rtol)
|
@@ -299,7 +304,8 @@ def test_correctness_functional(B, T, H, V, scalar, dtype, bias, ce_weight, atol
|
|
299
304
|
(False, torch.float16, 5e-3, 5e-2),
|
300
305
|
],
|
301
306
|
)
|
302
|
-
def test_amp(B, T, H, V, bias, cast_dtype, atol, rtol):
|
307
|
+
@pytest.mark.parametrize("accum_dtype", [None, torch.float32])
|
308
|
+
def test_amp(B, T, H, V, bias, cast_dtype, accum_dtype, atol, rtol):
|
303
309
|
dtype = torch.float32
|
304
310
|
torch_lm_head_ce = TorchLMHeadCE(
|
305
311
|
H=H,
|
@@ -316,6 +322,7 @@ def test_amp(B, T, H, V, bias, cast_dtype, atol, rtol):
|
|
316
322
|
label_smoothing=0.0,
|
317
323
|
reduction="mean",
|
318
324
|
dtype=dtype,
|
325
|
+
accum_dtype=accum_dtype,
|
319
326
|
).to(device)
|
320
327
|
|
321
328
|
# init the linear in all CEs with the same weights
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.6.1.dev20250730201330 → liger_kernel_nightly-0.6.1.dev20250805235740}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|