liger-kernel-nightly 0.5.8.dev20250429220905__tar.gz → 0.5.8.dev20250429233059__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/cross_entropy.py +4 -1
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/fused_linear_cross_entropy.py +4 -3
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +1 -2
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_cross_entropy.py +108 -2
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_fused_linear_cross_entropy.py +3 -4
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/Makefile +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/README.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/setup.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.8.dev20250429220905"
|
7
|
+
version = "0.5.8.dev20250429233059"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -351,7 +351,10 @@ def cross_entropy_backward(_input, grad_output):
|
|
351
351
|
# If cross entropy is the last layer, grad_output is 1.0. Skip the mul to save time
|
352
352
|
if torch.equal(grad_output, torch.tensor(1.0, device=grad_output.device)):
|
353
353
|
pass
|
354
|
-
|
354
|
+
# If reduction is 'none'
|
355
|
+
elif grad_output.ndim > 0:
|
356
|
+
_input = _input * grad_output.unsqueeze(dim=1)
|
357
|
+
# If reduction is ['mean', 'sum'], grad_output is just a scalar
|
355
358
|
# We use a Triton kernel instead of a PyTorch operation because modifying inputs in-place
|
356
359
|
# for gradient storage and backward multiple times causes anomalies with PyTorch but not with Triton.
|
357
360
|
else:
|
@@ -143,9 +143,10 @@ def fused_linear_cross_entropy_forward(
|
|
143
143
|
alpha=1.0,
|
144
144
|
)
|
145
145
|
|
146
|
-
if reduction == "none":
|
147
|
-
|
148
|
-
|
146
|
+
# Need extra calculations for backward if reduction=='none'. Not supporting reduction='none' now.
|
147
|
+
# if reduction == "none":
|
148
|
+
# loss = loss_1d
|
149
|
+
# z_loss = z_loss_1d if return_z_loss else None
|
149
150
|
|
150
151
|
else:
|
151
152
|
loss = torch.sum(loss_1d)
|
@@ -23,8 +23,7 @@ class LigerFusedLinearCrossEntropyLoss(torch.nn.Module):
|
|
23
23
|
assert reduction in {
|
24
24
|
"mean",
|
25
25
|
"sum",
|
26
|
-
|
27
|
-
}, f"reduction must be one of 'mean', 'sum', or 'none'. Got: {reduction}"
|
26
|
+
}, f"reduction must be 'mean' or 'sum'. Got: {reduction}"
|
28
27
|
assert softcap is None or softcap > 0, f"softcap must greater than 0.0 or None. Got: {softcap}"
|
29
28
|
self.ce_weight = ce_weight
|
30
29
|
self.ignore_index = ignore_index
|
@@ -400,8 +400,58 @@ def _test_correctness_not_last_layer_once(target_ce, B, T, V, reduction, scalar,
|
|
400
400
|
loss1 = output * 3
|
401
401
|
loss2 = output2 * 3
|
402
402
|
|
403
|
-
|
404
|
-
|
403
|
+
grad_output = torch.rand_like(output)
|
404
|
+
loss1.backward(gradient=grad_output)
|
405
|
+
loss2.backward(gradient=grad_output)
|
406
|
+
assert torch.allclose(_input.grad, _input2.grad, atol=atol, rtol=rtol)
|
407
|
+
|
408
|
+
|
409
|
+
def _test_correctness_not_last_layer_with_other_params_once(
|
410
|
+
target_ce,
|
411
|
+
B,
|
412
|
+
T,
|
413
|
+
V,
|
414
|
+
reduction,
|
415
|
+
ignore_index,
|
416
|
+
lse_square_scale,
|
417
|
+
label_smoothing,
|
418
|
+
softcap,
|
419
|
+
scalar,
|
420
|
+
dtype,
|
421
|
+
atol,
|
422
|
+
rtol,
|
423
|
+
):
|
424
|
+
torch_ce = CrossEntropyWithZLoss(
|
425
|
+
reduction=reduction,
|
426
|
+
ignore_index=ignore_index,
|
427
|
+
lse_square_scale=lse_square_scale,
|
428
|
+
label_smoothing=label_smoothing,
|
429
|
+
)
|
430
|
+
|
431
|
+
_tensor = torch.randn(B * T, V, device=device, dtype=dtype) * scalar
|
432
|
+
_input = _tensor.detach().clone().requires_grad_(True)
|
433
|
+
_input2 = _tensor.detach().clone().requires_grad_(True)
|
434
|
+
|
435
|
+
target = torch.randint(0, V, (B * T,), device=device, dtype=torch.long)
|
436
|
+
# Assign some random number of elements as ignore_index
|
437
|
+
num_elements_to_assign = torch.randint(
|
438
|
+
1, B * T // 2, (1,)
|
439
|
+
).item() # Random number of elements to set to ignore_index
|
440
|
+
indices_to_assign = torch.randperm(B * T)[:num_elements_to_assign] # Randomly select indices
|
441
|
+
target[indices_to_assign] = ignore_index
|
442
|
+
|
443
|
+
# upcasting to match liger's casting strategy
|
444
|
+
# and downcasting to original dtype
|
445
|
+
output = torch_ce(softcap * torch.tanh(_input.to(torch.float32) / softcap), target).to(dtype)
|
446
|
+
output2 = target_ce(_input2, target)
|
447
|
+
assert torch.allclose(output, output2, atol=atol, rtol=rtol)
|
448
|
+
|
449
|
+
loss1 = output * 3
|
450
|
+
loss2 = output2 * 3
|
451
|
+
|
452
|
+
grad_output = torch.rand_like(output)
|
453
|
+
loss1.backward(gradient=grad_output)
|
454
|
+
loss2.backward(gradient=grad_output)
|
405
455
|
assert torch.allclose(_input.grad, _input2.grad, atol=atol, rtol=rtol)
|
406
456
|
|
407
457
|
|
@@ -864,6 +914,62 @@ def test_correctness_not_last_layer(B, T, V, reduction, scalar, dtype, atol, rto
|
|
864
914
|
_test_correctness_not_last_layer_once(liger_ce, B, T, V, reduction, scalar, dtype, atol, rtol)
|
865
915
|
|
866
916
|
|
917
|
+
@pytest.mark.parametrize(
|
918
|
+
"B, T, V",
|
919
|
+
[
|
920
|
+
(2, 1024, 32000), # llama2, mistral
|
921
|
+
# # weird shapes
|
922
|
+
(3, 423, 32000),
|
923
|
+
],
|
924
|
+
)
|
925
|
+
@pytest.mark.parametrize(
|
926
|
+
"ignore_index, lse_square_scale, label_smoothing, softcap",
|
927
|
+
[
|
928
|
+
(-100, 1e-4, 0.1, 30.0),
|
929
|
+
(42, 1e-5, 0.2, 40.0),
|
930
|
+
],
|
931
|
+
)
|
932
|
+
@pytest.mark.parametrize("reduction", ["sum", "mean"])
|
933
|
+
@pytest.mark.parametrize(
|
934
|
+
"scalar, dtype, atol, rtol",
|
935
|
+
[
|
936
|
+
pytest.param(
|
937
|
+
1.0,
|
938
|
+
torch.bfloat16,
|
939
|
+
1e-8,
|
940
|
+
5e-2,
|
941
|
+
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
942
|
+
),
|
943
|
+
(1.0, torch.float32, 1e-8, 1e-5),
|
944
|
+
],
|
945
|
+
)
|
946
|
+
def test_correctness_not_last_layer_with_other_params(
|
947
|
+
B, T, V, reduction, ignore_index, lse_square_scale, label_smoothing, softcap, scalar, dtype, atol, rtol
|
948
|
+
):
|
949
|
+
liger_ce = LigerCrossEntropyLoss(
|
950
|
+
reduction=reduction,
|
951
|
+
ignore_index=ignore_index,
|
952
|
+
lse_square_scale=lse_square_scale,
|
953
|
+
label_smoothing=label_smoothing,
|
954
|
+
softcap=softcap,
|
955
|
+
)
|
956
|
+
_test_correctness_not_last_layer_with_other_params_once(
|
957
|
+
liger_ce,
|
958
|
+
B,
|
959
|
+
T,
|
960
|
+
V,
|
961
|
+
reduction,
|
962
|
+
ignore_index,
|
963
|
+
lse_square_scale,
|
964
|
+
label_smoothing,
|
965
|
+
softcap,
|
966
|
+
scalar,
|
967
|
+
dtype,
|
968
|
+
atol,
|
969
|
+
rtol,
|
970
|
+
)
|
971
|
+
|
972
|
+
|
867
973
|
def test_float32_internal():
|
868
974
|
"""
|
869
975
|
This test validates that the internal softmax calculations occur in float32,
|
@@ -116,8 +116,6 @@ class LigerLMHeadCE(torch.nn.Module):
|
|
116
116
|
("mean", 1.0, torch.float32, 1e-5, 5e-4),
|
117
117
|
("sum", 1.0, torch.bfloat16, 5e-0, 5e1),
|
118
118
|
("sum", 1.0, torch.float32, 1e-3, 5e-2),
|
119
|
-
("none", 1.0, torch.bfloat16, 5e-0, 5e1),
|
120
|
-
("none", 1.0, torch.float32, 1e-3, 5e-2),
|
121
119
|
],
|
122
120
|
)
|
123
121
|
@pytest.mark.parametrize("bias", [True, False])
|
@@ -207,8 +205,9 @@ def test_correctness(
|
|
207
205
|
if return_z_loss:
|
208
206
|
assert_verbose_allclose(z_output1, z_output2, atol=atol, rtol=rtol)
|
209
207
|
|
210
|
-
|
211
|
-
|
208
|
+
grad_output = torch.ones_like(output1)
|
209
|
+
output1.backward(gradient=grad_output)
|
210
|
+
output2.backward(gradient=grad_output)
|
212
211
|
|
213
212
|
assert_verbose_allclose(_input1.grad, _input2.grad, atol=atol, rtol=rtol)
|
214
213
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.5.8.dev20250429220905 → liger_kernel_nightly-0.5.8.dev20250429233059}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|