liger-kernel-nightly 0.5.6.dev20250411210855__tar.gz → 0.5.6.dev20250411224032__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/gemma2.py +1 -1
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/gemma3.py +1 -1
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/loss_utils.py +17 -10
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/Makefile +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/README.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/setup.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.6.dev20250411210855"
|
7
|
+
version = "0.5.6.dev20250411224032"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -222,7 +222,7 @@ def lce_forward(
|
|
222
222
|
lm_head_weight=self.lm_head.weight,
|
223
223
|
labels=labels,
|
224
224
|
hidden_size=self.config.hidden_size,
|
225
|
-
|
225
|
+
final_logit_softcapping=self.config.final_logit_softcapping,
|
226
226
|
**loss_kwargs,
|
227
227
|
)
|
228
228
|
|
@@ -112,7 +112,7 @@ def causal_forward(
|
|
112
112
|
lm_head_weight=self.lm_head.weight,
|
113
113
|
labels=labels,
|
114
114
|
hidden_size=self.config.hidden_size,
|
115
|
-
|
115
|
+
final_logit_softcapping=self.config.final_logit_softcapping,
|
116
116
|
**loss_kwargs,
|
117
117
|
)
|
118
118
|
|
@@ -1,14 +1,18 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
|
3
|
+
import torch
|
1
4
|
import torch.nn as nn
|
2
5
|
|
3
6
|
import liger_kernel.transformers.functional as F
|
4
7
|
|
5
8
|
|
6
9
|
def fixed_fused_linear_cross_entropy(
|
7
|
-
hidden_states,
|
8
|
-
lm_head_weight,
|
9
|
-
target,
|
10
|
-
num_items_in_batch: int = None,
|
10
|
+
hidden_states: torch.Tensor,
|
11
|
+
lm_head_weight: torch.Tensor,
|
12
|
+
target: torch.Tensor,
|
13
|
+
num_items_in_batch: Optional[int] = None,
|
11
14
|
ignore_index: int = -100,
|
15
|
+
final_logit_softcapping: Optional[float] = None,
|
12
16
|
**kwargs,
|
13
17
|
):
|
14
18
|
reduction = "sum" if num_items_in_batch is not None else "mean"
|
@@ -18,7 +22,7 @@ def fixed_fused_linear_cross_entropy(
|
|
18
22
|
target,
|
19
23
|
reduction=reduction,
|
20
24
|
ignore_index=ignore_index,
|
21
|
-
|
25
|
+
softcap=final_logit_softcapping,
|
22
26
|
)
|
23
27
|
if reduction == "sum":
|
24
28
|
loss = loss / num_items_in_batch
|
@@ -31,15 +35,17 @@ def LigerForCausalLMLoss(
|
|
31
35
|
lm_head_weight,
|
32
36
|
labels,
|
33
37
|
hidden_size: int,
|
34
|
-
num_items_in_batch: int = None,
|
38
|
+
num_items_in_batch: Optional[int] = None,
|
35
39
|
ignore_index: int = -100,
|
40
|
+
shift_labels: Optional[torch.Tensor] = None,
|
41
|
+
final_logit_softcapping: Optional[float] = None,
|
36
42
|
**kwargs,
|
37
43
|
):
|
38
44
|
# Skip upcast since intermediate values for the loss are all fp32 in kernel
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
45
|
+
if shift_labels is None:
|
46
|
+
# Shift so that token < n predict n
|
47
|
+
labels = nn.functional.pad(labels, (0, 1), value=ignore_index)
|
48
|
+
shift_labels = labels[..., 1:].contiguous()
|
43
49
|
|
44
50
|
# Flatten the tokens
|
45
51
|
hidden_states = hidden_states.view(-1, hidden_size)
|
@@ -52,6 +58,7 @@ def LigerForCausalLMLoss(
|
|
52
58
|
shift_labels,
|
53
59
|
num_items_in_batch,
|
54
60
|
ignore_index,
|
61
|
+
final_logit_softcapping,
|
55
62
|
**kwargs,
|
56
63
|
)
|
57
64
|
return loss
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.5.6.dev20250411210855 → liger_kernel_nightly-0.5.6.dev20250411224032}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|