liger-kernel-nightly 0.5.4.dev20250304205249__tar.gz → 0.5.4.dev20250305231637__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/chunked_loss/test_kto_loss.py +70 -22
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/Makefile +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/README.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/setup.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/fused_linear_rlhf.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/test/utils.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "liger_kernel_nightly"
|
|
7
|
-
version = "0.5.4.dev20250304205249"
|
|
7
|
+
version = "0.5.4.dev20250305231637"
|
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -164,20 +164,22 @@ class LigerLMHeadKTO(torch.nn.Module):
|
|
|
164
164
|
],
|
|
165
165
|
)
|
|
166
166
|
@pytest.mark.parametrize(
|
|
167
|
-
"scalar, dtype, atol, rtol, atol_aux, rtol_aux",
|
|
167
|
+
"scalar, dtype, atol, rtol",
|
|
168
168
|
[
|
|
169
|
-
(1.0, torch.bfloat16, 5e-2, 5e-1
|
|
170
|
-
(1.0, torch.float32, 1e-5, 5e-4
|
|
169
|
+
(1.0, torch.bfloat16, 5e-2, 5e-1),
|
|
170
|
+
(1.0, torch.float32, 1e-5, 5e-4),
|
|
171
171
|
],
|
|
172
172
|
)
|
|
173
173
|
@pytest.mark.parametrize("bias", [True, False])
|
|
174
174
|
@pytest.mark.parametrize("ref_bias", [True, False])
|
|
175
175
|
@pytest.mark.parametrize("ignore_index, beta", [(-100, 0.1), (42, 0.2)])
|
|
176
|
-
def test_correctness(B, T, H, V, scalar, dtype, atol, rtol, atol_aux, rtol_aux, bias, ref_bias, ignore_index, beta):
|
|
176
|
+
def test_correctness(B, T, H, V, scalar, dtype, atol, rtol, bias, ref_bias, ignore_index, beta):
|
|
177
177
|
# Preference labels shape: [B]
|
|
178
178
|
# Create binary preference labels (0 or 1) for each sequence in the batch
|
|
179
179
|
# Used to indicate preferred sequences (1) vs non-preferred sequences (0)
|
|
180
180
|
preference_labels = torch.randint(2, (B,), dtype=torch.bool, device=device, requires_grad=False)
|
|
181
|
+
num_chosen_samples = preference_labels.sum()
|
|
182
|
+
num_rejected_samples = len(preference_labels) - num_chosen_samples
|
|
181
183
|
|
|
182
184
|
# Precomputed KL divergence between policy and reference distributions
|
|
183
185
|
kl = torch.randn(1, device=device, dtype=dtype)
|
|
@@ -247,13 +249,35 @@ def test_correctness(B, T, H, V, scalar, dtype, atol, rtol, atol_aux, rtol_aux,
|
|
|
247
249
|
|
|
248
250
|
assert len(aggregated_aux_outputs1) == len(aggregated_aux_outputs2)
|
|
249
251
|
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
252
|
+
# chosen_logps
|
|
253
|
+
chosen_logps_mean1 = aggregated_aux_outputs1[0] / ((num_chosen_samples) + 1e-20)
|
|
254
|
+
chosen_logps_mean2 = aggregated_aux_outputs2[0] / ((num_chosen_samples) + 1e-20)
|
|
255
|
+
assert_verbose_allclose(chosen_logps_mean1, chosen_logps_mean2, atol=atol, rtol=rtol)
|
|
256
|
+
|
|
257
|
+
# chosen_logits
|
|
258
|
+
chosen_logits_mean1 = aggregated_aux_outputs1[2] / ((num_chosen_samples * T * V) + 1e-20)
|
|
259
|
+
chosen_logits_mean2 = aggregated_aux_outputs2[2] / ((num_chosen_samples * T * V) + 1e-20)
|
|
260
|
+
assert_verbose_allclose(chosen_logits_mean1, chosen_logits_mean2, atol=atol, rtol=rtol)
|
|
261
|
+
|
|
262
|
+
# chosen_rewards
|
|
263
|
+
chosen_rewards_mean1 = aggregated_aux_outputs1[4] / ((num_chosen_samples) + 1e-20)
|
|
264
|
+
chosen_rewards_mean2 = aggregated_aux_outputs2[4] / ((num_chosen_samples) + 1e-20)
|
|
265
|
+
assert_verbose_allclose(chosen_rewards_mean1, chosen_rewards_mean2, atol=atol, rtol=rtol)
|
|
266
|
+
|
|
267
|
+
# rejected_logps
|
|
268
|
+
rejected_logps_mean1 = aggregated_aux_outputs1[1] / ((num_rejected_samples) + 1e-20)
|
|
269
|
+
rejected_logps_mean2 = aggregated_aux_outputs2[1] / ((num_rejected_samples) + 1e-20)
|
|
270
|
+
assert_verbose_allclose(rejected_logps_mean1, rejected_logps_mean2, atol=atol, rtol=rtol)
|
|
271
|
+
|
|
272
|
+
# rejected_logits
|
|
273
|
+
rejected_logits_mean1 = aggregated_aux_outputs1[3] / ((num_rejected_samples * T * V) + 1e-20)
|
|
274
|
+
rejected_logits_mean2 = aggregated_aux_outputs2[3] / ((num_rejected_samples * T * V) + 1e-20)
|
|
275
|
+
assert_verbose_allclose(rejected_logits_mean1, rejected_logits_mean2, atol=atol, rtol=rtol)
|
|
276
|
+
|
|
277
|
+
# rejected_rewards
|
|
278
|
+
rejected_rewards_mean1 = aggregated_aux_outputs1[5] / ((num_rejected_samples) + 1e-20)
|
|
279
|
+
rejected_rewards_mean2 = aggregated_aux_outputs2[5] / ((num_rejected_samples) + 1e-20)
|
|
280
|
+
assert_verbose_allclose(rejected_rewards_mean1, rejected_rewards_mean2, atol=atol, rtol=rtol)
|
|
257
281
|
|
|
258
282
|
loss1.backward()
|
|
259
283
|
loss2.backward()
|
|
@@ -288,19 +312,21 @@ def test_correctness(B, T, H, V, scalar, dtype, atol, rtol, atol_aux, rtol_aux,
|
|
|
288
312
|
],
|
|
289
313
|
)
|
|
290
314
|
@pytest.mark.parametrize(
|
|
291
|
-
"scalar, dtype, atol, rtol, atol_aux, rtol_aux",
|
|
315
|
+
"scalar, dtype, atol, rtol",
|
|
292
316
|
[
|
|
293
|
-
(1.0, torch.bfloat16, 5e-2, 5e-1
|
|
294
|
-
(1.0, torch.float32, 1e-5, 5e-4
|
|
317
|
+
(1.0, torch.bfloat16, 5e-2, 5e-1),
|
|
318
|
+
(1.0, torch.float32, 1e-5, 5e-4),
|
|
295
319
|
],
|
|
296
320
|
)
|
|
297
321
|
@pytest.mark.parametrize("bias", [True, False])
|
|
298
322
|
@pytest.mark.parametrize("ref_bias", [True, False])
|
|
299
|
-
def test_correctness_functional(B, T, H, V, scalar, dtype, atol, rtol, atol_aux, rtol_aux, bias, ref_bias):
|
|
323
|
+
def test_correctness_functional(B, T, H, V, scalar, dtype, atol, rtol, bias, ref_bias):
|
|
300
324
|
# Preference labels shape: [B]
|
|
301
325
|
# Create binary preference labels (0 or 1) for each sequence in the batch
|
|
302
326
|
# Used to indicate preferred sequences (1) vs non-preferred sequences (0)
|
|
303
327
|
preference_labels = torch.randint(2, (B,), dtype=torch.bool, device=device)
|
|
328
|
+
num_chosen_samples = preference_labels.sum()
|
|
329
|
+
num_rejected_samples = len(preference_labels) - num_chosen_samples
|
|
304
330
|
|
|
305
331
|
# Precomputed KL divergence between policy and reference distributions
|
|
306
332
|
kl = torch.randn(1, device=device, dtype=dtype)
|
|
@@ -365,13 +391,35 @@ def test_correctness_functional(B, T, H, V, scalar, dtype, atol, rtol, atol_aux,
|
|
|
365
391
|
|
|
366
392
|
assert len(aggregated_aux_outputs1) == len(aggregated_aux_outputs2)
|
|
367
393
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
394
|
+
# chosen_logps
|
|
395
|
+
chosen_logps_mean1 = aggregated_aux_outputs1[0] / ((num_chosen_samples) + 1e-20)
|
|
396
|
+
chosen_logps_mean2 = aggregated_aux_outputs2[0] / ((num_chosen_samples) + 1e-20)
|
|
397
|
+
assert_verbose_allclose(chosen_logps_mean1, chosen_logps_mean2, atol=atol, rtol=rtol)
|
|
398
|
+
|
|
399
|
+
# chosen_logits
|
|
400
|
+
chosen_logits_mean1 = aggregated_aux_outputs1[2] / ((num_chosen_samples * T * V) + 1e-20)
|
|
401
|
+
chosen_logits_mean2 = aggregated_aux_outputs2[2] / ((num_chosen_samples * T * V) + 1e-20)
|
|
402
|
+
assert_verbose_allclose(chosen_logits_mean1, chosen_logits_mean2, atol=atol, rtol=rtol)
|
|
403
|
+
|
|
404
|
+
# chosen_rewards
|
|
405
|
+
chosen_rewards_mean1 = aggregated_aux_outputs1[4] / ((num_chosen_samples) + 1e-20)
|
|
406
|
+
chosen_rewards_mean2 = aggregated_aux_outputs2[4] / ((num_chosen_samples) + 1e-20)
|
|
407
|
+
assert_verbose_allclose(chosen_rewards_mean1, chosen_rewards_mean2, atol=atol, rtol=rtol)
|
|
408
|
+
|
|
409
|
+
# rejected_logps
|
|
410
|
+
rejected_logps_mean1 = aggregated_aux_outputs1[1] / ((num_rejected_samples) + 1e-20)
|
|
411
|
+
rejected_logps_mean2 = aggregated_aux_outputs2[1] / ((num_rejected_samples) + 1e-20)
|
|
412
|
+
assert_verbose_allclose(rejected_logps_mean1, rejected_logps_mean2, atol=atol, rtol=rtol)
|
|
413
|
+
|
|
414
|
+
# rejected_logits
|
|
415
|
+
rejected_logits_mean1 = aggregated_aux_outputs1[3] / ((num_rejected_samples * T * V) + 1e-20)
|
|
416
|
+
rejected_logits_mean2 = aggregated_aux_outputs2[3] / ((num_rejected_samples * T * V) + 1e-20)
|
|
417
|
+
assert_verbose_allclose(rejected_logits_mean1, rejected_logits_mean2, atol=atol, rtol=rtol)
|
|
418
|
+
|
|
419
|
+
# rejected_rewards
|
|
420
|
+
rejected_rewards_mean1 = aggregated_aux_outputs1[5] / ((num_rejected_samples) + 1e-20)
|
|
421
|
+
rejected_rewards_mean2 = aggregated_aux_outputs2[5] / ((num_rejected_samples) + 1e-20)
|
|
422
|
+
assert_verbose_allclose(rejected_rewards_mean1, rejected_rewards_mean2, atol=atol, rtol=rtol)
|
|
375
423
|
|
|
376
424
|
loss1.backward()
|
|
377
425
|
loss2.backward()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{liger_kernel_nightly-0.5.4.dev20250304205249 → liger_kernel_nightly-0.5.4.dev20250305231637}/NOTICE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|