liger-kernel-nightly 0.5.10.dev20250611215839__tar.gz → 0.5.10.dev20250613212111__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_cpo_loss.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_cross_entropy.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_distill_jsd_loss.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_dpo_loss.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_embedding.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_linear_jsd.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_jsd.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_kl_div.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_kto_loss.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_layer_norm.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_orpo_loss.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_simpo_loss.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_softmax.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_swiglu.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_tvd.py +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/utils.py +1 -1
- liger_kernel_nightly-0.5.10.dev20250613212111/dev/modal/benchmarks.py +73 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models.py +20 -18
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models_multimodal.py +16 -10
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models_with_logits.py +18 -11
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models.py +14 -12
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models_multimodal.py +19 -12
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models_with_logits.py +15 -9
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/utils.py +11 -0
- liger_kernel_nightly-0.5.10.dev20250611215839/dev/modal/benchmarks.py +0 -28
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/benchmark.yml +2 -2
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250611215839 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/triton/test_triton_monkey_patch.py +0 -0
@@ -147,7 +147,7 @@ if __name__ == "__main__":
|
|
147
147
|
|
148
148
|
run_benchmarks(
|
149
149
|
bench_test_fn=bench_speed_fused_linear_cpo_loss,
|
150
|
-
kernel_operation_modes=["forward", "full"],
|
150
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
151
151
|
metric_name="speed",
|
152
152
|
metric_unit="ms",
|
153
153
|
**common_configs,
|
@@ -109,7 +109,7 @@ if __name__ == "__main__":
|
|
109
109
|
|
110
110
|
run_benchmarks(
|
111
111
|
bench_test_fn=bench_speed_cross_entropy,
|
112
|
-
kernel_operation_modes=["forward", "full"],
|
112
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
113
113
|
metric_name="speed",
|
114
114
|
metric_unit="ms",
|
115
115
|
**common_configs,
|
@@ -248,7 +248,7 @@ if __name__ == "__main__":
|
|
248
248
|
|
249
249
|
run_benchmarks(
|
250
250
|
bench_test_fn=bench_speed_jsd_loss,
|
251
|
-
kernel_operation_modes=["forward", "full"],
|
251
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
252
252
|
metric_name="speed",
|
253
253
|
metric_unit="ms",
|
254
254
|
**common_configs,
|
@@ -166,7 +166,7 @@ if __name__ == "__main__":
|
|
166
166
|
|
167
167
|
run_benchmarks(
|
168
168
|
bench_test_fn=bench_speed_dpo_loss,
|
169
|
-
kernel_operation_modes=["forward", "full"],
|
169
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
170
170
|
metric_name="speed",
|
171
171
|
metric_unit="ms",
|
172
172
|
**common_configs,
|
@@ -112,7 +112,7 @@ if __name__ == "__main__":
|
|
112
112
|
|
113
113
|
run_benchmarks(
|
114
114
|
bench_test_fn=bench_speed_embedding,
|
115
|
-
kernel_operation_modes=["forward", "full"],
|
115
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
116
116
|
metric_name="speed",
|
117
117
|
metric_unit="ms",
|
118
118
|
**common_configs,
|
@@ -156,7 +156,7 @@ if __name__ == "__main__":
|
|
156
156
|
|
157
157
|
run_benchmarks(
|
158
158
|
bench_test_fn=bench_speed_fused_linear_cross_entropy,
|
159
|
-
kernel_operation_modes=["forward", "full"],
|
159
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
160
160
|
metric_name="speed",
|
161
161
|
metric_unit="ms",
|
162
162
|
**common_configs,
|
@@ -246,7 +246,7 @@ if __name__ == "__main__":
|
|
246
246
|
|
247
247
|
run_benchmarks(
|
248
248
|
bench_test_fn=bench_speed_fused_linear_jsd,
|
249
|
-
kernel_operation_modes=["forward", "full"],
|
249
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
250
250
|
metric_name="speed",
|
251
251
|
metric_unit="ms",
|
252
252
|
**common_configs,
|
@@ -143,7 +143,7 @@ if __name__ == "__main__":
|
|
143
143
|
|
144
144
|
run_benchmarks(
|
145
145
|
bench_test_fn=bench_speed_jsd,
|
146
|
-
kernel_operation_modes=["forward", "full"],
|
146
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
147
147
|
metric_name="speed",
|
148
148
|
metric_unit="ms",
|
149
149
|
**common_args,
|
@@ -110,7 +110,7 @@ if __name__ == "__main__":
|
|
110
110
|
|
111
111
|
run_benchmarks(
|
112
112
|
bench_test_fn=bench_speed_kldiv,
|
113
|
-
kernel_operation_modes=["forward", "full"],
|
113
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
114
114
|
metric_name="speed",
|
115
115
|
metric_unit="ms",
|
116
116
|
**common_args,
|
@@ -299,7 +299,7 @@ if __name__ == "__main__":
|
|
299
299
|
|
300
300
|
run_benchmarks(
|
301
301
|
bench_test_fn=bench_speed_kto_loss,
|
302
|
-
kernel_operation_modes=["forward", "full"],
|
302
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
303
303
|
metric_name="speed",
|
304
304
|
metric_unit="ms",
|
305
305
|
**common_configs,
|
@@ -111,7 +111,7 @@ if __name__ == "__main__":
|
|
111
111
|
|
112
112
|
run_benchmarks(
|
113
113
|
bench_test_fn=bench_speed_layer_norm,
|
114
|
-
kernel_operation_modes=["forward", "full"],
|
114
|
+
kernel_operation_modes=["forward", "backward", "full"],
|
115
115
|
metric_name="speed",
|
116
116
|
metric_unit="ms",
|
117
117
|
**common_configs,
|
@@ -149,7 +149,7 @@ if __name__ == "__main__":
|
|
149
149
|
|
150
150
|
run_benchmarks(
|
151
151
|
bench_test_fn=bench_speed_fused_linear_orpo_loss,
|
152
|
-
kernel_operation_modes=["forward", "full"],
|
152
|
+
kernel_operation_modes=["forward", "full", "backward"],
|
153
153
|
metric_name="speed",
|
154
154
|
metric_unit="ms",
|
155
155
|
**common_configs,
|
@@ -147,7 +147,7 @@ if __name__ == "__main__":
|
|
147
147
|
|
148
148
|
run_benchmarks(
|
149
149
|
bench_test_fn=bench_speed_fused_linear_simpo_loss,
|
150
|
-
kernel_operation_modes=["forward", "full"],
|
150
|
+
kernel_operation_modes=["forward", "full", "backward"],
|
151
151
|
metric_name="speed",
|
152
152
|
metric_unit="ms",
|
153
153
|
**common_configs,
|
@@ -124,7 +124,7 @@ if __name__ == "__main__":
|
|
124
124
|
|
125
125
|
run_benchmarks(
|
126
126
|
bench_test_fn=bench_speed_softmax,
|
127
|
-
kernel_operation_modes=["forward", "full"],
|
127
|
+
kernel_operation_modes=["forward", "full", "backward"],
|
128
128
|
metric_name="speed",
|
129
129
|
metric_unit="ms",
|
130
130
|
overwrite=args.overwrite,
|
@@ -161,7 +161,7 @@ if __name__ == "__main__":
|
|
161
161
|
|
162
162
|
run_benchmarks(
|
163
163
|
bench_test_fn=bench_speed_swiglu,
|
164
|
-
kernel_operation_modes=["forward"],
|
164
|
+
kernel_operation_modes=["forward", "full", "backward"],
|
165
165
|
metric_name="speed",
|
166
166
|
metric_unit="ms",
|
167
167
|
**common_configs,
|
@@ -126,7 +126,7 @@ if __name__ == "__main__":
|
|
126
126
|
|
127
127
|
run_benchmarks(
|
128
128
|
bench_test_fn=bench_speed_tvd,
|
129
|
-
kernel_operation_modes=["forward", "full"],
|
129
|
+
kernel_operation_modes=["forward", "full", "backward"],
|
130
130
|
metric_name="speed",
|
131
131
|
metric_unit="ms",
|
132
132
|
**common_args,
|
@@ -235,7 +235,7 @@ def update_benchmark_data_csv(
|
|
235
235
|
pass
|
236
236
|
else:
|
237
237
|
existing_data_dict[row_key] = row_dict
|
238
|
-
|
238
|
+
os.makedirs(os.path.dirname(filename_abs_path), exist_ok=True)
|
239
239
|
with open(filename_abs_path, mode="w", newline="") as file:
|
240
240
|
writer = csv.DictWriter(file, fieldnames=fieldnames)
|
241
241
|
writer.writeheader()
|
@@ -0,0 +1,73 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
import modal
|
4
|
+
|
5
|
+
ROOT_PATH = Path(__file__).parent.parent.parent
|
6
|
+
REMOTE_ROOT_PATH = "/root/liger-kernel"
|
7
|
+
PYTHON_VERSION = "3.12"
|
8
|
+
|
9
|
+
image = modal.Image.debian_slim(python_version=PYTHON_VERSION).pip_install("uv")
|
10
|
+
|
11
|
+
app = modal.App("liger_benchmarks", image=image)
|
12
|
+
|
13
|
+
# mount: add local files to the remote container
|
14
|
+
repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
|
15
|
+
|
16
|
+
|
17
|
+
@app.function(gpu="H100", image=repo, timeout=60 * 45)
|
18
|
+
def liger_benchmarks():
|
19
|
+
import os
|
20
|
+
import subprocess
|
21
|
+
|
22
|
+
subprocess.run(
|
23
|
+
["uv pip install -e '.[dev]' --system"],
|
24
|
+
check=True,
|
25
|
+
shell=True,
|
26
|
+
cwd=REMOTE_ROOT_PATH,
|
27
|
+
)
|
28
|
+
subprocess.run(["make run-benchmarks"], check=True, shell=True, cwd=REMOTE_ROOT_PATH)
|
29
|
+
|
30
|
+
file_path = Path(REMOTE_ROOT_PATH) / "benchmark" / "data" / "all_benchmark_data.csv"
|
31
|
+
print(f"Checking if file exists at: {file_path}")
|
32
|
+
print(f"File exists: {os.path.exists(file_path)}")
|
33
|
+
|
34
|
+
if not os.path.exists(file_path):
|
35
|
+
print("Listing directory contents:")
|
36
|
+
data_dir = file_path.parent
|
37
|
+
if os.path.exists(data_dir):
|
38
|
+
print(f"Contents of {data_dir}:")
|
39
|
+
print(os.listdir(data_dir))
|
40
|
+
else:
|
41
|
+
print(f"Data directory {data_dir} does not exist")
|
42
|
+
raise FileNotFoundError(f"Benchmark data file not found at {file_path}")
|
43
|
+
|
44
|
+
with open(file_path, "rb") as f:
|
45
|
+
data = f.read()
|
46
|
+
print(f"Successfully read {len(data)} bytes of data")
|
47
|
+
return data
|
48
|
+
|
49
|
+
|
50
|
+
@app.local_entrypoint()
|
51
|
+
def main():
|
52
|
+
try:
|
53
|
+
# Run the benchmarks and get the data
|
54
|
+
print("Starting benchmark run...")
|
55
|
+
benchmark_data = liger_benchmarks.remote()
|
56
|
+
|
57
|
+
if not benchmark_data:
|
58
|
+
raise ValueError("No data received from remote function")
|
59
|
+
|
60
|
+
# Save the data locally
|
61
|
+
local_data_path = ROOT_PATH / "benchmark" / "data" / "all_benchmark_data.csv"
|
62
|
+
print(f"Attempting to save data to: {local_data_path}")
|
63
|
+
|
64
|
+
local_data_path.parent.mkdir(parents=True, exist_ok=True)
|
65
|
+
|
66
|
+
with open(local_data_path, "wb") as f:
|
67
|
+
f.write(benchmark_data)
|
68
|
+
|
69
|
+
print(f"Successfully saved {len(benchmark_data)} bytes to: {local_data_path}")
|
70
|
+
|
71
|
+
except Exception as e:
|
72
|
+
print(f"Error occurred: {str(e)}")
|
73
|
+
raise
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250611215839"
|
7
|
+
version = "0.5.10.dev20250613212111"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
38
38
|
from test.utils import DEFAULT_DATASET_PATH
|
39
39
|
from test.utils import MiniModelConfig
|
40
40
|
from test.utils import assert_verbose_allclose
|
41
|
+
from test.utils import get_logprobs
|
42
|
+
from test.utils import get_topk
|
41
43
|
from test.utils import revert_liger_kernel_to_gemma
|
42
44
|
from test.utils import revert_liger_kernel_to_gemma2
|
43
45
|
from test.utils import revert_liger_kernel_to_gemma3_text
|
@@ -851,17 +853,17 @@ def run_mini_model(
|
|
851
853
|
eval_output = model(**eval_batch)
|
852
854
|
print(f"Eval Loss: {eval_output.loss.item()}")
|
853
855
|
loss_list.append(eval_output.loss.item())
|
854
|
-
|
856
|
+
topk_logprobs = get_topk(get_logprobs(eval_output.logits))
|
855
857
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
856
858
|
return {
|
857
859
|
"loss": loss_list,
|
858
|
-
"
|
860
|
+
"topk_logprobs": topk_logprobs.values,
|
859
861
|
"model": model,
|
860
862
|
}
|
861
863
|
|
862
864
|
|
863
865
|
@pytest.mark.parametrize(
|
864
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
866
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
865
867
|
[
|
866
868
|
pytest.param(
|
867
869
|
"mini_llama3",
|
@@ -884,7 +886,7 @@ def run_mini_model(
|
|
884
886
|
1e-3,
|
885
887
|
1e-2,
|
886
888
|
1e-1,
|
887
|
-
1e-
|
889
|
+
1e-1,
|
888
890
|
1e-2,
|
889
891
|
1e-2,
|
890
892
|
marks=[
|
@@ -902,7 +904,7 @@ def run_mini_model(
|
|
902
904
|
torch.bfloat16,
|
903
905
|
1e-3,
|
904
906
|
1e-2,
|
905
|
-
1, # 1e-1
|
907
|
+
1e-1, # 1e-1
|
906
908
|
1e-1, # 1e-2
|
907
909
|
1e-2,
|
908
910
|
1e-2,
|
@@ -972,7 +974,7 @@ def run_mini_model(
|
|
972
974
|
torch.bfloat16,
|
973
975
|
1e-3,
|
974
976
|
1e-2,
|
975
|
-
1, # 1e-1
|
977
|
+
1e-1, # 1e-1
|
976
978
|
1e-1, # 1e-2
|
977
979
|
1e-2,
|
978
980
|
1e-2,
|
@@ -1111,8 +1113,8 @@ def run_mini_model(
|
|
1111
1113
|
torch.bfloat16,
|
1112
1114
|
1e-3,
|
1113
1115
|
1e-2,
|
1114
|
-
1e-1,
|
1115
1116
|
1e-2,
|
1117
|
+
1e-1,
|
1116
1118
|
1e-2,
|
1117
1119
|
1e-2,
|
1118
1120
|
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
@@ -1124,8 +1126,8 @@ def run_mini_model(
|
|
1124
1126
|
torch.bfloat16,
|
1125
1127
|
1e-3,
|
1126
1128
|
1e-2,
|
1127
|
-
1e-1,
|
1128
1129
|
1e-2,
|
1130
|
+
1e-1,
|
1129
1131
|
1e-2,
|
1130
1132
|
1e-2,
|
1131
1133
|
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
@@ -1153,8 +1155,8 @@ def run_mini_model(
|
|
1153
1155
|
torch.bfloat16,
|
1154
1156
|
1e-3,
|
1155
1157
|
1e-2,
|
1156
|
-
|
1157
|
-
|
1158
|
+
3e-1,
|
1159
|
+
4e-1,
|
1158
1160
|
1e-2,
|
1159
1161
|
1e-2,
|
1160
1162
|
marks=[
|
@@ -1174,8 +1176,8 @@ def test_mini_model(
|
|
1174
1176
|
dtype,
|
1175
1177
|
loss_atol,
|
1176
1178
|
loss_rtol,
|
1177
|
-
|
1178
|
-
|
1179
|
+
logprobs_atol,
|
1180
|
+
logprobs_rtol,
|
1179
1181
|
param_atol,
|
1180
1182
|
param_rtol,
|
1181
1183
|
):
|
@@ -1193,13 +1195,13 @@ def test_mini_model(
|
|
1193
1195
|
rtol=loss_rtol,
|
1194
1196
|
)
|
1195
1197
|
|
1196
|
-
# Compare the
|
1197
|
-
if expected_output["
|
1198
|
+
# Compare the topk logprobs from evaluation step
|
1199
|
+
if expected_output["topk_logprobs"] is not None and actual_output["topk_logprobs"] is not None:
|
1198
1200
|
assert_verbose_allclose(
|
1199
|
-
expected_output["
|
1200
|
-
actual_output["
|
1201
|
-
atol=
|
1202
|
-
rtol=
|
1201
|
+
expected_output["topk_logprobs"],
|
1202
|
+
actual_output["topk_logprobs"],
|
1203
|
+
atol=logprobs_atol,
|
1204
|
+
rtol=logprobs_rtol,
|
1203
1205
|
)
|
1204
1206
|
|
1205
1207
|
# Compare the params from the last step
|
@@ -20,6 +20,8 @@ from test.utils import FAKE_CONFIGS_PATH
|
|
20
20
|
from test.utils import UNTOKENIZED_DATASET_PATH
|
21
21
|
from test.utils import MiniModelConfig
|
22
22
|
from test.utils import assert_verbose_allclose
|
23
|
+
from test.utils import get_logprobs
|
24
|
+
from test.utils import get_topk
|
23
25
|
from test.utils import is_torchvision_available
|
24
26
|
from test.utils import load_image_processing_config
|
25
27
|
from test.utils import load_processor_config
|
@@ -764,13 +766,17 @@ def run_mini_model_multimodal(
|
|
764
766
|
|
765
767
|
print(f"Step {i}, Loss: {output.loss.item()}")
|
766
768
|
loss_list.append(output.loss.item())
|
767
|
-
|
769
|
+
topk_logprobs = get_topk(get_logprobs(output.logits))
|
768
770
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
769
|
-
return {
|
771
|
+
return {
|
772
|
+
"loss": loss_list,
|
773
|
+
"topk_logprobs": topk_logprobs.values,
|
774
|
+
"model": model,
|
775
|
+
}
|
770
776
|
|
771
777
|
|
772
778
|
@pytest.mark.parametrize(
|
773
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
779
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
774
780
|
[
|
775
781
|
pytest.param(
|
776
782
|
"mini_qwen2_vl",
|
@@ -917,8 +923,8 @@ def test_mini_model_multimodal(
|
|
917
923
|
dtype,
|
918
924
|
loss_atol,
|
919
925
|
loss_rtol,
|
920
|
-
|
921
|
-
|
926
|
+
logprobs_atol,
|
927
|
+
logprobs_rtol,
|
922
928
|
param_atol,
|
923
929
|
param_rtol,
|
924
930
|
):
|
@@ -937,12 +943,12 @@ def test_mini_model_multimodal(
|
|
937
943
|
rtol=loss_rtol,
|
938
944
|
)
|
939
945
|
|
940
|
-
# Compare the
|
946
|
+
# Compare the topk logprobs from evaluation step
|
941
947
|
assert_verbose_allclose(
|
942
|
-
expected_output["
|
943
|
-
actual_output["
|
944
|
-
atol=
|
945
|
-
rtol=
|
948
|
+
expected_output["topk_logprobs"],
|
949
|
+
actual_output["topk_logprobs"],
|
950
|
+
atol=logprobs_atol,
|
951
|
+
rtol=logprobs_rtol,
|
946
952
|
)
|
947
953
|
|
948
954
|
# Compare the params from the last step
|
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
38
38
|
from test.utils import DEFAULT_DATASET_PATH
|
39
39
|
from test.utils import MiniModelConfig
|
40
40
|
from test.utils import assert_verbose_allclose
|
41
|
+
from test.utils import get_logprobs
|
42
|
+
from test.utils import get_topk
|
41
43
|
from test.utils import revert_liger_kernel_to_gemma
|
42
44
|
from test.utils import revert_liger_kernel_to_gemma2
|
43
45
|
from test.utils import revert_liger_kernel_to_gemma3_text
|
@@ -842,12 +844,17 @@ def run_mini_model(
|
|
842
844
|
print(f"Step {i}, Loss: {output.loss.item()}")
|
843
845
|
loss_list.append(output.loss.item())
|
844
846
|
|
847
|
+
topk_logprobs = get_topk(get_logprobs(output.logits))
|
845
848
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
846
|
-
return {
|
849
|
+
return {
|
850
|
+
"loss": loss_list,
|
851
|
+
"topk_logprobs": topk_logprobs.values,
|
852
|
+
"model": model,
|
853
|
+
}
|
847
854
|
|
848
855
|
|
849
856
|
@pytest.mark.parametrize(
|
850
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
857
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
851
858
|
[
|
852
859
|
pytest.param(
|
853
860
|
"mini_llama3",
|
@@ -1058,8 +1065,8 @@ def run_mini_model(
|
|
1058
1065
|
torch.bfloat16,
|
1059
1066
|
1e-3,
|
1060
1067
|
1e-2,
|
1061
|
-
1e-1,
|
1062
1068
|
1e-2,
|
1069
|
+
1e-1,
|
1063
1070
|
1e-2,
|
1064
1071
|
1e-2,
|
1065
1072
|
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
@@ -1071,8 +1078,8 @@ def run_mini_model(
|
|
1071
1078
|
torch.bfloat16,
|
1072
1079
|
1e-3,
|
1073
1080
|
1e-2,
|
1074
|
-
1e-1,
|
1075
1081
|
1e-2,
|
1082
|
+
1e-1,
|
1076
1083
|
1e-2,
|
1077
1084
|
1e-2,
|
1078
1085
|
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
@@ -1159,8 +1166,8 @@ def test_mini_model(
|
|
1159
1166
|
dtype,
|
1160
1167
|
loss_atol,
|
1161
1168
|
loss_rtol,
|
1162
|
-
|
1163
|
-
|
1169
|
+
logprobs_atol,
|
1170
|
+
logprobs_rtol,
|
1164
1171
|
param_atol,
|
1165
1172
|
param_rtol,
|
1166
1173
|
):
|
@@ -1180,12 +1187,12 @@ def test_mini_model(
|
|
1180
1187
|
|
1181
1188
|
# No logits are materialized
|
1182
1189
|
# import pdb; pdb.set_trace()
|
1183
|
-
# Compare the
|
1190
|
+
# Compare the topk logprobs from evaluation step
|
1184
1191
|
assert_verbose_allclose(
|
1185
|
-
expected_output["
|
1186
|
-
actual_output["
|
1187
|
-
atol=
|
1188
|
-
rtol=
|
1192
|
+
expected_output["topk_logprobs"],
|
1193
|
+
actual_output["topk_logprobs"],
|
1194
|
+
atol=logprobs_atol,
|
1195
|
+
rtol=logprobs_rtol,
|
1189
1196
|
)
|
1190
1197
|
|
1191
1198
|
# Compare the params from the last step
|
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
38
38
|
from test.utils import DEFAULT_DATASET_PATH
|
39
39
|
from test.utils import MiniModelConfig
|
40
40
|
from test.utils import assert_verbose_allclose
|
41
|
+
from test.utils import get_logprobs
|
42
|
+
from test.utils import get_topk
|
41
43
|
from test.utils import revert_liger_kernel_to_gemma
|
42
44
|
from test.utils import revert_liger_kernel_to_gemma2
|
43
45
|
from test.utils import revert_liger_kernel_to_gemma3_text
|
@@ -849,17 +851,17 @@ def run_mini_model(
|
|
849
851
|
eval_output = model(**eval_batch)
|
850
852
|
print(f"Eval Loss: {eval_output.loss.item()}")
|
851
853
|
loss_list.append(eval_output.loss.item())
|
852
|
-
|
854
|
+
topk_logprobs = get_topk(get_logprobs(eval_output.logits))
|
853
855
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
854
856
|
return {
|
855
857
|
"loss": loss_list,
|
856
|
-
"
|
858
|
+
"topk_logprobs": topk_logprobs.values,
|
857
859
|
"model": model,
|
858
860
|
}
|
859
861
|
|
860
862
|
|
861
863
|
@pytest.mark.parametrize(
|
862
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
864
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
863
865
|
[
|
864
866
|
("mini_llama3", 32, 1e-4, torch.float32, 1e-8, 2e-5, 1e-4, 1e-5, 5e-3, 1e-5),
|
865
867
|
pytest.param(
|
@@ -1013,7 +1015,7 @@ def run_mini_model(
|
|
1013
1015
|
# TODO: mixtral is flaky so disable the test for now
|
1014
1016
|
# ("mini_mixtral", 32, 1e-4, torch.float32, 5e-4, 1e-4, 5e-3, 1e-5, 1e-2, 1e-5),
|
1015
1017
|
# Gemma 1.1 and 2 has more tolerance because currently, the kernel is not a perfect match (casts are not done the same way)
|
1016
|
-
("mini_gemma1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-
|
1018
|
+
("mini_gemma1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-2, 5e-3, 1e-5),
|
1017
1019
|
("mini_gemma1.1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-5, 5e-3, 1e-5),
|
1018
1020
|
("mini_gemma2", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-5, 5e-3, 1e-5),
|
1019
1021
|
pytest.param(
|
@@ -1041,8 +1043,8 @@ def test_mini_model(
|
|
1041
1043
|
dtype,
|
1042
1044
|
loss_atol,
|
1043
1045
|
loss_rtol,
|
1044
|
-
|
1045
|
-
|
1046
|
+
logprobs_atol,
|
1047
|
+
logprobs_rtol,
|
1046
1048
|
param_atol,
|
1047
1049
|
param_rtol,
|
1048
1050
|
):
|
@@ -1060,13 +1062,13 @@ def test_mini_model(
|
|
1060
1062
|
rtol=loss_rtol,
|
1061
1063
|
)
|
1062
1064
|
|
1063
|
-
# Compare the
|
1064
|
-
if expected_output["
|
1065
|
+
# Compare the topk logprobs from evaluation step
|
1066
|
+
if expected_output["topk_logprobs"] is not None and actual_output["topk_logprobs"] is not None:
|
1065
1067
|
assert_verbose_allclose(
|
1066
|
-
expected_output["
|
1067
|
-
actual_output["
|
1068
|
-
atol=
|
1069
|
-
rtol=
|
1068
|
+
expected_output["topk_logprobs"],
|
1069
|
+
actual_output["topk_logprobs"],
|
1070
|
+
atol=logprobs_atol,
|
1071
|
+
rtol=logprobs_rtol,
|
1070
1072
|
)
|
1071
1073
|
|
1072
1074
|
# Compare the params from the last step
|