liger-kernel-nightly 0.5.10.dev20250613192702__tar.gz → 0.5.10.dev20250613212111__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/modal/benchmarks.py +8 -8
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models.py +20 -18
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models_multimodal.py +16 -10
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/test_mini_models_with_logits.py +18 -11
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models.py +14 -12
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models_multimodal.py +19 -12
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/test_mini_models_with_logits.py +15 -9
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/utils.py +11 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250613192702 → liger_kernel_nightly-0.5.10.dev20250613212111}/test/triton/test_triton_monkey_patch.py +0 -0
@@ -16,8 +16,8 @@ repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
|
|
16
16
|
|
17
17
|
@app.function(gpu="H100", image=repo, timeout=60 * 45)
|
18
18
|
def liger_benchmarks():
|
19
|
-
import subprocess
|
20
19
|
import os
|
20
|
+
import subprocess
|
21
21
|
|
22
22
|
subprocess.run(
|
23
23
|
["uv pip install -e '.[dev]' --system"],
|
@@ -30,7 +30,7 @@ def liger_benchmarks():
|
|
30
30
|
file_path = Path(REMOTE_ROOT_PATH) / "benchmark" / "data" / "all_benchmark_data.csv"
|
31
31
|
print(f"Checking if file exists at: {file_path}")
|
32
32
|
print(f"File exists: {os.path.exists(file_path)}")
|
33
|
-
|
33
|
+
|
34
34
|
if not os.path.exists(file_path):
|
35
35
|
print("Listing directory contents:")
|
36
36
|
data_dir = file_path.parent
|
@@ -53,21 +53,21 @@ def main():
|
|
53
53
|
# Run the benchmarks and get the data
|
54
54
|
print("Starting benchmark run...")
|
55
55
|
benchmark_data = liger_benchmarks.remote()
|
56
|
-
|
56
|
+
|
57
57
|
if not benchmark_data:
|
58
58
|
raise ValueError("No data received from remote function")
|
59
|
-
|
59
|
+
|
60
60
|
# Save the data locally
|
61
61
|
local_data_path = ROOT_PATH / "benchmark" / "data" / "all_benchmark_data.csv"
|
62
62
|
print(f"Attempting to save data to: {local_data_path}")
|
63
|
-
|
63
|
+
|
64
64
|
local_data_path.parent.mkdir(parents=True, exist_ok=True)
|
65
|
-
|
65
|
+
|
66
66
|
with open(local_data_path, "wb") as f:
|
67
67
|
f.write(benchmark_data)
|
68
|
-
|
68
|
+
|
69
69
|
print(f"Successfully saved {len(benchmark_data)} bytes to: {local_data_path}")
|
70
|
-
|
70
|
+
|
71
71
|
except Exception as e:
|
72
72
|
print(f"Error occurred: {str(e)}")
|
73
73
|
raise
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250613192702"
|
7
|
+
version = "0.5.10.dev20250613212111"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
38
38
|
from test.utils import DEFAULT_DATASET_PATH
|
39
39
|
from test.utils import MiniModelConfig
|
40
40
|
from test.utils import assert_verbose_allclose
|
41
|
+
from test.utils import get_logprobs
|
42
|
+
from test.utils import get_topk
|
41
43
|
from test.utils import revert_liger_kernel_to_gemma
|
42
44
|
from test.utils import revert_liger_kernel_to_gemma2
|
43
45
|
from test.utils import revert_liger_kernel_to_gemma3_text
|
@@ -851,17 +853,17 @@ def run_mini_model(
|
|
851
853
|
eval_output = model(**eval_batch)
|
852
854
|
print(f"Eval Loss: {eval_output.loss.item()}")
|
853
855
|
loss_list.append(eval_output.loss.item())
|
854
|
-
|
856
|
+
topk_logprobs = get_topk(get_logprobs(eval_output.logits))
|
855
857
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
856
858
|
return {
|
857
859
|
"loss": loss_list,
|
858
|
-
"
|
860
|
+
"topk_logprobs": topk_logprobs.values,
|
859
861
|
"model": model,
|
860
862
|
}
|
861
863
|
|
862
864
|
|
863
865
|
@pytest.mark.parametrize(
|
864
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
866
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
865
867
|
[
|
866
868
|
pytest.param(
|
867
869
|
"mini_llama3",
|
@@ -884,7 +886,7 @@ def run_mini_model(
|
|
884
886
|
1e-3,
|
885
887
|
1e-2,
|
886
888
|
1e-1,
|
887
|
-
1e-
|
889
|
+
1e-1,
|
888
890
|
1e-2,
|
889
891
|
1e-2,
|
890
892
|
marks=[
|
@@ -902,7 +904,7 @@ def run_mini_model(
|
|
902
904
|
torch.bfloat16,
|
903
905
|
1e-3,
|
904
906
|
1e-2,
|
905
|
-
1, # 1e-1
|
907
|
+
1e-1, # 1e-1
|
906
908
|
1e-1, # 1e-2
|
907
909
|
1e-2,
|
908
910
|
1e-2,
|
@@ -972,7 +974,7 @@ def run_mini_model(
|
|
972
974
|
torch.bfloat16,
|
973
975
|
1e-3,
|
974
976
|
1e-2,
|
975
|
-
1, # 1e-1
|
977
|
+
1e-1, # 1e-1
|
976
978
|
1e-1, # 1e-2
|
977
979
|
1e-2,
|
978
980
|
1e-2,
|
@@ -1111,8 +1113,8 @@ def run_mini_model(
|
|
1111
1113
|
torch.bfloat16,
|
1112
1114
|
1e-3,
|
1113
1115
|
1e-2,
|
1114
|
-
1e-1,
|
1115
1116
|
1e-2,
|
1117
|
+
1e-1,
|
1116
1118
|
1e-2,
|
1117
1119
|
1e-2,
|
1118
1120
|
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
@@ -1124,8 +1126,8 @@ def run_mini_model(
|
|
1124
1126
|
torch.bfloat16,
|
1125
1127
|
1e-3,
|
1126
1128
|
1e-2,
|
1127
|
-
1e-1,
|
1128
1129
|
1e-2,
|
1130
|
+
1e-1,
|
1129
1131
|
1e-2,
|
1130
1132
|
1e-2,
|
1131
1133
|
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
@@ -1153,8 +1155,8 @@ def run_mini_model(
|
|
1153
1155
|
torch.bfloat16,
|
1154
1156
|
1e-3,
|
1155
1157
|
1e-2,
|
1156
|
-
|
1157
|
-
|
1158
|
+
3e-1,
|
1159
|
+
4e-1,
|
1158
1160
|
1e-2,
|
1159
1161
|
1e-2,
|
1160
1162
|
marks=[
|
@@ -1174,8 +1176,8 @@ def test_mini_model(
|
|
1174
1176
|
dtype,
|
1175
1177
|
loss_atol,
|
1176
1178
|
loss_rtol,
|
1177
|
-
|
1178
|
-
|
1179
|
+
logprobs_atol,
|
1180
|
+
logprobs_rtol,
|
1179
1181
|
param_atol,
|
1180
1182
|
param_rtol,
|
1181
1183
|
):
|
@@ -1193,13 +1195,13 @@ def test_mini_model(
|
|
1193
1195
|
rtol=loss_rtol,
|
1194
1196
|
)
|
1195
1197
|
|
1196
|
-
# Compare the
|
1197
|
-
if expected_output["
|
1198
|
+
# Compare the topk logprobs from evaluation step
|
1199
|
+
if expected_output["topk_logprobs"] is not None and actual_output["topk_logprobs"] is not None:
|
1198
1200
|
assert_verbose_allclose(
|
1199
|
-
expected_output["
|
1200
|
-
actual_output["
|
1201
|
-
atol=
|
1202
|
-
rtol=
|
1201
|
+
expected_output["topk_logprobs"],
|
1202
|
+
actual_output["topk_logprobs"],
|
1203
|
+
atol=logprobs_atol,
|
1204
|
+
rtol=logprobs_rtol,
|
1203
1205
|
)
|
1204
1206
|
|
1205
1207
|
# Compare the params from the last step
|
@@ -20,6 +20,8 @@ from test.utils import FAKE_CONFIGS_PATH
|
|
20
20
|
from test.utils import UNTOKENIZED_DATASET_PATH
|
21
21
|
from test.utils import MiniModelConfig
|
22
22
|
from test.utils import assert_verbose_allclose
|
23
|
+
from test.utils import get_logprobs
|
24
|
+
from test.utils import get_topk
|
23
25
|
from test.utils import is_torchvision_available
|
24
26
|
from test.utils import load_image_processing_config
|
25
27
|
from test.utils import load_processor_config
|
@@ -764,13 +766,17 @@ def run_mini_model_multimodal(
|
|
764
766
|
|
765
767
|
print(f"Step {i}, Loss: {output.loss.item()}")
|
766
768
|
loss_list.append(output.loss.item())
|
767
|
-
|
769
|
+
topk_logprobs = get_topk(get_logprobs(output.logits))
|
768
770
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
769
|
-
return {
|
771
|
+
return {
|
772
|
+
"loss": loss_list,
|
773
|
+
"topk_logprobs": topk_logprobs.values,
|
774
|
+
"model": model,
|
775
|
+
}
|
770
776
|
|
771
777
|
|
772
778
|
@pytest.mark.parametrize(
|
773
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
779
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
774
780
|
[
|
775
781
|
pytest.param(
|
776
782
|
"mini_qwen2_vl",
|
@@ -917,8 +923,8 @@ def test_mini_model_multimodal(
|
|
917
923
|
dtype,
|
918
924
|
loss_atol,
|
919
925
|
loss_rtol,
|
920
|
-
|
921
|
-
|
926
|
+
logprobs_atol,
|
927
|
+
logprobs_rtol,
|
922
928
|
param_atol,
|
923
929
|
param_rtol,
|
924
930
|
):
|
@@ -937,12 +943,12 @@ def test_mini_model_multimodal(
|
|
937
943
|
rtol=loss_rtol,
|
938
944
|
)
|
939
945
|
|
940
|
-
# Compare the
|
946
|
+
# Compare the topk logprobs from evaluation step
|
941
947
|
assert_verbose_allclose(
|
942
|
-
expected_output["
|
943
|
-
actual_output["
|
944
|
-
atol=
|
945
|
-
rtol=
|
948
|
+
expected_output["topk_logprobs"],
|
949
|
+
actual_output["topk_logprobs"],
|
950
|
+
atol=logprobs_atol,
|
951
|
+
rtol=logprobs_rtol,
|
946
952
|
)
|
947
953
|
|
948
954
|
# Compare the params from the last step
|
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
38
38
|
from test.utils import DEFAULT_DATASET_PATH
|
39
39
|
from test.utils import MiniModelConfig
|
40
40
|
from test.utils import assert_verbose_allclose
|
41
|
+
from test.utils import get_logprobs
|
42
|
+
from test.utils import get_topk
|
41
43
|
from test.utils import revert_liger_kernel_to_gemma
|
42
44
|
from test.utils import revert_liger_kernel_to_gemma2
|
43
45
|
from test.utils import revert_liger_kernel_to_gemma3_text
|
@@ -842,12 +844,17 @@ def run_mini_model(
|
|
842
844
|
print(f"Step {i}, Loss: {output.loss.item()}")
|
843
845
|
loss_list.append(output.loss.item())
|
844
846
|
|
847
|
+
topk_logprobs = get_topk(get_logprobs(output.logits))
|
845
848
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
846
|
-
return {
|
849
|
+
return {
|
850
|
+
"loss": loss_list,
|
851
|
+
"topk_logprobs": topk_logprobs.values,
|
852
|
+
"model": model,
|
853
|
+
}
|
847
854
|
|
848
855
|
|
849
856
|
@pytest.mark.parametrize(
|
850
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
857
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
851
858
|
[
|
852
859
|
pytest.param(
|
853
860
|
"mini_llama3",
|
@@ -1058,8 +1065,8 @@ def run_mini_model(
|
|
1058
1065
|
torch.bfloat16,
|
1059
1066
|
1e-3,
|
1060
1067
|
1e-2,
|
1061
|
-
1e-1,
|
1062
1068
|
1e-2,
|
1069
|
+
1e-1,
|
1063
1070
|
1e-2,
|
1064
1071
|
1e-2,
|
1065
1072
|
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
@@ -1071,8 +1078,8 @@ def run_mini_model(
|
|
1071
1078
|
torch.bfloat16,
|
1072
1079
|
1e-3,
|
1073
1080
|
1e-2,
|
1074
|
-
1e-1,
|
1075
1081
|
1e-2,
|
1082
|
+
1e-1,
|
1076
1083
|
1e-2,
|
1077
1084
|
1e-2,
|
1078
1085
|
marks=pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
|
@@ -1159,8 +1166,8 @@ def test_mini_model(
|
|
1159
1166
|
dtype,
|
1160
1167
|
loss_atol,
|
1161
1168
|
loss_rtol,
|
1162
|
-
|
1163
|
-
|
1169
|
+
logprobs_atol,
|
1170
|
+
logprobs_rtol,
|
1164
1171
|
param_atol,
|
1165
1172
|
param_rtol,
|
1166
1173
|
):
|
@@ -1180,12 +1187,12 @@ def test_mini_model(
|
|
1180
1187
|
|
1181
1188
|
# No logits are materialized
|
1182
1189
|
# import pdb; pdb.set_trace()
|
1183
|
-
# Compare the
|
1190
|
+
# Compare the topk logprobs from evaluation step
|
1184
1191
|
assert_verbose_allclose(
|
1185
|
-
expected_output["
|
1186
|
-
actual_output["
|
1187
|
-
atol=
|
1188
|
-
rtol=
|
1192
|
+
expected_output["topk_logprobs"],
|
1193
|
+
actual_output["topk_logprobs"],
|
1194
|
+
atol=logprobs_atol,
|
1195
|
+
rtol=logprobs_rtol,
|
1189
1196
|
)
|
1190
1197
|
|
1191
1198
|
# Compare the params from the last step
|
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
38
38
|
from test.utils import DEFAULT_DATASET_PATH
|
39
39
|
from test.utils import MiniModelConfig
|
40
40
|
from test.utils import assert_verbose_allclose
|
41
|
+
from test.utils import get_logprobs
|
42
|
+
from test.utils import get_topk
|
41
43
|
from test.utils import revert_liger_kernel_to_gemma
|
42
44
|
from test.utils import revert_liger_kernel_to_gemma2
|
43
45
|
from test.utils import revert_liger_kernel_to_gemma3_text
|
@@ -849,17 +851,17 @@ def run_mini_model(
|
|
849
851
|
eval_output = model(**eval_batch)
|
850
852
|
print(f"Eval Loss: {eval_output.loss.item()}")
|
851
853
|
loss_list.append(eval_output.loss.item())
|
852
|
-
|
854
|
+
topk_logprobs = get_topk(get_logprobs(eval_output.logits))
|
853
855
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
854
856
|
return {
|
855
857
|
"loss": loss_list,
|
856
|
-
"
|
858
|
+
"topk_logprobs": topk_logprobs.values,
|
857
859
|
"model": model,
|
858
860
|
}
|
859
861
|
|
860
862
|
|
861
863
|
@pytest.mark.parametrize(
|
862
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
864
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
863
865
|
[
|
864
866
|
("mini_llama3", 32, 1e-4, torch.float32, 1e-8, 2e-5, 1e-4, 1e-5, 5e-3, 1e-5),
|
865
867
|
pytest.param(
|
@@ -1013,7 +1015,7 @@ def run_mini_model(
|
|
1013
1015
|
# TODO: mixtral is flaky so disable the test for now
|
1014
1016
|
# ("mini_mixtral", 32, 1e-4, torch.float32, 5e-4, 1e-4, 5e-3, 1e-5, 1e-2, 1e-5),
|
1015
1017
|
# Gemma 1.1 and 2 has more tolerance because currently, the kernel is not a perfect match (casts are not done the same way)
|
1016
|
-
("mini_gemma1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-
|
1018
|
+
("mini_gemma1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-2, 5e-3, 1e-5),
|
1017
1019
|
("mini_gemma1.1", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-5, 5e-3, 1e-5),
|
1018
1020
|
("mini_gemma2", 32, 1e-4, torch.float32, 1e-8, 1e-4, 5e-3, 1e-5, 5e-3, 1e-5),
|
1019
1021
|
pytest.param(
|
@@ -1041,8 +1043,8 @@ def test_mini_model(
|
|
1041
1043
|
dtype,
|
1042
1044
|
loss_atol,
|
1043
1045
|
loss_rtol,
|
1044
|
-
|
1045
|
-
|
1046
|
+
logprobs_atol,
|
1047
|
+
logprobs_rtol,
|
1046
1048
|
param_atol,
|
1047
1049
|
param_rtol,
|
1048
1050
|
):
|
@@ -1060,13 +1062,13 @@ def test_mini_model(
|
|
1060
1062
|
rtol=loss_rtol,
|
1061
1063
|
)
|
1062
1064
|
|
1063
|
-
# Compare the
|
1064
|
-
if expected_output["
|
1065
|
+
# Compare the topk logprobs from evaluation step
|
1066
|
+
if expected_output["topk_logprobs"] is not None and actual_output["topk_logprobs"] is not None:
|
1065
1067
|
assert_verbose_allclose(
|
1066
|
-
expected_output["
|
1067
|
-
actual_output["
|
1068
|
-
atol=
|
1069
|
-
rtol=
|
1068
|
+
expected_output["topk_logprobs"],
|
1069
|
+
actual_output["topk_logprobs"],
|
1070
|
+
atol=logprobs_atol,
|
1071
|
+
rtol=logprobs_rtol,
|
1070
1072
|
)
|
1071
1073
|
|
1072
1074
|
# Compare the params from the last step
|
@@ -20,6 +20,8 @@ from test.utils import FAKE_CONFIGS_PATH
|
|
20
20
|
from test.utils import UNTOKENIZED_DATASET_PATH
|
21
21
|
from test.utils import MiniModelConfig
|
22
22
|
from test.utils import assert_verbose_allclose
|
23
|
+
from test.utils import get_logprobs
|
24
|
+
from test.utils import get_topk
|
23
25
|
from test.utils import is_torchvision_available
|
24
26
|
from test.utils import load_image_processing_config
|
25
27
|
from test.utils import load_processor_config
|
@@ -762,11 +764,16 @@ def run_mini_model_multimodal(
|
|
762
764
|
print(f"Step {i}, Loss: {output.loss.item()}")
|
763
765
|
loss_list.append(output.loss.item())
|
764
766
|
|
765
|
-
|
767
|
+
topk_logprobs = get_topk(get_logprobs(output.logits))
|
768
|
+
return {
|
769
|
+
"loss": loss_list,
|
770
|
+
"topk_logprobs": topk_logprobs.values,
|
771
|
+
"model": model,
|
772
|
+
}
|
766
773
|
|
767
774
|
|
768
775
|
@pytest.mark.parametrize(
|
769
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
776
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
770
777
|
[
|
771
778
|
pytest.param(
|
772
779
|
"mini_qwen2_vl",
|
@@ -875,10 +882,10 @@ def run_mini_model_multimodal(
|
|
875
882
|
32,
|
876
883
|
1e-4,
|
877
884
|
torch.float32,
|
878
|
-
1e-
|
879
|
-
1e-
|
880
|
-
|
881
|
-
1e-
|
885
|
+
1e-3,
|
886
|
+
1e-3,
|
887
|
+
1e-1,
|
888
|
+
1e-1,
|
882
889
|
5e-3,
|
883
890
|
1e-5,
|
884
891
|
marks=[
|
@@ -898,8 +905,8 @@ def test_mini_model_multimodal(
|
|
898
905
|
dtype,
|
899
906
|
loss_atol,
|
900
907
|
loss_rtol,
|
901
|
-
|
902
|
-
|
908
|
+
logprobs_atol,
|
909
|
+
logprobs_rtol,
|
903
910
|
param_atol,
|
904
911
|
param_rtol,
|
905
912
|
):
|
@@ -920,10 +927,10 @@ def test_mini_model_multimodal(
|
|
920
927
|
|
921
928
|
# Compare the logits from the last step
|
922
929
|
assert_verbose_allclose(
|
923
|
-
expected_output["
|
924
|
-
actual_output["
|
925
|
-
atol=
|
926
|
-
rtol=
|
930
|
+
expected_output["topk_logprobs"],
|
931
|
+
actual_output["topk_logprobs"],
|
932
|
+
atol=logprobs_atol,
|
933
|
+
rtol=logprobs_rtol,
|
927
934
|
)
|
928
935
|
|
929
936
|
# Compare the params from the last step
|
@@ -38,6 +38,8 @@ from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
38
38
|
from test.utils import DEFAULT_DATASET_PATH
|
39
39
|
from test.utils import MiniModelConfig
|
40
40
|
from test.utils import assert_verbose_allclose
|
41
|
+
from test.utils import get_logprobs
|
42
|
+
from test.utils import get_topk
|
41
43
|
from test.utils import revert_liger_kernel_to_gemma
|
42
44
|
from test.utils import revert_liger_kernel_to_gemma2
|
43
45
|
from test.utils import revert_liger_kernel_to_gemma3_text
|
@@ -841,12 +843,17 @@ def run_mini_model(
|
|
841
843
|
print(f"Step {i}, Loss: {output.loss.item()}")
|
842
844
|
loss_list.append(output.loss.item())
|
843
845
|
|
846
|
+
topk_logprobs = get_topk(get_logprobs(output.logits))
|
844
847
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
845
|
-
return {
|
848
|
+
return {
|
849
|
+
"loss": loss_list,
|
850
|
+
"topk_logprobs": topk_logprobs.values,
|
851
|
+
"model": model,
|
852
|
+
}
|
846
853
|
|
847
854
|
|
848
855
|
@pytest.mark.parametrize(
|
849
|
-
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol,
|
856
|
+
"model_name, num_steps, lr, dtype, loss_atol, loss_rtol, logprobs_atol, logprobs_rtol, param_atol, param_rtol",
|
850
857
|
[
|
851
858
|
("mini_llama3", 32, 1e-4, torch.float32, 1e-8, 2e-5, 1e-4, 1e-5, 5e-3, 1e-5),
|
852
859
|
pytest.param(
|
@@ -1027,8 +1034,8 @@ def test_mini_model(
|
|
1027
1034
|
dtype,
|
1028
1035
|
loss_atol,
|
1029
1036
|
loss_rtol,
|
1030
|
-
|
1031
|
-
|
1037
|
+
logprobs_atol,
|
1038
|
+
logprobs_rtol,
|
1032
1039
|
param_atol,
|
1033
1040
|
param_rtol,
|
1034
1041
|
):
|
@@ -1048,12 +1055,11 @@ def test_mini_model(
|
|
1048
1055
|
|
1049
1056
|
# No logits are materialized
|
1050
1057
|
# import pdb; pdb.set_trace()
|
1051
|
-
# Compare the logits from the last step
|
1052
1058
|
assert_verbose_allclose(
|
1053
|
-
expected_output["
|
1054
|
-
actual_output["
|
1055
|
-
atol=
|
1056
|
-
rtol=
|
1059
|
+
expected_output["topk_logprobs"],
|
1060
|
+
actual_output["topk_logprobs"],
|
1061
|
+
atol=logprobs_atol,
|
1062
|
+
rtol=logprobs_rtol,
|
1057
1063
|
)
|
1058
1064
|
|
1059
1065
|
# Compare the params from the last step
|
@@ -57,6 +57,17 @@ def set_seed(seed=42):
|
|
57
57
|
os.environ["PYTHONHASHSEED"] = str(seed)
|
58
58
|
|
59
59
|
|
60
|
+
@torch.no_grad
|
61
|
+
def get_logprobs(tensor):
|
62
|
+
return torch.nn.functional.log_softmax(tensor, dim=-1, dtype=torch.float32)
|
63
|
+
|
64
|
+
|
65
|
+
@torch.no_grad
|
66
|
+
def get_topk(tensor, k=20):
|
67
|
+
topk = torch.topk(tensor, k, dim=-1)
|
68
|
+
return topk
|
69
|
+
|
70
|
+
|
60
71
|
def assert_verbose_allclose(tensor1, tensor2, rtol=1e-05, atol=1e-08, max_print=5):
|
61
72
|
"""
|
62
73
|
Assert that two tensors are element-wise equal within a tolerance, providing detailed information about mismatches.
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|