liger-kernel-nightly 0.5.10.dev20250630171450__tar.gz → 0.5.10.dev20250630172023__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/monkey_patch.py +113 -31
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_monkey_patch.py +89 -1
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250630171450 → liger_kernel_nightly-0.5.10.dev20250630172023}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250630171450"
|
7
|
+
version = "0.5.10.dev20250630172023"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -2,6 +2,7 @@ import inspect
|
|
2
2
|
import logging
|
3
3
|
|
4
4
|
from functools import partial
|
5
|
+
from types import MethodType
|
5
6
|
from typing import Callable
|
6
7
|
|
7
8
|
import transformers
|
@@ -260,10 +261,16 @@ def apply_liger_kernel_to_llama(
|
|
260
261
|
|
261
262
|
if fused_linear_cross_entropy:
|
262
263
|
if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
|
263
|
-
|
264
|
+
if model is not None:
|
265
|
+
model.forward = MethodType(llama_lce_forward, model)
|
266
|
+
else:
|
267
|
+
modeling_llama.LlamaForCausalLM.forward = llama_lce_forward
|
264
268
|
else: # if version < 4.46.1
|
265
269
|
logger.warning(TRANSFORMER_DEPRECATION_WARNING)
|
266
|
-
|
270
|
+
if model is not None:
|
271
|
+
model.forward = MethodType(llama_lce_forward_deprecated, model)
|
272
|
+
else:
|
273
|
+
modeling_llama.LlamaForCausalLM.forward = llama_lce_forward_deprecated
|
267
274
|
|
268
275
|
if model is not None:
|
269
276
|
# The model instance already exists, so we need to additionally patch the
|
@@ -318,9 +325,15 @@ def apply_liger_kernel_to_llava(
|
|
318
325
|
modeling_llava.nn.CrossEntropyLoss = LigerCrossEntropyLoss
|
319
326
|
if fused_linear_cross_entropy:
|
320
327
|
if transformer_version >= version.parse("4.52.0"):
|
321
|
-
|
328
|
+
if model is not None:
|
329
|
+
model.forward = MethodType(llava_lce_forward, model)
|
330
|
+
else:
|
331
|
+
modeling_llava.LlavaForConditionalGeneration.forward = llava_lce_forward
|
322
332
|
elif transformer_version >= version.parse("4.49.0") and transformer_version < version.parse("4.52.0"):
|
323
|
-
|
333
|
+
if model is not None:
|
334
|
+
model.forward = MethodType(llava_lce_forward_deprecated, model)
|
335
|
+
else:
|
336
|
+
modeling_llava.LlavaForConditionalGeneration.forward = llava_lce_forward_deprecated
|
324
337
|
else: # if version < 4.49.0
|
325
338
|
logger.warning(
|
326
339
|
"The latest version of Liger does not support transformers < 4.49.0 for llava. Please downgrade your liger version or upgrade your transformer version."
|
@@ -490,7 +503,7 @@ def apply_liger_kernel_to_mllama(
|
|
490
503
|
|
491
504
|
if rope:
|
492
505
|
modeling_mllama.apply_rotary_pos_emb = liger_rotary_pos_emb
|
493
|
-
if layer_norm:
|
506
|
+
if layer_norm and model is None:
|
494
507
|
modeling_mllama.nn.LayerNorm = LigerLayerNorm
|
495
508
|
if rms_norm:
|
496
509
|
modeling_mllama.MllamaTextRMSNorm = LigerRMSNorm
|
@@ -506,10 +519,16 @@ def apply_liger_kernel_to_mllama(
|
|
506
519
|
modeling_mllama.CrossEntropyLoss = LigerCrossEntropyLoss
|
507
520
|
if fused_linear_cross_entropy:
|
508
521
|
if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
|
509
|
-
|
522
|
+
if model is not None:
|
523
|
+
model.forward = MethodType(mllama_lce_forward, model)
|
524
|
+
else:
|
525
|
+
modeling_mllama.MllamaForCausalLM.forward = mllama_lce_forward
|
510
526
|
else: # if version < 4.46.1
|
511
527
|
logger.warning(TRANSFORMER_DEPRECATION_WARNING)
|
512
|
-
|
528
|
+
if model is not None:
|
529
|
+
model.forward = MethodType(mllama_lce_forward_deprecated, model)
|
530
|
+
else:
|
531
|
+
modeling_mllama.MllamaForCausalLM.forward = mllama_lce_forward_deprecated
|
513
532
|
|
514
533
|
if model is not None:
|
515
534
|
# The model instance already exists, so we need to additionally patch the
|
@@ -592,7 +611,10 @@ def apply_liger_kernel_to_mistral(
|
|
592
611
|
if cross_entropy:
|
593
612
|
modeling_mistral.CrossEntropyLoss = LigerCrossEntropyLoss
|
594
613
|
if fused_linear_cross_entropy:
|
595
|
-
|
614
|
+
if model is not None:
|
615
|
+
model.forward = MethodType(mistral_lce_forward, model)
|
616
|
+
else:
|
617
|
+
modeling_mistral.MistralForCausalLM.forward = mistral_lce_forward
|
596
618
|
if swiglu:
|
597
619
|
modeling_mistral.MistralMLP = LigerSwiGLUMLP
|
598
620
|
|
@@ -660,10 +682,16 @@ def apply_liger_kernel_to_mixtral(
|
|
660
682
|
|
661
683
|
if fused_linear_cross_entropy:
|
662
684
|
if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
|
663
|
-
|
685
|
+
if model is not None:
|
686
|
+
model.forward = MethodType(mixtral_lce_forward, model)
|
687
|
+
else:
|
688
|
+
modeling_mixtral.MixtralForCausalLM.forward = mixtral_lce_forward
|
664
689
|
else: # if version < 4.46.1
|
665
690
|
logger.warning(TRANSFORMER_DEPRECATION_WARNING)
|
666
|
-
|
691
|
+
if model is not None:
|
692
|
+
model.forward = MethodType(mixtral_lce_forward_deprecated, model)
|
693
|
+
else:
|
694
|
+
modeling_mixtral.MixtralForCausalLM.forward = mixtral_lce_forward_deprecated
|
667
695
|
if swiglu:
|
668
696
|
modeling_mixtral.MixtralBlockSparseTop2MLP = LigerBlockSparseTop2MLP
|
669
697
|
|
@@ -737,10 +765,16 @@ def apply_liger_kernel_to_gemma(
|
|
737
765
|
modeling_gemma.GemmaMLP = LigerGEGLUMLP
|
738
766
|
if fused_linear_cross_entropy:
|
739
767
|
if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
|
740
|
-
|
768
|
+
if model is not None:
|
769
|
+
model.forward = MethodType(gemma_lce_forward, model)
|
770
|
+
else:
|
771
|
+
modeling_gemma.GemmaForCausalLM.forward = gemma_lce_forward
|
741
772
|
else: # if version < 4.46.1
|
742
773
|
logger.warning(TRANSFORMER_DEPRECATION_WARNING)
|
743
|
-
|
774
|
+
if model is not None:
|
775
|
+
model.forward = MethodType(gemma_lce_forward_deprecated, model)
|
776
|
+
else:
|
777
|
+
modeling_gemma.GemmaForCausalLM.forward = gemma_lce_forward_deprecated
|
744
778
|
|
745
779
|
if model is not None:
|
746
780
|
# The model instance already exists, so we need to additionally patch the
|
@@ -812,10 +846,16 @@ def apply_liger_kernel_to_gemma2(
|
|
812
846
|
modeling_gemma2.CrossEntropyLoss = LigerCrossEntropyLoss
|
813
847
|
if fused_linear_cross_entropy:
|
814
848
|
if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
|
815
|
-
|
849
|
+
if model is not None:
|
850
|
+
model.forward = MethodType(gemma2_lce_forward, model)
|
851
|
+
else:
|
852
|
+
modeling_gemma2.Gemma2ForCausalLM.forward = gemma2_lce_forward
|
816
853
|
else:
|
817
854
|
logger.warning(TRANSFORMER_DEPRECATION_WARNING)
|
818
|
-
|
855
|
+
if model is not None:
|
856
|
+
model.forward = MethodType(gemma2_lce_forward_deprected, model)
|
857
|
+
else:
|
858
|
+
modeling_gemma2.Gemma2ForCausalLM.forward = gemma2_lce_forward_deprected
|
819
859
|
if geglu:
|
820
860
|
modeling_gemma2.Gemma2MLP = LigerGEGLUMLP
|
821
861
|
|
@@ -894,7 +934,10 @@ def apply_liger_kernel_to_gemma3_text(
|
|
894
934
|
nn.functional.cross_entropy = liger_cross_entropy
|
895
935
|
|
896
936
|
if fused_linear_cross_entropy:
|
897
|
-
|
937
|
+
if model is not None:
|
938
|
+
model.forward = MethodType(causal_forward, model)
|
939
|
+
else:
|
940
|
+
modeling_gemma3.Gemma3ForCausalLM.forward = causal_forward
|
898
941
|
|
899
942
|
if model is not None:
|
900
943
|
# The model instance already exists, so we need to additionally patch the
|
@@ -964,7 +1007,7 @@ def apply_liger_kernel_to_gemma3(
|
|
964
1007
|
_patch_rms_norm_module, offset=1.0, casting_mode="gemma", in_place=False
|
965
1008
|
)
|
966
1009
|
|
967
|
-
if layer_norm:
|
1010
|
+
if layer_norm and model is None:
|
968
1011
|
modeling_siglip.nn.LayerNorm = LigerLayerNorm
|
969
1012
|
|
970
1013
|
apply_liger_kernel_to_gemma3_text(
|
@@ -975,7 +1018,10 @@ def apply_liger_kernel_to_gemma3(
|
|
975
1018
|
modeling_gemma3.nn.CrossEntropyLoss = LigerCrossEntropyLoss
|
976
1019
|
|
977
1020
|
if fused_linear_cross_entropy:
|
978
|
-
|
1021
|
+
if model is not None:
|
1022
|
+
model.forward = MethodType(multimodal_forward, model)
|
1023
|
+
else:
|
1024
|
+
modeling_gemma3.Gemma3ForConditionalGeneration.forward = multimodal_forward
|
979
1025
|
|
980
1026
|
if model is not None:
|
981
1027
|
# The model instance already exists, so we need to additionally patch the
|
@@ -1054,7 +1100,7 @@ def apply_liger_kernel_to_paligemma(
|
|
1054
1100
|
from liger_kernel.transformers.model.paligemma import lce_forward_deprecated
|
1055
1101
|
|
1056
1102
|
# The vision_tower is a SiglipVisionModel
|
1057
|
-
if layer_norm:
|
1103
|
+
if layer_norm and model is None:
|
1058
1104
|
modeling_siglip.nn.LayerNorm = LigerLayerNorm
|
1059
1105
|
|
1060
1106
|
# SiglipMLP is standard FFN so LigerGEGLUMLP is not compatible
|
@@ -1072,10 +1118,16 @@ def apply_liger_kernel_to_paligemma(
|
|
1072
1118
|
modeling_paligemma.nn.CrossEntropyLoss = LigerCrossEntropyLoss
|
1073
1119
|
if fused_linear_cross_entropy:
|
1074
1120
|
if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
|
1075
|
-
|
1121
|
+
if model is not None:
|
1122
|
+
model.forward = MethodType(lce_forward, model)
|
1123
|
+
else:
|
1124
|
+
modeling_paligemma.PaliGemmaForConditionalGeneration.forward = lce_forward
|
1076
1125
|
else: # if version < 4.46.1
|
1077
1126
|
logger.warning(TRANSFORMER_DEPRECATION_WARNING)
|
1078
|
-
|
1127
|
+
if model is not None:
|
1128
|
+
model.forward = MethodType(lce_forward_deprecated, model)
|
1129
|
+
else:
|
1130
|
+
modeling_paligemma.PaliGemmaForConditionalGeneration.forward = lce_forward_deprecated
|
1079
1131
|
|
1080
1132
|
if model is not None:
|
1081
1133
|
# The model instance already exists, so we need to additionally patch the
|
@@ -1167,10 +1219,16 @@ def apply_liger_kernel_to_qwen2(
|
|
1167
1219
|
|
1168
1220
|
if fused_linear_cross_entropy:
|
1169
1221
|
if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
|
1170
|
-
|
1222
|
+
if model is not None:
|
1223
|
+
model.forward = MethodType(qwen2_lce_forward, model)
|
1224
|
+
else:
|
1225
|
+
modeling_qwen2.Qwen2ForCausalLM.forward = qwen2_lce_forward
|
1171
1226
|
else: # if version < 4.46.1
|
1172
1227
|
logger.warning(TRANSFORMER_DEPRECATION_WARNING)
|
1173
|
-
|
1228
|
+
if model is not None:
|
1229
|
+
model.forward = MethodType(qwen2_lce_forward_deprecated, model)
|
1230
|
+
else:
|
1231
|
+
modeling_qwen2.Qwen2ForCausalLM.forward = qwen2_lce_forward_deprecated
|
1174
1232
|
|
1175
1233
|
if swiglu:
|
1176
1234
|
modeling_qwen2.Qwen2MLP = LigerSwiGLUMLP
|
@@ -1226,7 +1284,10 @@ def apply_liger_kernel_to_qwen3(
|
|
1226
1284
|
nn.functional.cross_entropy = liger_cross_entropy
|
1227
1285
|
|
1228
1286
|
if fused_linear_cross_entropy:
|
1229
|
-
|
1287
|
+
if model is not None:
|
1288
|
+
model.forward = MethodType(qwen3_lce_forward, model)
|
1289
|
+
else:
|
1290
|
+
modeling_qwen3.Qwen3ForCausalLM.forward = qwen3_lce_forward
|
1230
1291
|
|
1231
1292
|
if swiglu:
|
1232
1293
|
modeling_qwen3.Qwen3MLP = LigerSwiGLUMLP
|
@@ -1281,7 +1342,10 @@ def apply_liger_kernel_to_qwen3_moe(
|
|
1281
1342
|
nn.functional.cross_entropy = liger_cross_entropy
|
1282
1343
|
|
1283
1344
|
if fused_linear_cross_entropy:
|
1284
|
-
|
1345
|
+
if model is not None:
|
1346
|
+
model.forward = MethodType(qwen3_lce_forward, model)
|
1347
|
+
else:
|
1348
|
+
modeling_qwen3_moe.Qwen3MoeForCausalLM.forward = qwen3_lce_forward
|
1285
1349
|
|
1286
1350
|
if swiglu:
|
1287
1351
|
modeling_qwen3_moe.Qwen3MoeMLP = LigerQwen3MoeSwiGLUMLP
|
@@ -1350,12 +1414,15 @@ def apply_liger_kernel_to_qwen2_vl(
|
|
1350
1414
|
if rms_norm:
|
1351
1415
|
# https://github.com/huggingface/transformers/blob/main/src/transformers/models/qwen2_vl/modeling_qwen2_vl.py#L439
|
1352
1416
|
modeling_qwen2_vl.Qwen2RMSNorm = LigerRMSNorm
|
1353
|
-
if layer_norm:
|
1417
|
+
if layer_norm and model is None:
|
1354
1418
|
modeling_qwen2_vl.LayerNorm = LigerLayerNorm
|
1355
1419
|
if cross_entropy:
|
1356
1420
|
modeling_qwen2_vl.CrossEntropyLoss = LigerCrossEntropyLoss
|
1357
1421
|
if fused_linear_cross_entropy:
|
1358
|
-
|
1422
|
+
if model is not None:
|
1423
|
+
model.forward = MethodType(qwen2_vl_lce_forward, model)
|
1424
|
+
else:
|
1425
|
+
modeling_qwen2_vl.Qwen2VLForConditionalGeneration.forward = qwen2_vl_lce_forward
|
1359
1426
|
if swiglu:
|
1360
1427
|
modeling_qwen2_vl.Qwen2MLP = LigerSwiGLUMLP
|
1361
1428
|
|
@@ -1443,7 +1510,10 @@ def apply_liger_kernel_to_qwen2_5_vl(
|
|
1443
1510
|
if cross_entropy:
|
1444
1511
|
modeling_qwen2_5_vl.CrossEntropyLoss = LigerCrossEntropyLoss
|
1445
1512
|
if fused_linear_cross_entropy:
|
1446
|
-
|
1513
|
+
if model is not None:
|
1514
|
+
model.forward = MethodType(qwen2_5_vl_lce_forward, model)
|
1515
|
+
else:
|
1516
|
+
modeling_qwen2_5_vl.Qwen2_5_VLForConditionalGeneration.forward = qwen2_5_vl_lce_forward
|
1447
1517
|
if swiglu:
|
1448
1518
|
modeling_qwen2_5_vl.Qwen2MLP = LigerSwiGLUMLP
|
1449
1519
|
|
@@ -1530,10 +1600,16 @@ def apply_liger_kernel_to_phi3(
|
|
1530
1600
|
modeling_phi3.CrossEntropyLoss = LigerCrossEntropyLoss
|
1531
1601
|
if fused_linear_cross_entropy:
|
1532
1602
|
if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
|
1533
|
-
|
1603
|
+
if model is not None:
|
1604
|
+
model.forward = MethodType(phi3_lce_forward, model)
|
1605
|
+
else:
|
1606
|
+
modeling_phi3.Phi3ForCausalLM.forward = phi3_lce_forward
|
1534
1607
|
else: # if version < 4.46.1
|
1535
1608
|
logger.warning(TRANSFORMER_DEPRECATION_WARNING)
|
1536
|
-
|
1609
|
+
if model is not None:
|
1610
|
+
model.forward = MethodType(phi3_lce_forward_deprecated, model)
|
1611
|
+
else:
|
1612
|
+
modeling_phi3.Phi3ForCausalLM.forward = phi3_lce_forward_deprecated
|
1537
1613
|
|
1538
1614
|
if model is not None:
|
1539
1615
|
# The model instance already exists, so we need to additionally patch the
|
@@ -1597,7 +1673,10 @@ def apply_liger_kernel_to_olmo2(
|
|
1597
1673
|
|
1598
1674
|
nn.functional.cross_entropy = liger_cross_entropy
|
1599
1675
|
if fused_linear_cross_entropy:
|
1600
|
-
|
1676
|
+
if model is not None:
|
1677
|
+
model.forward = MethodType(olmo2_lce_forward, model)
|
1678
|
+
else:
|
1679
|
+
modeling_olmo2.Olmo2ForCausalLM.forward = olmo2_lce_forward
|
1601
1680
|
|
1602
1681
|
if model is not None:
|
1603
1682
|
# The model instance already exists, so we need to additionally patch the
|
@@ -1661,7 +1740,10 @@ def apply_liger_kernel_to_glm4(
|
|
1661
1740
|
|
1662
1741
|
nn.functional.cross_entropy = liger_cross_entropy
|
1663
1742
|
if fused_linear_cross_entropy:
|
1664
|
-
|
1743
|
+
if model is not None:
|
1744
|
+
model.forward = MethodType(glm4_lce_forward, model)
|
1745
|
+
else:
|
1746
|
+
modeling_glm4.Glm4ForCausalLM.forward = glm4_lce_forward
|
1665
1747
|
|
1666
1748
|
if model is not None:
|
1667
1749
|
# The model instance already exists, so we need to additionally patch the
|