liger-kernel-nightly 0.5.10.dev20250526154218__tar.gz → 0.5.10.dev20250528223223__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/llava.py +37 -1
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/monkey_patch.py +3 -4
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_monkey_patch.py +6 -6
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/Makefile +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/setup.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.10.dev20250526154218 → liger_kernel_nightly-0.5.10.dev20250528223223}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.10.dev20250526154218"
|
7
|
+
version = "0.5.10.dev20250528223223"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -5,6 +5,7 @@ from typing import Union
|
|
5
5
|
|
6
6
|
import torch
|
7
7
|
|
8
|
+
from torch.nn import CrossEntropyLoss
|
8
9
|
from transformers.models.llava.modeling_llava import LlavaCausalLMOutputWithPast
|
9
10
|
from transformers.utils import is_torchdynamo_compiling
|
10
11
|
from transformers.utils.deprecation import deprecate_kwarg
|
@@ -189,7 +190,20 @@ def lce_forward_deprecated(
|
|
189
190
|
|
190
191
|
lce = LigerFusedLinearCrossEntropyLoss()
|
191
192
|
loss = lce(self.language_model.lm_head.weight, shift_hidden_states, shift_labels)
|
192
|
-
|
193
|
+
else:
|
194
|
+
logits = self.language_model.lm_head(hidden_states)
|
195
|
+
if labels is not None:
|
196
|
+
# Shift so that tokens < n predict n
|
197
|
+
if attention_mask is not None:
|
198
|
+
shift_attention_mask = attention_mask[..., 1:]
|
199
|
+
shift_logits = logits[..., :-1, :][shift_attention_mask.to(logits.device) != 0].contiguous()
|
200
|
+
shift_labels = labels[..., 1:][shift_attention_mask.to(labels.device) != 0].contiguous()
|
201
|
+
else:
|
202
|
+
shift_logits = logits[..., :-1, :].contiguous()
|
203
|
+
shift_labels = labels[..., 1:].contiguous()
|
204
|
+
# Flatten the tokens
|
205
|
+
loss_fct = CrossEntropyLoss()
|
206
|
+
loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1).to(shift_logits.device))
|
193
207
|
if not return_dict:
|
194
208
|
# NOTE: This part has not been tested.
|
195
209
|
output = outputs[1:]
|
@@ -349,6 +363,28 @@ def lce_forward(
|
|
349
363
|
shift_hidden_states.view(-1, shift_hidden_states.size(-1)),
|
350
364
|
shift_labels.view(-1).to(shift_hidden_states.device),
|
351
365
|
)
|
366
|
+
else:
|
367
|
+
logits = self.language_model.lm_head(hidden_states)
|
368
|
+
if labels is not None:
|
369
|
+
# Upcast to float if we need to compute the loss to avoid potential precision issues
|
370
|
+
logits = logits.float()
|
371
|
+
shift_logits = logits[..., :-1, :]
|
372
|
+
shift_labels = labels[..., 1:]
|
373
|
+
if attention_mask is not None:
|
374
|
+
# we use the input attention mask to shift the logits and labels, because it is 2D.
|
375
|
+
# we also crop attn mask in case it is longer, which happens in PrefixTuning with peft
|
376
|
+
shift_attention_mask = attention_mask[:, -shift_logits.shape[1] :].to(logits.device)
|
377
|
+
shift_logits = shift_logits[shift_attention_mask.to(logits.device) != 0].contiguous()
|
378
|
+
shift_labels = shift_labels[shift_attention_mask.to(shift_labels.device) != 0].contiguous()
|
379
|
+
else:
|
380
|
+
shift_logits = shift_logits.contiguous()
|
381
|
+
shift_labels = shift_labels.contiguous()
|
382
|
+
# Flatten the tokens
|
383
|
+
loss_fct = CrossEntropyLoss()
|
384
|
+
|
385
|
+
flat_logits = shift_logits.view(-1, self.config.text_config.vocab_size)
|
386
|
+
flat_labels = shift_labels.view(-1).to(shift_logits.device)
|
387
|
+
loss = loss_fct(flat_logits, flat_labels)
|
352
388
|
|
353
389
|
if not return_dict:
|
354
390
|
# NOTE: This part has not been tested.
|
@@ -776,7 +776,7 @@ def apply_liger_kernel_to_gemma3_text(
|
|
776
776
|
|
777
777
|
from transformers.models.gemma3 import modeling_gemma3
|
778
778
|
from transformers.models.gemma3.modeling_gemma3 import Gemma3DecoderLayer
|
779
|
-
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM
|
779
|
+
from transformers.models.gemma3.modeling_gemma3 import Gemma3ForCausalLM, Gemma3TextModel
|
780
780
|
|
781
781
|
from liger_kernel.transformers.gema3_rms import LigerRMSNormForGemma3
|
782
782
|
from liger_kernel.transformers.model.gemma3 import causal_forward
|
@@ -807,9 +807,9 @@ def apply_liger_kernel_to_gemma3_text(
|
|
807
807
|
# The model instance already exists, so we need to additionally patch the
|
808
808
|
# instance variables that reference already-instantiated modules
|
809
809
|
|
810
|
-
if isinstance(model, Gemma3ForCausalLM):
|
810
|
+
if isinstance(model, Gemma3ForCausalLM) or isinstance(model, Gemma3TextModel):
|
811
811
|
# get the base model from the model instance
|
812
|
-
base_model = model.model
|
812
|
+
base_model = model.model if isinstance(model, Gemma3ForCausalLM) else model
|
813
813
|
|
814
814
|
if rms_norm:
|
815
815
|
_patch_rms_norm_module_for_gemma3(base_model.norm)
|
@@ -1625,7 +1625,6 @@ def _apply_liger_kernel_to_instance(model: PreTrainedModel, **kwargs) -> None:
|
|
1625
1625
|
return
|
1626
1626
|
|
1627
1627
|
apply_fn = MODEL_TYPE_TO_APPLY_LIGER_FN[model_type]
|
1628
|
-
|
1629
1628
|
apply_fn_signature = inspect.signature(apply_fn)
|
1630
1629
|
|
1631
1630
|
# Filter out the keyword arguments that are not supported by the apply function
|
@@ -667,7 +667,7 @@ def test_apply_liger_kernel_to_instance_for_gemma3_text():
|
|
667
667
|
|
668
668
|
|
669
669
|
@pytest.mark.skipif(not is_gemma3_available(), reason="gemma3 module not available")
|
670
|
-
def test_apply_liger_kernel_to_instance_for_gemma3():
|
670
|
+
def test_apply_liger_kernel_to_instance_for_gemma3_conditional_generation():
|
671
671
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
672
672
|
|
673
673
|
with patch("transformers.models.gemma3.modeling_gemma3"):
|
@@ -687,8 +687,8 @@ def test_apply_liger_kernel_to_instance_for_gemma3():
|
|
687
687
|
intermediate_size=64,
|
688
688
|
)
|
689
689
|
config = transformers.models.gemma3.configuration_gemma3.Gemma3Config(text_config, vision_config)
|
690
|
-
dummy_model_instance = Gemma3ForConditionalGeneration._from_config(config)
|
691
690
|
|
691
|
+
dummy_model_instance = Gemma3ForConditionalGeneration._from_config(config)
|
692
692
|
assert isinstance(dummy_model_instance, Gemma3ForConditionalGeneration)
|
693
693
|
|
694
694
|
# Check that model instance variables are not yet patched with Liger modules
|
@@ -704,11 +704,11 @@ def test_apply_liger_kernel_to_instance_for_gemma3():
|
|
704
704
|
dummy_model_instance.multi_modal_projector.mm_soft_emb_norm.forward
|
705
705
|
) != inspect.getsource(LigerRMSNorm.forward)
|
706
706
|
|
707
|
-
assert inspect.getsource(dummy_model_instance.language_model.model.norm.forward) != inspect.getsource(
|
707
|
+
assert inspect.getsource(dummy_model_instance.language_model.norm.forward) != inspect.getsource(
|
708
708
|
LigerRMSNorm.forward
|
709
709
|
)
|
710
710
|
|
711
|
-
for layer in dummy_model_instance.language_model.model.layers:
|
711
|
+
for layer in dummy_model_instance.language_model.layers:
|
712
712
|
assert inspect.getsource(layer.mlp.forward) != inspect.getsource(LigerGEGLUMLP.forward)
|
713
713
|
assert inspect.getsource(layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
714
714
|
assert inspect.getsource(layer.post_attention_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
@@ -736,10 +736,10 @@ def test_apply_liger_kernel_to_instance_for_gemma3():
|
|
736
736
|
dummy_model_instance.multi_modal_projector.mm_soft_emb_norm.forward
|
737
737
|
) == inspect.getsource(LigerRMSNorm.forward)
|
738
738
|
|
739
|
-
assert inspect.getsource(dummy_model_instance.language_model.model.norm.forward) == inspect.getsource(
|
739
|
+
assert inspect.getsource(dummy_model_instance.language_model.norm.forward) == inspect.getsource(
|
740
740
|
LigerRMSNorm.forward
|
741
741
|
)
|
742
|
-
for layer in dummy_model_instance.language_model.model.layers:
|
742
|
+
for layer in dummy_model_instance.language_model.layers:
|
743
743
|
assert inspect.getsource(layer.mlp.forward) == inspect.getsource(LigerGEGLUMLP.forward)
|
744
744
|
assert inspect.getsource(layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
745
745
|
assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|