liger-kernel-nightly 0.5.9.dev20250515065336__tar.gz → 0.5.9.dev20250516193902__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/jsd_loss.py +2 -2
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/gemma.py +0 -8
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/gemma2.py +0 -6
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/gemma3.py +0 -8
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/glm4.py +0 -6
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/llama.py +0 -8
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/llava.py +0 -8
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/mistral.py +0 -6
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/mixtral.py +0 -8
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/mllama.py +0 -7
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/olmo2.py +0 -6
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/paligemma.py +0 -8
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/phi3.py +0 -8
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/qwen2.py +0 -8
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -6
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -6
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/qwen3.py +0 -6
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -6
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/Makefile +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/setup.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.9.dev20250515065336"
|
7
|
+
version = "0.5.9.dev20250516193902"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -150,8 +150,8 @@ class LigerFusedLinearJSDLoss(torch.nn.Module):
|
|
150
150
|
teacher_input: torch.Tensor,
|
151
151
|
teacher_weight: torch.Tensor,
|
152
152
|
true_labels: torch.LongTensor,
|
153
|
-
student_bias: torch.Tensor,
|
154
|
-
teacher_bias: torch.Tensor,
|
153
|
+
student_bias: torch.Tensor = None,
|
154
|
+
teacher_bias: torch.Tensor = None,
|
155
155
|
) -> torch.Tensor:
|
156
156
|
"""
|
157
157
|
Compute the JSD distillation loss.
|
@@ -8,18 +8,12 @@ import torch
|
|
8
8
|
from torch.nn import CrossEntropyLoss
|
9
9
|
from transformers.cache_utils import Cache
|
10
10
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
11
|
-
from transformers.models.gemma.modeling_gemma import _CONFIG_FOR_DOC
|
12
|
-
from transformers.models.gemma.modeling_gemma import GEMMA_INPUTS_DOCSTRING
|
13
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
14
|
-
from transformers.utils import replace_return_docstrings
|
15
11
|
from transformers.utils.deprecation import deprecate_kwarg
|
16
12
|
|
17
13
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
18
14
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
19
15
|
|
20
16
|
|
21
|
-
@add_start_docstrings_to_model_forward(GEMMA_INPUTS_DOCSTRING)
|
22
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
23
17
|
def lce_forward_deprecated(
|
24
18
|
self,
|
25
19
|
input_ids: torch.LongTensor = None,
|
@@ -129,8 +123,6 @@ def lce_forward_deprecated(
|
|
129
123
|
|
130
124
|
|
131
125
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
132
|
-
@add_start_docstrings_to_model_forward(GEMMA_INPUTS_DOCSTRING)
|
133
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
134
126
|
def lce_forward(
|
135
127
|
self,
|
136
128
|
input_ids: torch.LongTensor = None,
|
@@ -9,10 +9,6 @@ import torch
|
|
9
9
|
from torch.nn import CrossEntropyLoss
|
10
10
|
from transformers.cache_utils import HybridCache
|
11
11
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
12
|
-
from transformers.models.gemma2.modeling_gemma2 import _CONFIG_FOR_DOC
|
13
|
-
from transformers.models.gemma2.modeling_gemma2 import GEMMA2_INPUTS_DOCSTRING
|
14
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
15
|
-
from transformers.utils import replace_return_docstrings
|
16
12
|
from transformers.utils.deprecation import deprecate_kwarg
|
17
13
|
|
18
14
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
@@ -136,8 +132,6 @@ def lce_forward_deprecated(
|
|
136
132
|
|
137
133
|
|
138
134
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
139
|
-
@add_start_docstrings_to_model_forward(GEMMA2_INPUTS_DOCSTRING)
|
140
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
141
135
|
def lce_forward(
|
142
136
|
self,
|
143
137
|
input_ids: torch.LongTensor = None,
|
@@ -9,13 +9,9 @@ import torch.nn as nn
|
|
9
9
|
from transformers.cache_utils import Cache
|
10
10
|
from transformers.cache_utils import HybridCache
|
11
11
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
12
|
-
from transformers.models.gemma3.modeling_gemma3 import _CONFIG_FOR_DOC
|
13
|
-
from transformers.models.gemma3.modeling_gemma3 import GEMMA3_INPUTS_DOCSTRING
|
14
12
|
from transformers.models.gemma3.modeling_gemma3 import Gemma3CausalLMOutputWithPast
|
15
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
16
13
|
from transformers.utils import is_torchdynamo_compiling
|
17
14
|
from transformers.utils import logging
|
18
|
-
from transformers.utils import replace_return_docstrings
|
19
15
|
from transformers.utils.deprecation import deprecate_kwarg
|
20
16
|
|
21
17
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
@@ -25,8 +21,6 @@ logger = logging.get_logger(__name__)
|
|
25
21
|
|
26
22
|
|
27
23
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
28
|
-
@add_start_docstrings_to_model_forward(GEMMA3_INPUTS_DOCSTRING)
|
29
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
30
24
|
def causal_forward(
|
31
25
|
self,
|
32
26
|
input_ids: torch.LongTensor = None,
|
@@ -141,8 +135,6 @@ def causal_forward(
|
|
141
135
|
|
142
136
|
|
143
137
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
144
|
-
@add_start_docstrings_to_model_forward(GEMMA3_INPUTS_DOCSTRING)
|
145
|
-
@replace_return_docstrings(output_type=Gemma3CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
146
138
|
def multimodal_forward(
|
147
139
|
self,
|
148
140
|
input_ids: torch.LongTensor = None,
|
@@ -6,18 +6,12 @@ from typing import Union
|
|
6
6
|
import torch
|
7
7
|
|
8
8
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
9
|
-
from transformers.models.glm4.modeling_glm4 import _CONFIG_FOR_DOC
|
10
|
-
from transformers.models.glm4.modeling_glm4 import GLM4_INPUTS_DOCSTRING
|
11
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
12
|
-
from transformers.utils import replace_return_docstrings
|
13
9
|
from transformers.utils.deprecation import deprecate_kwarg
|
14
10
|
|
15
11
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
16
12
|
|
17
13
|
|
18
14
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
19
|
-
@add_start_docstrings_to_model_forward(GLM4_INPUTS_DOCSTRING)
|
20
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
21
15
|
def lce_forward(
|
22
16
|
self,
|
23
17
|
input_ids: torch.LongTensor = None,
|
@@ -9,10 +9,6 @@ import torch.nn.functional as F
|
|
9
9
|
|
10
10
|
from torch.nn import CrossEntropyLoss
|
11
11
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
12
|
-
from transformers.models.llama.modeling_llama import _CONFIG_FOR_DOC
|
13
|
-
from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING
|
14
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
15
|
-
from transformers.utils import replace_return_docstrings
|
16
12
|
from transformers.utils.deprecation import deprecate_kwarg
|
17
13
|
|
18
14
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
@@ -22,8 +18,6 @@ if TYPE_CHECKING:
|
|
22
18
|
from transformers.cache_utils import Cache
|
23
19
|
|
24
20
|
|
25
|
-
@add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
|
26
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
27
21
|
def lce_forward_deprecated(
|
28
22
|
self,
|
29
23
|
input_ids: torch.LongTensor = None,
|
@@ -137,8 +131,6 @@ def lce_forward_deprecated(
|
|
137
131
|
|
138
132
|
|
139
133
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
140
|
-
@add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
|
141
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
142
134
|
def lce_forward(
|
143
135
|
self,
|
144
136
|
input_ids: torch.LongTensor = None,
|
@@ -5,19 +5,13 @@ from typing import Union
|
|
5
5
|
|
6
6
|
import torch
|
7
7
|
|
8
|
-
from transformers.models.llava.modeling_llava import _CONFIG_FOR_DOC
|
9
|
-
from transformers.models.llava.modeling_llava import LLAVA_INPUTS_DOCSTRING
|
10
8
|
from transformers.models.llava.modeling_llava import LlavaCausalLMOutputWithPast
|
11
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
12
9
|
from transformers.utils import is_torchdynamo_compiling
|
13
|
-
from transformers.utils import replace_return_docstrings
|
14
10
|
from transformers.utils.deprecation import deprecate_kwarg
|
15
11
|
|
16
12
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
17
13
|
|
18
14
|
|
19
|
-
@add_start_docstrings_to_model_forward(LLAVA_INPUTS_DOCSTRING)
|
20
|
-
@replace_return_docstrings(output_type=LlavaCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
21
15
|
def lce_forward_deprecated(
|
22
16
|
self,
|
23
17
|
input_ids: torch.LongTensor = None,
|
@@ -210,9 +204,7 @@ def lce_forward_deprecated(
|
|
210
204
|
)
|
211
205
|
|
212
206
|
|
213
|
-
@add_start_docstrings_to_model_forward(LLAVA_INPUTS_DOCSTRING)
|
214
207
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
215
|
-
@replace_return_docstrings(output_type=LlavaCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
216
208
|
def lce_forward(
|
217
209
|
self,
|
218
210
|
input_ids: torch.LongTensor = None,
|
@@ -7,18 +7,12 @@ import torch
|
|
7
7
|
|
8
8
|
from transformers.cache_utils import Cache
|
9
9
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
10
|
-
from transformers.models.mistral.modeling_mistral import _CONFIG_FOR_DOC
|
11
|
-
from transformers.models.mistral.modeling_mistral import MISTRAL_INPUTS_DOCSTRING
|
12
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
13
|
-
from transformers.utils import replace_return_docstrings
|
14
10
|
from transformers.utils.deprecation import deprecate_kwarg
|
15
11
|
|
16
12
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
17
13
|
|
18
14
|
|
19
15
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
20
|
-
@add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING)
|
21
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
22
16
|
def lce_forward(
|
23
17
|
self,
|
24
18
|
input_ids: torch.LongTensor = None,
|
@@ -7,19 +7,13 @@ import torch
|
|
7
7
|
|
8
8
|
from torch.nn import CrossEntropyLoss
|
9
9
|
from transformers.modeling_outputs import MoeCausalLMOutputWithPast
|
10
|
-
from transformers.models.mixtral.modeling_mixtral import _CONFIG_FOR_DOC
|
11
|
-
from transformers.models.mixtral.modeling_mixtral import MIXTRAL_INPUTS_DOCSTRING
|
12
10
|
from transformers.models.mixtral.modeling_mixtral import load_balancing_loss_func
|
13
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
14
|
-
from transformers.utils import replace_return_docstrings
|
15
11
|
from transformers.utils.deprecation import deprecate_kwarg
|
16
12
|
|
17
13
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
18
14
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
19
15
|
|
20
16
|
|
21
|
-
@add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING)
|
22
|
-
@replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
23
17
|
def lce_forward_deprecated(
|
24
18
|
self,
|
25
19
|
input_ids: torch.LongTensor = None,
|
@@ -146,8 +140,6 @@ def lce_forward_deprecated(
|
|
146
140
|
|
147
141
|
|
148
142
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
149
|
-
@add_start_docstrings_to_model_forward(MIXTRAL_INPUTS_DOCSTRING)
|
150
|
-
@replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
151
143
|
# Ignore copy
|
152
144
|
def lce_forward(
|
153
145
|
self,
|
@@ -8,17 +8,12 @@ import torch
|
|
8
8
|
from torch.nn import CrossEntropyLoss
|
9
9
|
from transformers.cache_utils import Cache
|
10
10
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
11
|
-
from transformers.models.mllama.modeling_mllama import MLLAMA_INPUTS_DOCSTRING
|
12
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
13
|
-
from transformers.utils import replace_return_docstrings
|
14
11
|
from transformers.utils.deprecation import deprecate_kwarg
|
15
12
|
|
16
13
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
17
14
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
18
15
|
|
19
16
|
|
20
|
-
@add_start_docstrings_to_model_forward(MLLAMA_INPUTS_DOCSTRING)
|
21
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class="MllamaTextConfig")
|
22
17
|
def lce_forward_deprecated(
|
23
18
|
self,
|
24
19
|
input_ids: torch.LongTensor = None,
|
@@ -135,8 +130,6 @@ def lce_forward_deprecated(
|
|
135
130
|
|
136
131
|
|
137
132
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
138
|
-
@add_start_docstrings_to_model_forward(MLLAMA_INPUTS_DOCSTRING)
|
139
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class="MllamaTextConfig")
|
140
133
|
def lce_forward(
|
141
134
|
self,
|
142
135
|
input_ids: torch.LongTensor = None,
|
@@ -6,18 +6,12 @@ from typing import Union
|
|
6
6
|
import torch
|
7
7
|
|
8
8
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
9
|
-
from transformers.models.olmo2.modeling_olmo2 import _CONFIG_FOR_DOC
|
10
|
-
from transformers.models.olmo2.modeling_olmo2 import OLMO2_INPUTS_DOCSTRING
|
11
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
12
|
-
from transformers.utils import replace_return_docstrings
|
13
9
|
from transformers.utils.deprecation import deprecate_kwarg
|
14
10
|
|
15
11
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
16
12
|
|
17
13
|
|
18
14
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
19
|
-
@add_start_docstrings_to_model_forward(OLMO2_INPUTS_DOCSTRING)
|
20
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
21
15
|
def lce_forward(
|
22
16
|
self,
|
23
17
|
input_ids: torch.LongTensor = None,
|
@@ -7,13 +7,9 @@ import torch
|
|
7
7
|
|
8
8
|
from torch.nn import CrossEntropyLoss
|
9
9
|
from transformers.cache_utils import Cache
|
10
|
-
from transformers.models.paligemma.modeling_paligemma import _CONFIG_FOR_DOC
|
11
|
-
from transformers.models.paligemma.modeling_paligemma import PALIGEMMA_INPUTS_DOCSTRING
|
12
10
|
from transformers.models.paligemma.modeling_paligemma import PaliGemmaCausalLMOutputWithPast
|
13
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
14
11
|
from transformers.utils import is_torchdynamo_compiling
|
15
12
|
from transformers.utils import logging
|
16
|
-
from transformers.utils import replace_return_docstrings
|
17
13
|
from transformers.utils.deprecation import deprecate_kwarg
|
18
14
|
|
19
15
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
@@ -21,8 +17,6 @@ from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinea
|
|
21
17
|
logger = logging.get_logger(__name__)
|
22
18
|
|
23
19
|
|
24
|
-
@add_start_docstrings_to_model_forward(PALIGEMMA_INPUTS_DOCSTRING)
|
25
|
-
@replace_return_docstrings(output_type=PaliGemmaCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
26
20
|
def lce_forward_deprecated(
|
27
21
|
self,
|
28
22
|
input_ids: torch.LongTensor = None,
|
@@ -206,8 +200,6 @@ def lce_forward_deprecated(
|
|
206
200
|
|
207
201
|
|
208
202
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
209
|
-
@add_start_docstrings_to_model_forward(PALIGEMMA_INPUTS_DOCSTRING)
|
210
|
-
@replace_return_docstrings(output_type=PaliGemmaCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
211
203
|
def lce_forward(
|
212
204
|
self,
|
213
205
|
input_ids: torch.LongTensor = None,
|
@@ -7,18 +7,12 @@ import torch
|
|
7
7
|
|
8
8
|
from torch.nn import CrossEntropyLoss
|
9
9
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
10
|
-
from transformers.models.phi3.modeling_phi3 import _CONFIG_FOR_DOC
|
11
|
-
from transformers.models.phi3.modeling_phi3 import PHI3_INPUTS_DOCSTRING
|
12
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
13
|
-
from transformers.utils import replace_return_docstrings
|
14
10
|
from transformers.utils.deprecation import deprecate_kwarg
|
15
11
|
|
16
12
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
17
13
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
18
14
|
|
19
15
|
|
20
|
-
@add_start_docstrings_to_model_forward(PHI3_INPUTS_DOCSTRING)
|
21
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
22
16
|
def lce_forward_deprecated(
|
23
17
|
self,
|
24
18
|
input_ids: torch.LongTensor = None,
|
@@ -128,8 +122,6 @@ def lce_forward_deprecated(
|
|
128
122
|
|
129
123
|
|
130
124
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
131
|
-
@add_start_docstrings_to_model_forward(PHI3_INPUTS_DOCSTRING)
|
132
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
133
125
|
def lce_forward(
|
134
126
|
self,
|
135
127
|
input_ids: torch.LongTensor = None,
|
@@ -7,18 +7,12 @@ import torch
|
|
7
7
|
|
8
8
|
from torch.nn import CrossEntropyLoss
|
9
9
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
10
|
-
from transformers.models.qwen2.modeling_qwen2 import _CONFIG_FOR_DOC
|
11
|
-
from transformers.models.qwen2.modeling_qwen2 import QWEN2_INPUTS_DOCSTRING
|
12
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
13
|
-
from transformers.utils import replace_return_docstrings
|
14
10
|
from transformers.utils.deprecation import deprecate_kwarg
|
15
11
|
|
16
12
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
17
13
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
18
14
|
|
19
15
|
|
20
|
-
@add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING)
|
21
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
22
16
|
def lce_forward_deprecated(
|
23
17
|
self,
|
24
18
|
input_ids: torch.LongTensor = None,
|
@@ -127,8 +121,6 @@ def lce_forward_deprecated(
|
|
127
121
|
|
128
122
|
|
129
123
|
@deprecate_kwarg("num_logits_to_keep", version="4.50", new_name="logits_to_keep")
|
130
|
-
@add_start_docstrings_to_model_forward(QWEN2_INPUTS_DOCSTRING)
|
131
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
132
124
|
def lce_forward(
|
133
125
|
self,
|
134
126
|
input_ids: torch.LongTensor = None,
|
@@ -6,17 +6,11 @@ from typing import Union
|
|
6
6
|
import torch
|
7
7
|
|
8
8
|
from torch.nn import CrossEntropyLoss
|
9
|
-
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import _CONFIG_FOR_DOC
|
10
|
-
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import QWEN2_5_VL_INPUTS_DOCSTRING
|
11
9
|
from transformers.models.qwen2_5_vl.modeling_qwen2_5_vl import Qwen2_5_VLCausalLMOutputWithPast
|
12
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
13
|
-
from transformers.utils import replace_return_docstrings
|
14
10
|
|
15
11
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
16
12
|
|
17
13
|
|
18
|
-
@add_start_docstrings_to_model_forward(QWEN2_5_VL_INPUTS_DOCSTRING)
|
19
|
-
@replace_return_docstrings(output_type=Qwen2_5_VLCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
20
14
|
def lce_forward(
|
21
15
|
self,
|
22
16
|
input_ids: torch.LongTensor = None,
|
@@ -8,17 +8,11 @@ import torch
|
|
8
8
|
from packaging import version
|
9
9
|
from torch.nn import CrossEntropyLoss
|
10
10
|
from transformers import __version__ as transformers_version
|
11
|
-
from transformers.models.qwen2_vl.modeling_qwen2_vl import _CONFIG_FOR_DOC
|
12
|
-
from transformers.models.qwen2_vl.modeling_qwen2_vl import QWEN2_VL_INPUTS_DOCSTRING
|
13
11
|
from transformers.models.qwen2_vl.modeling_qwen2_vl import Qwen2VLCausalLMOutputWithPast
|
14
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
15
|
-
from transformers.utils import replace_return_docstrings
|
16
12
|
|
17
13
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
18
14
|
|
19
15
|
|
20
|
-
@add_start_docstrings_to_model_forward(QWEN2_VL_INPUTS_DOCSTRING)
|
21
|
-
@replace_return_docstrings(output_type=Qwen2VLCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
22
16
|
def lce_forward(
|
23
17
|
self,
|
24
18
|
input_ids: torch.LongTensor = None,
|
@@ -5,16 +5,10 @@ from typing import Union
|
|
5
5
|
import torch
|
6
6
|
|
7
7
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
8
|
-
from transformers.models.qwen3.modeling_qwen3 import _CONFIG_FOR_DOC
|
9
|
-
from transformers.models.qwen3.modeling_qwen3 import QWEN3_INPUTS_DOCSTRING
|
10
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
11
|
-
from transformers.utils import replace_return_docstrings
|
12
8
|
|
13
9
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
14
10
|
|
15
11
|
|
16
|
-
@add_start_docstrings_to_model_forward(QWEN3_INPUTS_DOCSTRING)
|
17
|
-
@replace_return_docstrings(output_type=CausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
18
12
|
def lce_forward(
|
19
13
|
self,
|
20
14
|
input_ids: Optional[torch.LongTensor] = None,
|
@@ -7,16 +7,10 @@ import torch
|
|
7
7
|
from transformers.modeling_outputs import MoeCausalLMOutputWithPast
|
8
8
|
from transformers.modeling_outputs import MoeModelOutputWithPast
|
9
9
|
from transformers.models.mixtral.modeling_mixtral import load_balancing_loss_func
|
10
|
-
from transformers.models.qwen3_moe.modeling_qwen3_moe import _CONFIG_FOR_DOC
|
11
|
-
from transformers.models.qwen3_moe.modeling_qwen3_moe import QWEN3_MOE_INPUTS_DOCSTRING
|
12
|
-
from transformers.utils import add_start_docstrings_to_model_forward
|
13
|
-
from transformers.utils import replace_return_docstrings
|
14
10
|
|
15
11
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
16
12
|
|
17
13
|
|
18
|
-
@add_start_docstrings_to_model_forward(QWEN3_MOE_INPUTS_DOCSTRING)
|
19
|
-
@replace_return_docstrings(output_type=MoeCausalLMOutputWithPast, config_class=_CONFIG_FOR_DOC)
|
20
14
|
def lce_forward(
|
21
15
|
self,
|
22
16
|
input_ids: Optional[torch.LongTensor] = None,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.5.9.dev20250515065336 → liger_kernel_nightly-0.5.9.dev20250516193902}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|