liger-kernel-nightly 0.6.4.dev20260113145602__tar.gz → 0.6.4.dev20260116105204__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_tvd.py +7 -4
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/fused_add_rms_norm.py +3 -2
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/layer_norm.py +15 -15
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/poly_norm.py +14 -20
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/rms_norm.py +3 -2
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/utils.py +11 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/gemma3.py +9 -3
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/loss_utils.py +6 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/Makefile +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/benchmark_tiled_mlp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/setup.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ascend-ub-manager-design.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/_ascend/ub_manager.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/backends/registry.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/tiled_mlp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/exaone4.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/glm4v.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/gpt_oss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/hunyuan_v1.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/internvl.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/olmo3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/output_classes.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3_next.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3_vl.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/qwen3_vl_moe.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/model/smolvlm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/tiled_mlp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/Qwen/Qwen3-VL-4B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_tiled_mlp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/test/utils.py +0 -0
|
@@ -9,6 +9,9 @@ from utils import parse_benchmark_script_args
|
|
|
9
9
|
from utils import run_benchmarks
|
|
10
10
|
|
|
11
11
|
from liger_kernel.transformers.tvd import LigerTVDLoss
|
|
12
|
+
from liger_kernel.utils import infer_device
|
|
13
|
+
|
|
14
|
+
device = infer_device()
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
class TorchTVDLoss(torch.nn.Module):
|
|
@@ -40,8 +43,8 @@ def bench_speed_tvd(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput:
|
|
|
40
43
|
torch_tvd = TorchTVDLoss(reduction=reduction)
|
|
41
44
|
liger_tvd = LigerTVDLoss(reduction=reduction)
|
|
42
45
|
|
|
43
|
-
_input = torch.randn(B * T, V, requires_grad=True, device=
|
|
44
|
-
target = torch.randn(B * T, V, device=
|
|
46
|
+
_input = torch.randn(B * T, V, requires_grad=True, device=device).softmax(dim=-1)
|
|
47
|
+
target = torch.randn(B * T, V, device=device).softmax(dim=-1)
|
|
45
48
|
|
|
46
49
|
def fwd():
|
|
47
50
|
if input.kernel_provider == "liger":
|
|
@@ -82,8 +85,8 @@ def bench_memory_tvd(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput
|
|
|
82
85
|
V = input.x
|
|
83
86
|
B, T = input.extra_benchmark_config["B"], input.extra_benchmark_config["T"]
|
|
84
87
|
|
|
85
|
-
_input = torch.randn(B * T, V, requires_grad=True, device=
|
|
86
|
-
target = torch.randn(B * T, V, device=
|
|
88
|
+
_input = torch.randn(B * T, V, requires_grad=True, device=device).softmax(dim=-1)
|
|
89
|
+
target = torch.randn(B * T, V, device=device).softmax(dim=-1)
|
|
87
90
|
|
|
88
91
|
def fwd():
|
|
89
92
|
if input.kernel_provider == "liger":
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "liger_kernel_nightly"
|
|
7
|
-
version = "0.6.4.dev20260113145602"
|
|
7
|
+
version = "0.6.4.dev20260116105204"
|
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -8,6 +8,7 @@ import triton.language as tl
|
|
|
8
8
|
from liger_kernel.ops.utils import calculate_settings
|
|
9
9
|
from liger_kernel.ops.utils import compare_version
|
|
10
10
|
from liger_kernel.ops.utils import ensure_contiguous
|
|
11
|
+
from liger_kernel.ops.utils import set_large_grf_mode
|
|
11
12
|
from liger_kernel.ops.utils import torch_to_triton_dtype
|
|
12
13
|
from liger_kernel.utils import get_npu_multi_processor_count
|
|
13
14
|
from liger_kernel.utils import is_npu_available
|
|
@@ -247,7 +248,7 @@ def fused_add_rms_norm_forward(X, R, W, eps, offset, casting_mode):
|
|
|
247
248
|
# XPU-specific optimization
|
|
248
249
|
kernel_args = {}
|
|
249
250
|
if X.device.type == "xpu":
|
|
250
|
-
kernel_args["grf_mode"] = "large"
|
|
251
|
+
set_large_grf_mode(kernel_args)
|
|
251
252
|
|
|
252
253
|
# TODO: add _block_fused_add_rms_norm_forward_kernel
|
|
253
254
|
_fused_add_rms_norm_forward_kernel[(n_rows,)](
|
|
@@ -307,7 +308,7 @@ def fused_add_rms_norm_backward(dY, dS_out, S, W, RSTD, offset, casting_mode, BL
|
|
|
307
308
|
# XPU-specific optimization
|
|
308
309
|
kernel_args = {}
|
|
309
310
|
if S.device.type == "xpu":
|
|
310
|
-
kernel_args["grf_mode"] = "large"
|
|
311
|
+
set_large_grf_mode(kernel_args)
|
|
311
312
|
|
|
312
313
|
# TODO: add _block_fused_add_rms_norm_backward_kernel
|
|
313
314
|
_fused_add_rms_norm_backward_kernel[grid](
|
|
@@ -8,6 +8,8 @@ import triton.language as tl
|
|
|
8
8
|
from liger_kernel.ops.utils import calculate_settings
|
|
9
9
|
from liger_kernel.ops.utils import compare_version
|
|
10
10
|
from liger_kernel.ops.utils import ensure_contiguous
|
|
11
|
+
from liger_kernel.ops.utils import set_large_grf_mode
|
|
12
|
+
from liger_kernel.utils import get_npu_multi_processor_count
|
|
11
13
|
from liger_kernel.utils import is_npu_available
|
|
12
14
|
|
|
13
15
|
if compare_version("triton", operator.ge, "3.0.0") and not is_npu_available():
|
|
@@ -124,14 +126,14 @@ def _layer_norm_backward_kernel(
|
|
|
124
126
|
w = tl.load(W_ptr + cols, mask=mask, other=0.0)
|
|
125
127
|
w_f32 = w.to(tl.float32)
|
|
126
128
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
129
|
+
for row_idx in range(row_start, row_end):
|
|
130
|
+
# Calculate pointers for this specific row
|
|
131
|
+
row_X_ptr = X_ptr + row_idx * stride_x
|
|
132
|
+
row_DX_ptr = DX_ptr + row_idx * stride_dx
|
|
133
|
+
row_DY_ptr = DY_ptr + row_idx * stride_dy
|
|
134
|
+
row_Mean_ptr = Mean_ptr + row_idx * stride_mean
|
|
135
|
+
row_RSTD_ptr = RSTD_ptr + row_idx * stride_rstd
|
|
133
136
|
|
|
134
|
-
for _ in range(row_start, row_end):
|
|
135
137
|
# Load data for this row
|
|
136
138
|
x = tl.load(row_X_ptr + cols, mask=mask, other=0.0)
|
|
137
139
|
dy = tl.load(row_DY_ptr + cols, mask=mask, other=0.0)
|
|
@@ -160,12 +162,6 @@ def _layer_norm_backward_kernel(
|
|
|
160
162
|
dW_row += dw
|
|
161
163
|
db_row += db
|
|
162
164
|
|
|
163
|
-
row_X_ptr += stride_x
|
|
164
|
-
row_DX_ptr += stride_dx
|
|
165
|
-
row_DY_ptr += stride_dy
|
|
166
|
-
row_Mean_ptr += stride_mean
|
|
167
|
-
row_RSTD_ptr += stride_rstd
|
|
168
|
-
|
|
169
165
|
tl.store(DW_ptr + row_block_id * stride_dw + cols, dW_row, mask=mask)
|
|
170
166
|
tl.store(DB_ptr + row_block_id * stride_db + cols, db_row, mask=mask)
|
|
171
167
|
|
|
@@ -204,7 +200,7 @@ def layer_norm_forward(X, W, B, eps):
|
|
|
204
200
|
# XPU-specific optimization
|
|
205
201
|
kernel_args = {}
|
|
206
202
|
if X.device.type == "xpu":
|
|
207
|
-
kernel_args["grf_mode"] = "large"
|
|
203
|
+
set_large_grf_mode(kernel_args)
|
|
208
204
|
|
|
209
205
|
# Launch kernel with one thread block per row for optimal performance
|
|
210
206
|
grid = (n_rows,)
|
|
@@ -254,6 +250,8 @@ def layer_norm_backward(dY, X, W, B, Mean, RSTD):
|
|
|
254
250
|
sm_count = torch.cuda.get_device_properties(X.device).multi_processor_count
|
|
255
251
|
elif X.device.type == "xpu":
|
|
256
252
|
sm_count = torch.xpu.get_device_properties(X.device).gpu_eu_count
|
|
253
|
+
elif X.device.type == "npu":
|
|
254
|
+
sm_count = get_npu_multi_processor_count()
|
|
257
255
|
|
|
258
256
|
# fp32 for numerical stability especially.
|
|
259
257
|
_DW = torch.empty((sm_count, n_cols), dtype=torch.float32, device=W.device)
|
|
@@ -272,7 +270,8 @@ def layer_norm_backward(dY, X, W, B, Mean, RSTD):
|
|
|
272
270
|
kernel_args = {"num_warps": num_warps}
|
|
273
271
|
# XPU-specific optimization
|
|
274
272
|
if X.device.type == "xpu":
|
|
275
|
-
kernel_args.update({"grf_mode": "large", "num_warps": 32, "num_stages": 4})
|
|
273
|
+
kernel_args.update({"num_warps": 32, "num_stages": 4})
|
|
274
|
+
set_large_grf_mode(kernel_args)
|
|
276
275
|
|
|
277
276
|
# Launch kernel with one thread block per row for optimal performance
|
|
278
277
|
_layer_norm_backward_kernel[grid](
|
|
@@ -301,6 +300,7 @@ def layer_norm_backward(dY, X, W, B, Mean, RSTD):
|
|
|
301
300
|
DX = DX.view(*shape)
|
|
302
301
|
DW = _DW.sum(dim=0).to(W.dtype)
|
|
303
302
|
DB = _DB.sum(dim=0).to(B.dtype)
|
|
303
|
+
|
|
304
304
|
return DX, DW, DB
|
|
305
305
|
|
|
306
306
|
|
|
@@ -7,6 +7,7 @@ import triton.language as tl
|
|
|
7
7
|
from liger_kernel.ops.utils import calculate_settings
|
|
8
8
|
from liger_kernel.ops.utils import compare_version
|
|
9
9
|
from liger_kernel.ops.utils import ensure_contiguous
|
|
10
|
+
from liger_kernel.ops.utils import set_large_grf_mode
|
|
10
11
|
from liger_kernel.utils import get_npu_multi_processor_count
|
|
11
12
|
from liger_kernel.utils import is_npu_available
|
|
12
13
|
|
|
@@ -140,20 +141,19 @@ def _poly_norm_backward_kernel(
|
|
|
140
141
|
w1 = tl.load(W_ptr + 1).to(tl.float32)
|
|
141
142
|
w2 = tl.load(W_ptr + 2).to(tl.float32)
|
|
142
143
|
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
144
|
+
for row_idx in range(row_start, row_end):
|
|
145
|
+
dy_base = dY_ptr + row_idx * dY_row_stride
|
|
146
|
+
x_base = X_ptr + row_idx * X_row_stride
|
|
147
|
+
dx_base = dX_ptr + row_idx * dX_row_stride
|
|
148
|
+
rstd_base = RSTD_ptr + row_idx * RSTD_row_stride
|
|
147
149
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
dY_row = tl.load(dY_ptr + col_offsets, mask=mask, other=0.0).to(tl.float32)
|
|
151
|
-
X_row = tl.load(X_ptr + col_offsets, mask=mask, other=0.0).to(tl.float32)
|
|
150
|
+
dY_row = tl.load(dy_base + col_offsets, mask=mask, other=0.0).to(tl.float32)
|
|
151
|
+
X_row = tl.load(x_base + col_offsets, mask=mask, other=0.0).to(tl.float32)
|
|
152
152
|
|
|
153
153
|
# Load cached rstd values
|
|
154
|
-
rstd_3 = tl.load(RSTD_ptr + 0).to(tl.float32)
|
|
155
|
-
rstd_2 = tl.load(RSTD_ptr + 1).to(tl.float32)
|
|
156
|
-
rstd_1 = tl.load(RSTD_ptr + 2).to(tl.float32)
|
|
154
|
+
rstd_3 = tl.load(rstd_base + 0).to(tl.float32)
|
|
155
|
+
rstd_2 = tl.load(rstd_base + 1).to(tl.float32)
|
|
156
|
+
rstd_1 = tl.load(rstd_base + 2).to(tl.float32)
|
|
157
157
|
|
|
158
158
|
# Compute powers
|
|
159
159
|
X_pow3 = X_row * X_row * X_row
|
|
@@ -190,13 +190,7 @@ def _poly_norm_backward_kernel(
|
|
|
190
190
|
dX_row = grad_x_3 + grad_x_2 + grad_x_1
|
|
191
191
|
|
|
192
192
|
# Store gradient
|
|
193
|
-
tl.store(dX_ptr + col_offsets, dX_row, mask=mask)
|
|
194
|
-
|
|
195
|
-
# Update pointers
|
|
196
|
-
dY_ptr += dY_row_stride
|
|
197
|
-
dX_ptr += dX_row_stride
|
|
198
|
-
X_ptr += X_row_stride
|
|
199
|
-
RSTD_ptr += RSTD_row_stride
|
|
193
|
+
tl.store(dx_base + col_offsets, dX_row, mask=mask)
|
|
200
194
|
|
|
201
195
|
# Store accumulated gradients (scalars)
|
|
202
196
|
tl.store(dW_ptr + row_block_id * dW_row_stride + 0, dW0_acc)
|
|
@@ -239,7 +233,7 @@ def poly_norm_forward(X, W, B, eps=1e-6):
|
|
|
239
233
|
# XPU-specific optimization
|
|
240
234
|
kernel_args = {}
|
|
241
235
|
if X.device.type == "xpu":
|
|
242
|
-
kernel_args["grf_mode"] = "large"
|
|
236
|
+
set_large_grf_mode(kernel_args)
|
|
243
237
|
|
|
244
238
|
# Launch kernel
|
|
245
239
|
_poly_norm_forward_kernel[(n_rows,)](
|
|
@@ -310,7 +304,7 @@ def poly_norm_backward(dY, X, W, RSTD, BLOCK_SIZE, num_warps, in_place):
|
|
|
310
304
|
# XPU-specific optimization
|
|
311
305
|
kernel_args = {}
|
|
312
306
|
if X.device.type == "xpu":
|
|
313
|
-
kernel_args["grf_mode"] = "large"
|
|
307
|
+
set_large_grf_mode(kernel_args)
|
|
314
308
|
|
|
315
309
|
# Launch backward kernel
|
|
316
310
|
_poly_norm_backward_kernel[grid](
|
|
@@ -20,6 +20,7 @@ import triton.language as tl
|
|
|
20
20
|
from liger_kernel.ops.utils import calculate_settings
|
|
21
21
|
from liger_kernel.ops.utils import compare_version
|
|
22
22
|
from liger_kernel.ops.utils import ensure_contiguous
|
|
23
|
+
from liger_kernel.ops.utils import set_large_grf_mode
|
|
23
24
|
from liger_kernel.ops.utils import torch_to_triton_dtype
|
|
24
25
|
from liger_kernel.utils import get_npu_multi_processor_count
|
|
25
26
|
from liger_kernel.utils import is_npu_available
|
|
@@ -436,7 +437,7 @@ def rms_norm_forward(X, W, eps, offset, casting_mode, row_mode):
|
|
|
436
437
|
# XPU-specific optimization
|
|
437
438
|
kernel_args = {}
|
|
438
439
|
if X.device.type == "xpu":
|
|
439
|
-
kernel_args["grf_mode"] = "large"
|
|
440
|
+
set_large_grf_mode(kernel_args)
|
|
440
441
|
if BLOCK_SIZE > 256 or n_rows < 4096 * 8 or row_mode:
|
|
441
442
|
_rms_norm_forward_kernel[(n_rows,)](
|
|
442
443
|
Y,
|
|
@@ -516,7 +517,7 @@ def rms_norm_backward(dY, X, W, RSTD, offset, casting_mode, BLOCK_SIZE, num_warp
|
|
|
516
517
|
# XPU-specific optimization
|
|
517
518
|
kernel_args = {}
|
|
518
519
|
if X.device.type == "xpu":
|
|
519
|
-
kernel_args["grf_mode"] = "large"
|
|
520
|
+
set_large_grf_mode(kernel_args)
|
|
520
521
|
|
|
521
522
|
if BLOCK_SIZE > 256 or n_rows < 4096 * 8 or row_mode:
|
|
522
523
|
_rms_norm_backward_kernel[grid](
|
|
@@ -139,3 +139,14 @@ def get_npu_core_count(default: int = 20) -> int:
|
|
|
139
139
|
return int(props.get("num_vectorcore", default))
|
|
140
140
|
except Exception:
|
|
141
141
|
return default
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def set_large_grf_mode(kernel_args: dict):
|
|
145
|
+
"""Set large GRF mode for XPU devices."""
|
|
146
|
+
# On XPU triton installed along with pytorch-xpu will be called `pytorch-triton-xpu`,
|
|
147
|
+
# triton XPU installed from source will be called `triton`.
|
|
148
|
+
if compare_version("pytorch-triton-xpu", operator.ge, "3.6.0") or compare_version("triton", operator.ge, "3.6.0"):
|
|
149
|
+
kernel_args["grf_mode"] = "256"
|
|
150
|
+
else:
|
|
151
|
+
# API was changed in https://github.com/intel/intel-xpu-backend-for-triton/pull/5430
|
|
152
|
+
kernel_args["grf_mode"] = "large"
|
|
@@ -8,7 +8,6 @@ import torch.nn as nn
|
|
|
8
8
|
from transformers.cache_utils import Cache
|
|
9
9
|
from transformers.utils import logging
|
|
10
10
|
|
|
11
|
-
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
|
12
11
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
|
13
12
|
from liger_kernel.transformers.model.loss_utils import unpack_cross_entropy_result
|
|
14
13
|
from liger_kernel.transformers.model.output_classes import LigerCausalLMOutputWithPast
|
|
@@ -268,8 +267,15 @@ def multimodal_forward(
|
|
|
268
267
|
shift_hidden_states = shift_hidden_states.view(-1, self.config.text_config.hidden_size)
|
|
269
268
|
shift_labels = shift_labels.view(-1).to(hidden_device)
|
|
270
269
|
|
|
271
|
-
|
|
272
|
-
|
|
270
|
+
result = LigerForCausalLMLoss(
|
|
271
|
+
hidden_states=shift_hidden_states,
|
|
272
|
+
lm_head_weight=self.lm_head.weight,
|
|
273
|
+
labels=shift_labels,
|
|
274
|
+
hidden_size=self.config.text_config.hidden_size,
|
|
275
|
+
shift_labels=shift_labels,
|
|
276
|
+
final_logit_softcapping=getattr(self.config.text_config, "final_logit_softcapping", None),
|
|
277
|
+
**lm_kwargs,
|
|
278
|
+
)
|
|
273
279
|
loss, _, token_accuracy = unpack_cross_entropy_result(result)
|
|
274
280
|
|
|
275
281
|
else:
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
|
|
1
3
|
from typing import Optional
|
|
2
4
|
from typing import Tuple
|
|
3
5
|
|
|
@@ -71,6 +73,10 @@ def LigerForCausalLMLoss(
|
|
|
71
73
|
return_token_accuracy: bool = False,
|
|
72
74
|
**kwargs,
|
|
73
75
|
):
|
|
76
|
+
# Filter out inapplicable kwargs to liger_fused_linear_cross_entropy
|
|
77
|
+
applicable_params = inspect.signature(F.liger_fused_linear_cross_entropy).parameters
|
|
78
|
+
kwargs = {k: v for k, v in kwargs.items() if k in applicable_params}
|
|
79
|
+
|
|
74
80
|
# Skip upcast since intermediate values for the loss are all fp32 in kernel
|
|
75
81
|
if shift_labels is None:
|
|
76
82
|
# Shift so that token < n predict n
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{liger_kernel_nightly-0.6.4.dev20260113145602 → liger_kernel_nightly-0.6.4.dev20260116105204}/NOTICE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|