liger-kernel-nightly 0.6.4.dev20251206103502__tar.gz → 0.6.4.dev20251209171241__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/PKG-INFO +4 -1
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/README.md +3 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/monkey_patch.py +5 -6
- liger_kernel_nightly-0.6.4.dev20251209171241/src/liger_kernel/transformers/rope.py +64 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/PKG-INFO +4 -1
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_monkey_patch.py +8 -8
- liger_kernel_nightly-0.6.4.dev20251206103502/src/liger_kernel/transformers/rope.py +0 -63
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/Makefile +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_tiled_mlp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/setup.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/tiled_mlp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/glm4v.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/gpt_oss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/hunyuan_v1.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/internvl.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/olmo3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/output_classes.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3_next.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3_vl.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/qwen3_vl_moe.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/model/smolvlm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/tiled_mlp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/Qwen/Qwen3-VL-4B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_tiled_mlp.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/test/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: liger_kernel_nightly
|
|
3
|
-
Version: 0.6.4.dev20251206103502
|
|
3
|
+
Version: 0.6.4.dev20251209171241
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -113,6 +113,8 @@ We've also added optimized Post-Training kernels that deliver **up to 80% memory
|
|
|
113
113
|
|
|
114
114
|
You can view the documentation site for additional installation, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
|
|
115
115
|
|
|
116
|
+
You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
|
|
117
|
+
|
|
116
118
|
## Supercharge Your Model with Liger Kernel
|
|
117
119
|
|
|
118
120
|

|
|
@@ -442,3 +444,4 @@ url={https://openreview.net/forum?id=36SjAIT42G}
|
|
|
442
444
|
↑ Back to Top ↑
|
|
443
445
|
</a>
|
|
444
446
|
</p>
|
|
447
|
+
|
|
@@ -65,6 +65,8 @@ We've also added optimized Post-Training kernels that deliver **up to 80% memory
|
|
|
65
65
|
|
|
66
66
|
You can view the documentation site for additional installation, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
|
|
67
67
|
|
|
68
|
+
You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
|
|
69
|
+
|
|
68
70
|
## Supercharge Your Model with Liger Kernel
|
|
69
71
|
|
|
70
72
|

|
|
@@ -394,3 +396,4 @@ url={https://openreview.net/forum?id=36SjAIT42G}
|
|
|
394
396
|
↑ Back to Top ↑
|
|
395
397
|
</a>
|
|
396
398
|
</p>
|
|
399
|
+
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "liger_kernel_nightly"
|
|
7
|
-
version = "0.6.4.dev20251206103502"
|
|
7
|
+
version = "0.6.4.dev20251209171241"
|
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -35,8 +35,7 @@ from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_f
|
|
|
35
35
|
from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
|
|
36
36
|
from liger_kernel.transformers.rms_norm import LigerRMSNorm
|
|
37
37
|
from liger_kernel.transformers.rope import liger_rotary_pos_emb
|
|
38
|
-
from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast
|
|
39
|
-
from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
38
|
+
from liger_kernel.transformers.rope import liger_rotary_pos_emb_vision
|
|
40
39
|
from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP
|
|
41
40
|
from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP
|
|
42
41
|
from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
|
|
@@ -1754,8 +1753,8 @@ def apply_liger_kernel_to_qwen3_vl(
|
|
|
1754
1753
|
from liger_kernel.transformers.model.qwen3_vl import lce_forward as qwen3_vl_lce_forward
|
|
1755
1754
|
|
|
1756
1755
|
if rope:
|
|
1757
|
-
modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb_with_cast
|
|
1758
|
-
modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
1756
|
+
modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb
|
|
1757
|
+
modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_vision
|
|
1759
1758
|
|
|
1760
1759
|
if rms_norm:
|
|
1761
1760
|
modeling_qwen3_vl.Qwen3VLTextRMSNorm = LigerRMSNorm
|
|
@@ -1829,8 +1828,8 @@ def apply_liger_kernel_to_qwen3_vl_moe(
|
|
|
1829
1828
|
from liger_kernel.transformers.model.qwen3_vl_moe import lce_forward as qwen3_vl_moe_lce_forward
|
|
1830
1829
|
|
|
1831
1830
|
if rope:
|
|
1832
|
-
modeling_qwen3_vl_moe.apply_rotary_pos_emb = liger_rotary_pos_emb_with_cast
|
|
1833
|
-
modeling_qwen3_vl_moe.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
1831
|
+
modeling_qwen3_vl_moe.apply_rotary_pos_emb = liger_rotary_pos_emb
|
|
1832
|
+
modeling_qwen3_vl_moe.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_vision
|
|
1834
1833
|
|
|
1835
1834
|
if rms_norm:
|
|
1836
1835
|
modeling_qwen3_vl_moe.Qwen3VLMoeTextRMSNorm = LigerRMSNorm
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from typing import Tuple
|
|
2
|
+
|
|
3
|
+
import torch
|
|
4
|
+
|
|
5
|
+
from liger_kernel.ops.rope import LigerRopeFunction
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def liger_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
|
|
9
|
+
"""
|
|
10
|
+
Applies Rotary Positional Embedding (RoPE) operation to query and key states.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
q (torch.Tensor): The query tensor of shape (bsz, n_q_head, seq_len, head_dim).
|
|
14
|
+
k (torch.Tensor): The key tensor of shape (bsz, n_kv_head, seq_len, head_dim).
|
|
15
|
+
cos (torch.Tensor): The cosine tensor of shape (1, seq_len, head_dim) or (bsz, seq_len, head_dim).
|
|
16
|
+
sin (torch.Tensor): The sine tensor of shape (1, seq_len, head_dim) or (bsz, seq_len, head_dim).
|
|
17
|
+
position_ids (torch.Tensor, optional): The position ids tensor. Defaults to None.
|
|
18
|
+
unsqueeze_dim (int, optional): The dimension to unsqueeze. Defaults to 1.
|
|
19
|
+
|
|
20
|
+
Returns:
|
|
21
|
+
Tuple[torch.Tensor, torch.Tensor]: The query and key tensors after applying the RoPE operation.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
return LigerRopeFunction.apply(q, k, cos, sin, position_ids, unsqueeze_dim)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def liger_rotary_pos_emb_vision(
|
|
28
|
+
q: torch.Tensor,
|
|
29
|
+
k: torch.Tensor,
|
|
30
|
+
cos: torch.Tensor,
|
|
31
|
+
sin: torch.Tensor,
|
|
32
|
+
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
33
|
+
"""
|
|
34
|
+
Modified version of liger_rotary_pos_emb for qwen3_vl's apply_rotary_pos_emb_vision function.
|
|
35
|
+
Manually transposes the input and output to match the expected shape for liger_rotary_pos_emb.
|
|
36
|
+
Reference: https://github.com/huggingface/transformers/blob/v5.0.0rc0/src/transformers/models/qwen3_vl/modeling_qwen3_vl.py#L116
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
q (torch.Tensor): The query tensor of shape (seq_length, num_heads, head_dim),
|
|
40
|
+
with stride (num_heads * head_dim, head_dim, 1).
|
|
41
|
+
k (torch.Tensor): The key tensor of shape (seq_length, num_heads, head_dim),
|
|
42
|
+
with stride (num_heads * head_dim, head_dim, 1). Same as q.
|
|
43
|
+
cos (torch.Tensor): The cosine tensor of shape (seq_length, head_dim).
|
|
44
|
+
sin (torch.Tensor): The sine tensor of shape (seq_length, head_dim).
|
|
45
|
+
|
|
46
|
+
Returns:
|
|
47
|
+
Tuple[torch.Tensor, torch.Tensor]: The query and key tensors with the same shape and stride as inputs.
|
|
48
|
+
"""
|
|
49
|
+
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
50
|
+
|
|
51
|
+
# transpose to (1, num_heads, seq_length, head_dim) and cast to float32 to match liger_rotary_pos_emb input shape
|
|
52
|
+
# also unsqueeze for batch dim
|
|
53
|
+
q32 = q.to(torch.float32).unsqueeze(0).transpose(1, 2)
|
|
54
|
+
k32 = k.to(torch.float32).unsqueeze(0).transpose(1, 2)
|
|
55
|
+
cos32 = cos.to(torch.float32)
|
|
56
|
+
sin32 = sin.to(torch.float32)
|
|
57
|
+
|
|
58
|
+
q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32)
|
|
59
|
+
|
|
60
|
+
# transpose back to (seq_length, num_heads, head_dim) and cast back to original dtype
|
|
61
|
+
# also squeeze out batch dim
|
|
62
|
+
q_out = q_out.transpose(1, 2).squeeze(0).to(orig_q_dtype)
|
|
63
|
+
k_out = k_out.transpose(1, 2).squeeze(0).to(orig_k_dtype)
|
|
64
|
+
return q_out, k_out
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: liger_kernel_nightly
|
|
3
|
-
Version: 0.6.4.dev20251206103502
|
|
3
|
+
Version: 0.6.4.dev20251209171241
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -113,6 +113,8 @@ We've also added optimized Post-Training kernels that deliver **up to 80% memory
|
|
|
113
113
|
|
|
114
114
|
You can view the documentation site for additional installation, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
|
|
115
115
|
|
|
116
|
+
You can view the Liger Kernel Technical Report: https://openreview.net/forum?id=36SjAIT42G
|
|
117
|
+
|
|
116
118
|
## Supercharge Your Model with Liger Kernel
|
|
117
119
|
|
|
118
120
|

|
|
@@ -442,3 +444,4 @@ url={https://openreview.net/forum?id=36SjAIT42G}
|
|
|
442
444
|
↑ Back to Top ↑
|
|
443
445
|
</a>
|
|
444
446
|
</p>
|
|
447
|
+
|
|
@@ -1019,8 +1019,8 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_text():
|
|
|
1019
1019
|
def test_qwen3_vl_rope_hooks_applied():
|
|
1020
1020
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
|
1021
1021
|
with patch("transformers.models.qwen3_vl.modeling_qwen3_vl") as modeling_mod:
|
|
1022
|
-
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_with_cast
|
|
1023
|
-
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
1022
|
+
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb
|
|
1023
|
+
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_vision
|
|
1024
1024
|
|
|
1025
1025
|
# Before applying, make sure attributes exist but are not the liger implementations
|
|
1026
1026
|
setattr(modeling_mod, "apply_rotary_pos_emb", object())
|
|
@@ -1028,16 +1028,16 @@ def test_qwen3_vl_rope_hooks_applied():
|
|
|
1028
1028
|
|
|
1029
1029
|
_apply_liger_kernel("qwen3_vl")
|
|
1030
1030
|
|
|
1031
|
-
assert modeling_mod.apply_rotary_pos_emb is liger_rotary_pos_emb_with_cast
|
|
1032
|
-
assert modeling_mod.apply_rotary_pos_emb_vision is liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
1031
|
+
assert modeling_mod.apply_rotary_pos_emb is liger_rotary_pos_emb
|
|
1032
|
+
assert modeling_mod.apply_rotary_pos_emb_vision is liger_rotary_pos_emb_vision
|
|
1033
1033
|
|
|
1034
1034
|
|
|
1035
1035
|
@pytest.mark.skipif(not is_qwen3_vl_moe_available(), reason="qwen3_vl_moe module not available")
|
|
1036
1036
|
def test_qwen3_vl_moe_rope_hooks_applied():
|
|
1037
1037
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
|
1038
1038
|
with patch("transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe") as modeling_mod:
|
|
1039
|
-
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_with_cast
|
|
1040
|
-
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
1039
|
+
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb
|
|
1040
|
+
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb_vision
|
|
1041
1041
|
|
|
1042
1042
|
# Before applying, make sure attributes exist but are not the liger implementations
|
|
1043
1043
|
setattr(modeling_mod, "apply_rotary_pos_emb", object())
|
|
@@ -1045,8 +1045,8 @@ def test_qwen3_vl_moe_rope_hooks_applied():
|
|
|
1045
1045
|
|
|
1046
1046
|
_apply_liger_kernel("qwen3_vl_moe")
|
|
1047
1047
|
|
|
1048
|
-
assert modeling_mod.apply_rotary_pos_emb is liger_rotary_pos_emb_with_cast
|
|
1049
|
-
assert modeling_mod.apply_rotary_pos_emb_vision is liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
1048
|
+
assert modeling_mod.apply_rotary_pos_emb is liger_rotary_pos_emb
|
|
1049
|
+
assert modeling_mod.apply_rotary_pos_emb_vision is liger_rotary_pos_emb_vision
|
|
1050
1050
|
|
|
1051
1051
|
|
|
1052
1052
|
@pytest.mark.skipif(not is_falcon_h1_available(), reason="falcon_h1 module not available")
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
from typing import Optional
|
|
2
|
-
from typing import Tuple
|
|
3
|
-
|
|
4
|
-
import torch
|
|
5
|
-
|
|
6
|
-
from liger_kernel.ops.rope import LigerRopeFunction
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
def liger_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
|
|
10
|
-
"""
|
|
11
|
-
Applies Rotary Positional Embedding (RoPE) operation to query and key states.
|
|
12
|
-
|
|
13
|
-
Args:
|
|
14
|
-
q (torch.Tensor): The query tensor of shape (bsz, n_q_head, seq_len, head_dim).
|
|
15
|
-
k (torch.Tensor): The key tensor of shape (bsz, n_kv_head, seq_len, head_dim).
|
|
16
|
-
cos (torch.Tensor): The cosine tensor of shape (1, seq_len, head_dim) or (bsz, seq_len, head_dim).
|
|
17
|
-
sin (torch.Tensor): The sine tensor of shape (1, seq_len, head_dim) or (bsz, seq_len, head_dim).
|
|
18
|
-
position_ids (torch.Tensor, optional): The position ids tensor. Defaults to None.
|
|
19
|
-
unsqueeze_dim (int, optional): The dimension to unsqueeze. Defaults to 1.
|
|
20
|
-
|
|
21
|
-
Returns:
|
|
22
|
-
Tuple[torch.Tensor, torch.Tensor]: The query and key tensors after applying the RoPE operation.
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
return LigerRopeFunction.apply(q, k, cos, sin, position_ids, unsqueeze_dim)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def liger_rotary_pos_emb_with_cast(
|
|
29
|
-
q: torch.Tensor,
|
|
30
|
-
k: torch.Tensor,
|
|
31
|
-
cos: torch.Tensor,
|
|
32
|
-
sin: torch.Tensor,
|
|
33
|
-
position_ids: Optional[torch.Tensor] = None,
|
|
34
|
-
unsqueeze_dim: int = 1,
|
|
35
|
-
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
36
|
-
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
37
|
-
|
|
38
|
-
q32 = q.to(torch.float32)
|
|
39
|
-
k32 = k.to(torch.float32)
|
|
40
|
-
cos32 = cos.to(torch.float32)
|
|
41
|
-
sin32 = sin.to(torch.float32)
|
|
42
|
-
|
|
43
|
-
q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
|
|
44
|
-
return q_out.to(orig_q_dtype), k_out.to(orig_k_dtype)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def liger_rotary_pos_emb_with_cast_and_leading_batch(
|
|
48
|
-
q: torch.Tensor,
|
|
49
|
-
k: torch.Tensor,
|
|
50
|
-
cos: torch.Tensor,
|
|
51
|
-
sin: torch.Tensor,
|
|
52
|
-
position_ids: Optional[torch.Tensor] = None,
|
|
53
|
-
unsqueeze_dim: int = 1,
|
|
54
|
-
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
55
|
-
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
56
|
-
|
|
57
|
-
q32 = q.to(torch.float32).unsqueeze(0)
|
|
58
|
-
k32 = k.to(torch.float32).unsqueeze(0)
|
|
59
|
-
cos32 = cos.to(torch.float32).unsqueeze(0)
|
|
60
|
-
sin32 = sin.to(torch.float32).unsqueeze(0)
|
|
61
|
-
|
|
62
|
-
q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
|
|
63
|
-
return q_out.to(orig_q_dtype).squeeze(0), k_out.to(orig_k_dtype).squeeze(0)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{liger_kernel_nightly-0.6.4.dev20251206103502 → liger_kernel_nightly-0.6.4.dev20251209171241}/NOTICE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|