liger-kernel-nightly 0.6.3.dev20251101160510__tar.gz → 0.6.3.dev20251105012545__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3_vl_moe.py +2 -4
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/monkey_patch.py +3 -9
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/rope.py +3 -7
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/bf16/test_mini_models_with_logits.py +1 -2
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/fp32/test_mini_models.py +2 -6
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/fp32/test_mini_models_multimodal.py +0 -1
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/fp32/test_mini_models_with_logits.py +2 -6
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_monkey_patch.py +16 -44
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/Makefile +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/README.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/setup.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/glm4v.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/internvl.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3_next.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/qwen3_vl.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/model/smolvlm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/HuggingFaceTB/SmolVLM2-256M-Video-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/Qwen/Qwen3-VL-4B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/test/utils.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "liger_kernel_nightly"
|
|
7
|
-
version = "0.6.3.dev20251101160510"
|
|
7
|
+
version = "0.6.3.dev20251105012545"
|
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -5,13 +5,11 @@ from typing import Union
|
|
|
5
5
|
|
|
6
6
|
import torch
|
|
7
7
|
|
|
8
|
+
from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import Qwen3VLMoeCausalLMOutputWithPast
|
|
9
|
+
from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import load_balancing_loss_func
|
|
8
10
|
from transformers.utils import can_return_tuple
|
|
9
11
|
|
|
10
12
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
|
11
|
-
from transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe import (
|
|
12
|
-
Qwen3VLMoeCausalLMOutputWithPast,
|
|
13
|
-
load_balancing_loss_func,
|
|
14
|
-
)
|
|
15
13
|
|
|
16
14
|
|
|
17
15
|
@can_return_tuple
|
|
@@ -6,7 +6,6 @@ from types import MethodType
|
|
|
6
6
|
from typing import Callable
|
|
7
7
|
from typing import Optional
|
|
8
8
|
|
|
9
|
-
import torch
|
|
10
9
|
import transformers
|
|
11
10
|
|
|
12
11
|
from packaging import version
|
|
@@ -36,11 +35,9 @@ from liger_kernel.transformers.model.qwen3_vl_moe import lce_forward as qwen3_vl
|
|
|
36
35
|
from liger_kernel.transformers.model.smollm3 import lce_forward as smollm3_lce_forward
|
|
37
36
|
from liger_kernel.transformers.qwen2vl_mrope import liger_multimodal_rotary_pos_emb
|
|
38
37
|
from liger_kernel.transformers.rms_norm import LigerRMSNorm
|
|
39
|
-
from liger_kernel.transformers.rope import (
|
|
40
|
-
liger_rotary_pos_emb,
|
|
41
|
-
liger_rotary_pos_emb_with_cast,
|
|
42
|
-
liger_rotary_pos_emb_with_cast_and_leading_batch,
|
|
43
|
-
)
|
|
38
|
+
from liger_kernel.transformers.rope import liger_rotary_pos_emb
|
|
39
|
+
from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast
|
|
40
|
+
from liger_kernel.transformers.rope import liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
44
41
|
from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP
|
|
45
42
|
from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP
|
|
46
43
|
from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
|
|
@@ -64,7 +61,6 @@ def _bind_method_to_module(module, method_name: str, new_method: Callable):
|
|
|
64
61
|
module.__dict__[method_name] = new_method.__get__(module, module.__class__)
|
|
65
62
|
|
|
66
63
|
|
|
67
|
-
|
|
68
64
|
def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", in_place=True, row_mode=None):
|
|
69
65
|
# Check if the module is a PEFT ModulesToSaveWrapper
|
|
70
66
|
# If it is, we need to patch the modules_to_save.default and original_modules
|
|
@@ -1651,7 +1647,6 @@ def apply_liger_kernel_to_qwen2_5_vl(
|
|
|
1651
1647
|
_patch_rms_norm_module(decoder_layer.post_attention_layernorm)
|
|
1652
1648
|
|
|
1653
1649
|
|
|
1654
|
-
|
|
1655
1650
|
def apply_liger_kernel_to_qwen3_vl(
|
|
1656
1651
|
rope: bool = True,
|
|
1657
1652
|
cross_entropy: bool = False,
|
|
@@ -1688,7 +1683,6 @@ def apply_liger_kernel_to_qwen3_vl(
|
|
|
1688
1683
|
modeling_qwen3_vl.apply_rotary_pos_emb = liger_rotary_pos_emb_with_cast
|
|
1689
1684
|
modeling_qwen3_vl.apply_rotary_pos_emb_vision = liger_rotary_pos_emb_with_cast_and_leading_batch
|
|
1690
1685
|
|
|
1691
|
-
|
|
1692
1686
|
if rms_norm:
|
|
1693
1687
|
modeling_qwen3_vl.Qwen3VLTextRMSNorm = LigerRMSNorm
|
|
1694
1688
|
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
from typing import Optional, Tuple
|
|
1
|
+
from typing import Optional
|
|
2
|
+
from typing import Tuple
|
|
2
3
|
|
|
3
4
|
import torch
|
|
4
5
|
|
|
@@ -32,7 +33,6 @@ def liger_rotary_pos_emb_with_cast(
|
|
|
32
33
|
position_ids: Optional[torch.Tensor] = None,
|
|
33
34
|
unsqueeze_dim: int = 1,
|
|
34
35
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
35
|
-
|
|
36
36
|
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
37
37
|
|
|
38
38
|
q32 = q.to(torch.float32)
|
|
@@ -52,8 +52,6 @@ def liger_rotary_pos_emb_with_cast_and_leading_batch(
|
|
|
52
52
|
position_ids: Optional[torch.Tensor] = None,
|
|
53
53
|
unsqueeze_dim: int = 1,
|
|
54
54
|
) -> Tuple[torch.Tensor, torch.Tensor]:
|
|
55
|
-
|
|
56
|
-
|
|
57
55
|
orig_q_dtype, orig_k_dtype = q.dtype, k.dtype
|
|
58
56
|
|
|
59
57
|
q32 = q.to(torch.float32).unsqueeze(0)
|
|
@@ -61,7 +59,5 @@ def liger_rotary_pos_emb_with_cast_and_leading_batch(
|
|
|
61
59
|
cos32 = cos.to(torch.float32).unsqueeze(0)
|
|
62
60
|
sin32 = sin.to(torch.float32).unsqueeze(0)
|
|
63
61
|
|
|
64
|
-
q_out, k_out = liger_rotary_pos_emb(
|
|
65
|
-
q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim
|
|
66
|
-
)
|
|
62
|
+
q_out, k_out = liger_rotary_pos_emb(q32, k32, cos32, sin32, position_ids=position_ids, unsqueeze_dim=unsqueeze_dim)
|
|
67
63
|
return q_out.to(orig_q_dtype).squeeze(0), k_out.to(orig_k_dtype).squeeze(0)
|
|
@@ -1347,11 +1347,10 @@ def run_mini_model(
|
|
|
1347
1347
|
|
|
1348
1348
|
if "llava" in model_name:
|
|
1349
1349
|
apply_liger_kernel_to_llama(**kwargs)
|
|
1350
|
-
|
|
1350
|
+
|
|
1351
1351
|
kwargs["fused_linear_cross_entropy"] = False
|
|
1352
1352
|
kwargs["cross_entropy"] = False
|
|
1353
1353
|
|
|
1354
|
-
|
|
1355
1354
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_func(**kwargs)
|
|
1356
1355
|
else:
|
|
1357
1356
|
MINI_MODEL_SETUPS[model_name].liger_kernel_patch_revert_func(**revert_kwargs)
|
|
@@ -42,13 +42,11 @@ from liger_kernel.transformers import apply_liger_kernel_to_phi3
|
|
|
42
42
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen2
|
|
43
43
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen2_5_vl
|
|
44
44
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl
|
|
45
|
-
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl
|
|
46
|
-
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl_moe
|
|
47
45
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen3
|
|
48
46
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
47
|
+
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next
|
|
49
48
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl
|
|
50
49
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl_moe
|
|
51
|
-
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next
|
|
52
50
|
from liger_kernel.transformers import apply_liger_kernel_to_smollm3
|
|
53
51
|
from test.utils import DEFAULT_DATASET_PATH
|
|
54
52
|
from test.utils import MiniModelConfig
|
|
@@ -76,13 +74,11 @@ from test.utils import revert_liger_kernel_to_phi3
|
|
|
76
74
|
from test.utils import revert_liger_kernel_to_qwen2
|
|
77
75
|
from test.utils import revert_liger_kernel_to_qwen2_5_vl
|
|
78
76
|
from test.utils import revert_liger_kernel_to_qwen2_vl
|
|
79
|
-
from test.utils import revert_liger_kernel_to_qwen3_vl
|
|
80
|
-
from test.utils import revert_liger_kernel_to_qwen3_vl_moe
|
|
81
77
|
from test.utils import revert_liger_kernel_to_qwen3
|
|
82
78
|
from test.utils import revert_liger_kernel_to_qwen3_moe
|
|
79
|
+
from test.utils import revert_liger_kernel_to_qwen3_next
|
|
83
80
|
from test.utils import revert_liger_kernel_to_qwen3_vl
|
|
84
81
|
from test.utils import revert_liger_kernel_to_qwen3_vl_moe
|
|
85
|
-
from test.utils import revert_liger_kernel_to_qwen3_next
|
|
86
82
|
from test.utils import revert_liger_kernel_to_smollm3
|
|
87
83
|
from test.utils import set_seed
|
|
88
84
|
from test.utils import simple_collate_fn
|
|
@@ -42,13 +42,11 @@ from liger_kernel.transformers import apply_liger_kernel_to_phi3
|
|
|
42
42
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen2
|
|
43
43
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen2_5_vl
|
|
44
44
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen2_vl
|
|
45
|
-
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl
|
|
46
|
-
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl_moe
|
|
47
45
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen3
|
|
48
46
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_moe
|
|
47
|
+
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next
|
|
49
48
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl
|
|
50
49
|
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_vl_moe
|
|
51
|
-
from liger_kernel.transformers import apply_liger_kernel_to_qwen3_next
|
|
52
50
|
from liger_kernel.transformers import apply_liger_kernel_to_smollm3
|
|
53
51
|
from test.utils import DEFAULT_DATASET_PATH
|
|
54
52
|
from test.utils import MiniModelConfig
|
|
@@ -76,13 +74,11 @@ from test.utils import revert_liger_kernel_to_phi3
|
|
|
76
74
|
from test.utils import revert_liger_kernel_to_qwen2
|
|
77
75
|
from test.utils import revert_liger_kernel_to_qwen2_5_vl
|
|
78
76
|
from test.utils import revert_liger_kernel_to_qwen2_vl
|
|
79
|
-
from test.utils import revert_liger_kernel_to_qwen3_vl
|
|
80
|
-
from test.utils import revert_liger_kernel_to_qwen3_vl_moe
|
|
81
77
|
from test.utils import revert_liger_kernel_to_qwen3
|
|
82
78
|
from test.utils import revert_liger_kernel_to_qwen3_moe
|
|
79
|
+
from test.utils import revert_liger_kernel_to_qwen3_next
|
|
83
80
|
from test.utils import revert_liger_kernel_to_qwen3_vl
|
|
84
81
|
from test.utils import revert_liger_kernel_to_qwen3_vl_moe
|
|
85
|
-
from test.utils import revert_liger_kernel_to_qwen3_next
|
|
86
82
|
from test.utils import revert_liger_kernel_to_smollm3
|
|
87
83
|
from test.utils import set_seed
|
|
88
84
|
from test.utils import simple_collate_fn
|
|
@@ -498,9 +498,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_for_conditional_generation(
|
|
|
498
498
|
LigerRMSNorm.forward
|
|
499
499
|
)
|
|
500
500
|
for decoder_layer in dummy_model_instance.language_model.layers:
|
|
501
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
|
|
502
|
-
LigerRMSNorm.forward
|
|
503
|
-
)
|
|
501
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
|
504
502
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
|
|
505
503
|
LigerRMSNorm.forward
|
|
506
504
|
)
|
|
@@ -520,9 +518,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_for_conditional_generation(
|
|
|
520
518
|
LigerRMSNorm.forward
|
|
521
519
|
)
|
|
522
520
|
for decoder_layer in dummy_model_instance.language_model.layers:
|
|
523
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
|
|
524
|
-
LigerRMSNorm.forward
|
|
525
|
-
)
|
|
521
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
|
526
522
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
|
|
527
523
|
LigerRMSNorm.forward
|
|
528
524
|
)
|
|
@@ -603,9 +599,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl():
|
|
|
603
599
|
LigerRMSNorm.forward
|
|
604
600
|
)
|
|
605
601
|
for decoder_layer in dummy_model_instance.language_model.layers:
|
|
606
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
|
|
607
|
-
LigerRMSNorm.forward
|
|
608
|
-
)
|
|
602
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
|
609
603
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
|
|
610
604
|
LigerRMSNorm.forward
|
|
611
605
|
)
|
|
@@ -625,9 +619,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl():
|
|
|
625
619
|
LigerRMSNorm.forward
|
|
626
620
|
)
|
|
627
621
|
for decoder_layer in dummy_model_instance.language_model.layers:
|
|
628
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
|
|
629
|
-
LigerRMSNorm.forward
|
|
630
|
-
)
|
|
622
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
|
631
623
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
|
|
632
624
|
LigerRMSNorm.forward
|
|
633
625
|
)
|
|
@@ -681,9 +673,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_text():
|
|
|
681
673
|
# Note: Text models don't have forward method patching, so skip this check
|
|
682
674
|
assert inspect.getsource(dummy_model_instance.norm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
|
683
675
|
for decoder_layer in dummy_model_instance.layers:
|
|
684
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
|
|
685
|
-
LigerRMSNorm.forward
|
|
686
|
-
)
|
|
676
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
|
687
677
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
|
|
688
678
|
LigerRMSNorm.forward
|
|
689
679
|
)
|
|
@@ -701,9 +691,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_text():
|
|
|
701
691
|
# Note: Text models don't have forward method patching, so skip this check
|
|
702
692
|
assert inspect.getsource(dummy_model_instance.norm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
|
703
693
|
for decoder_layer in dummy_model_instance.layers:
|
|
704
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
|
|
705
|
-
LigerRMSNorm.forward
|
|
706
|
-
)
|
|
694
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
|
707
695
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
|
|
708
696
|
LigerRMSNorm.forward
|
|
709
697
|
)
|
|
@@ -789,9 +777,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_for_conditional_generat
|
|
|
789
777
|
LigerRMSNorm.forward
|
|
790
778
|
)
|
|
791
779
|
for decoder_layer in dummy_model_instance.language_model.layers:
|
|
792
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
|
|
793
|
-
LigerRMSNorm.forward
|
|
794
|
-
)
|
|
780
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
|
795
781
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
|
|
796
782
|
LigerRMSNorm.forward
|
|
797
783
|
)
|
|
@@ -811,9 +797,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_for_conditional_generat
|
|
|
811
797
|
LigerRMSNorm.forward
|
|
812
798
|
)
|
|
813
799
|
for decoder_layer in dummy_model_instance.language_model.layers:
|
|
814
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
|
|
815
|
-
LigerRMSNorm.forward
|
|
816
|
-
)
|
|
800
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
|
817
801
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
|
|
818
802
|
LigerRMSNorm.forward
|
|
819
803
|
)
|
|
@@ -899,9 +883,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe():
|
|
|
899
883
|
LigerRMSNorm.forward
|
|
900
884
|
)
|
|
901
885
|
for decoder_layer in dummy_model_instance.language_model.layers:
|
|
902
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
|
|
903
|
-
LigerRMSNorm.forward
|
|
904
|
-
)
|
|
886
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
|
905
887
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
|
|
906
888
|
LigerRMSNorm.forward
|
|
907
889
|
)
|
|
@@ -921,9 +903,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe():
|
|
|
921
903
|
LigerRMSNorm.forward
|
|
922
904
|
)
|
|
923
905
|
for decoder_layer in dummy_model_instance.language_model.layers:
|
|
924
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
|
|
925
|
-
LigerRMSNorm.forward
|
|
926
|
-
)
|
|
906
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
|
927
907
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
|
|
928
908
|
LigerRMSNorm.forward
|
|
929
909
|
)
|
|
@@ -982,9 +962,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_text():
|
|
|
982
962
|
# Note: Text models don't have forward method patching, so skip this check
|
|
983
963
|
assert inspect.getsource(dummy_model_instance.norm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
|
984
964
|
for decoder_layer in dummy_model_instance.layers:
|
|
985
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(
|
|
986
|
-
LigerRMSNorm.forward
|
|
987
|
-
)
|
|
965
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
|
988
966
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) != inspect.getsource(
|
|
989
967
|
LigerRMSNorm.forward
|
|
990
968
|
)
|
|
@@ -1002,9 +980,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_text():
|
|
|
1002
980
|
# Note: Text models don't have forward method patching, so skip this check
|
|
1003
981
|
assert inspect.getsource(dummy_model_instance.norm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
|
1004
982
|
for decoder_layer in dummy_model_instance.layers:
|
|
1005
|
-
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(
|
|
1006
|
-
LigerRMSNorm.forward
|
|
1007
|
-
)
|
|
983
|
+
assert inspect.getsource(decoder_layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
|
1008
984
|
assert inspect.getsource(decoder_layer.post_attention_layernorm.forward) == inspect.getsource(
|
|
1009
985
|
LigerRMSNorm.forward
|
|
1010
986
|
)
|
|
@@ -1025,10 +1001,8 @@ def test_apply_liger_kernel_to_instance_for_qwen3_vl_moe_text():
|
|
|
1025
1001
|
def test_qwen3_vl_rope_hooks_applied():
|
|
1026
1002
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
|
1027
1003
|
with patch("transformers.models.qwen3_vl.modeling_qwen3_vl") as modeling_mod:
|
|
1028
|
-
from liger_kernel.transformers.monkey_patch import (
|
|
1029
|
-
liger_rotary_pos_emb,
|
|
1030
|
-
_liger_qwen3_vl_apply_rotary_pos_emb_vision,
|
|
1031
|
-
)
|
|
1004
|
+
from liger_kernel.transformers.monkey_patch import _liger_qwen3_vl_apply_rotary_pos_emb_vision
|
|
1005
|
+
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb
|
|
1032
1006
|
|
|
1033
1007
|
# Before applying, make sure attributes exist but are not the liger implementations
|
|
1034
1008
|
setattr(modeling_mod, "apply_rotary_pos_emb", object())
|
|
@@ -1044,10 +1018,8 @@ def test_qwen3_vl_rope_hooks_applied():
|
|
|
1044
1018
|
def test_qwen3_vl_moe_rope_hooks_applied():
|
|
1045
1019
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
|
1046
1020
|
with patch("transformers.models.qwen3_vl_moe.modeling_qwen3_vl_moe") as modeling_mod:
|
|
1047
|
-
from liger_kernel.transformers.monkey_patch import (
|
|
1048
|
-
liger_rotary_pos_emb,
|
|
1049
|
-
_liger_qwen3_vl_apply_rotary_pos_emb_vision,
|
|
1050
|
-
)
|
|
1021
|
+
from liger_kernel.transformers.monkey_patch import _liger_qwen3_vl_apply_rotary_pos_emb_vision
|
|
1022
|
+
from liger_kernel.transformers.monkey_patch import liger_rotary_pos_emb
|
|
1051
1023
|
|
|
1052
1024
|
# Before applying, make sure attributes exist but are not the liger implementations
|
|
1053
1025
|
setattr(modeling_mod, "apply_rotary_pos_emb", object())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{liger_kernel_nightly-0.6.3.dev20251101160510 → liger_kernel_nightly-0.6.3.dev20251105012545}/NOTICE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|