liger-kernel-nightly 0.6.0.dev20250709030408__tar.gz → 0.6.0.dev20250709091230__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/PKG-INFO +2 -2
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/dev/modal/tests.py +1 -1
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/dev/modal/tests_bwd.py +3 -3
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/setup.py +1 -1
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/gemma3.py +1 -1
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/monkey_patch.py +4 -1
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/PKG-INFO +2 -2
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/requires.txt +1 -1
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/bf16/test_mini_models.py +2 -2
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/fp32/test_mini_models.py +3 -3
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_monkey_patch.py +35 -33
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/Makefile +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: liger_kernel_nightly
|
3
|
-
Version: 0.6.0.
|
3
|
+
Version: 0.6.0.dev20250709091230
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
@@ -33,7 +33,7 @@ License-File: NOTICE
|
|
33
33
|
Requires-Dist: torch>=2.1.2
|
34
34
|
Requires-Dist: triton>=2.3.1
|
35
35
|
Provides-Extra: dev
|
36
|
-
Requires-Dist: transformers>=4.
|
36
|
+
Requires-Dist: transformers>=4.49.0; extra == "dev"
|
37
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
38
38
|
Requires-Dist: flake8>=4.0.1.1; extra == "dev"
|
39
39
|
Requires-Dist: black>=24.4.2; extra == "dev"
|
@@ -14,7 +14,7 @@ app = modal.App("liger_tests", image=image)
|
|
14
14
|
repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
|
15
15
|
|
16
16
|
|
17
|
-
@app.function(gpu="A10G", image=repo, timeout=60 *
|
17
|
+
@app.function(gpu="A10G", image=repo, timeout=60 * 60)
|
18
18
|
def liger_tests():
|
19
19
|
import subprocess
|
20
20
|
|
@@ -14,7 +14,7 @@ app = modal.App("liger_tests_bwd", image=image)
|
|
14
14
|
repo = image.add_local_dir(ROOT_PATH, remote_path=REMOTE_ROOT_PATH)
|
15
15
|
|
16
16
|
|
17
|
-
@app.function(gpu="A10G", image=repo, timeout=60 *
|
17
|
+
@app.function(gpu="A10G", image=repo, timeout=60 * 60)
|
18
18
|
def liger_bwd_tests():
|
19
19
|
import subprocess
|
20
20
|
|
@@ -24,9 +24,9 @@ def liger_bwd_tests():
|
|
24
24
|
shell=True,
|
25
25
|
cwd=REMOTE_ROOT_PATH,
|
26
26
|
)
|
27
|
-
# force install transformers==4.
|
27
|
+
# force install transformers==4.49.0
|
28
28
|
subprocess.run(
|
29
|
-
["uv pip install transformers==4.
|
29
|
+
["uv pip install transformers==4.49.0 --system"],
|
30
30
|
check=True,
|
31
31
|
shell=True,
|
32
32
|
cwd=REMOTE_ROOT_PATH,
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.6.0.
|
7
|
+
version = "0.6.0.dev20250709091230"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -255,7 +255,7 @@ def multimodal_forward(
|
|
255
255
|
shift_labels = shift_labels.view(-1).to(hidden_device)
|
256
256
|
|
257
257
|
lce = LigerFusedLinearCrossEntropyLoss()
|
258
|
-
loss = lce(self.
|
258
|
+
loss = lce(self.lm_head.weight, shift_hidden_states, shift_labels)
|
259
259
|
else:
|
260
260
|
logits = self.lm_head(kept_hidden_states)
|
261
261
|
if labels is not None:
|
@@ -537,7 +537,10 @@ def apply_liger_kernel_to_mllama(
|
|
537
537
|
if isinstance(model, MllamaForConditionalGeneration):
|
538
538
|
language_model: MllamaForCausalLM = model.language_model
|
539
539
|
vision_model: MllamaVisionModel = model.vision_model
|
540
|
-
|
540
|
+
if isinstance(language_model, MllamaForCausalLM):
|
541
|
+
text_model: MllamaTextModel = language_model.model
|
542
|
+
else:
|
543
|
+
text_model = language_model
|
541
544
|
elif isinstance(model, MllamaForCausalLM):
|
542
545
|
text_model = model.model
|
543
546
|
vision_model = None
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: liger_kernel_nightly
|
3
|
-
Version: 0.6.0.
|
3
|
+
Version: 0.6.0.dev20250709091230
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
@@ -33,7 +33,7 @@ License-File: NOTICE
|
|
33
33
|
Requires-Dist: torch>=2.1.2
|
34
34
|
Requires-Dist: triton>=2.3.1
|
35
35
|
Provides-Extra: dev
|
36
|
-
Requires-Dist: transformers>=4.
|
36
|
+
Requires-Dist: transformers>=4.49.0; extra == "dev"
|
37
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
38
38
|
Requires-Dist: flake8>=4.0.1.1; extra == "dev"
|
39
39
|
Requires-Dist: black>=24.4.2; extra == "dev"
|
@@ -957,8 +957,8 @@ def run_mini_model(
|
|
957
957
|
reason="LLaVa not available in this version of transformers",
|
958
958
|
),
|
959
959
|
pytest.mark.skipif(
|
960
|
-
version.parse(transformers.__version__) < version.parse("4.
|
961
|
-
reason="
|
960
|
+
version.parse(transformers.__version__) < version.parse("4.52.0"),
|
961
|
+
reason="LLaVa doesn't materialize logits in transformers<=4.52.0 so we can't test it",
|
962
962
|
),
|
963
963
|
],
|
964
964
|
),
|
@@ -938,8 +938,8 @@ def run_mini_model(
|
|
938
938
|
reason="LLaVa not available in this version of transformers",
|
939
939
|
),
|
940
940
|
pytest.mark.skipif(
|
941
|
-
version.parse(transformers.__version__) < version.parse("4.
|
942
|
-
reason="
|
941
|
+
version.parse(transformers.__version__) < version.parse("4.52.0"),
|
942
|
+
reason="LLaVa doesn't materialize logits in transformers<=4.52.0 so we can't test it",
|
943
943
|
),
|
944
944
|
],
|
945
945
|
),
|
@@ -1103,7 +1103,7 @@ def run_mini_model(
|
|
1103
1103
|
torch.float32,
|
1104
1104
|
1e-8,
|
1105
1105
|
1e-4,
|
1106
|
-
|
1106
|
+
4e-2, # 4e-3
|
1107
1107
|
1e-5, # 1e-5
|
1108
1108
|
5e-3,
|
1109
1109
|
1e-5,
|
@@ -74,24 +74,6 @@ def is_llama4_available():
|
|
74
74
|
return False
|
75
75
|
|
76
76
|
|
77
|
-
def is_qwen2_vl_available():
|
78
|
-
try:
|
79
|
-
import transformers.models.qwen2_vl # noqa: F401
|
80
|
-
|
81
|
-
return True
|
82
|
-
except ImportError:
|
83
|
-
return False
|
84
|
-
|
85
|
-
|
86
|
-
def is_qwen2_5_vl_available():
|
87
|
-
try:
|
88
|
-
import transformers.models.qwen2_5_vl # noqa: F401
|
89
|
-
|
90
|
-
return True
|
91
|
-
except ImportError:
|
92
|
-
return False
|
93
|
-
|
94
|
-
|
95
77
|
def is_qwen3_available():
|
96
78
|
try:
|
97
79
|
import transformers.models.qwen3 # noqa: F401
|
@@ -365,6 +347,7 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
365
347
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
366
348
|
with patch("transformers.models.mllama.modeling_mllama"):
|
367
349
|
from transformers.models.mllama.modeling_mllama import MllamaForConditionalGeneration
|
350
|
+
from transformers.models.mllama.modeling_mllama import MllamaTextModel
|
368
351
|
|
369
352
|
# Instantiate a dummy model
|
370
353
|
config = transformers.models.mllama.configuration_mllama.MllamaConfig(
|
@@ -398,10 +381,14 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
398
381
|
|
399
382
|
# Check that model instance variables are not yet patched with Liger modules
|
400
383
|
assert inspect.getsource(dummy_model_instance.forward) != inspect.getsource(mllama_lce_forward)
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
384
|
+
|
385
|
+
if isinstance(dummy_model_instance.language_model, MllamaTextModel):
|
386
|
+
language_model = dummy_model_instance.language_model
|
387
|
+
else:
|
388
|
+
language_model = dummy_model_instance.language_model.model
|
389
|
+
|
390
|
+
assert inspect.getsource(language_model.norm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
391
|
+
for layer in language_model.layers:
|
405
392
|
assert inspect.getsource(layer.mlp.forward) != inspect.getsource(LigerSwiGLUMLP.forward)
|
406
393
|
assert inspect.getsource(layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
407
394
|
assert inspect.getsource(layer.post_attention_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
|
@@ -428,10 +415,8 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
428
415
|
|
429
416
|
# Check that the model's instance variables were correctly patched with Liger modules
|
430
417
|
assert inspect.getsource(dummy_model_instance.forward) == inspect.getsource(mllama_lce_forward)
|
431
|
-
assert inspect.getsource(
|
432
|
-
|
433
|
-
)
|
434
|
-
for layer in dummy_model_instance.language_model.layers:
|
418
|
+
assert inspect.getsource(language_model.norm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
419
|
+
for layer in language_model.layers:
|
435
420
|
assert inspect.getsource(layer.mlp.forward) == inspect.getsource(LigerSwiGLUMLP.forward)
|
436
421
|
assert inspect.getsource(layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
437
422
|
assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
|
@@ -452,7 +437,6 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
452
437
|
assert inspect.getsource(layer.post_attention_layernorm.forward) == inspect.getsource(
|
453
438
|
LigerLayerNorm.forward
|
454
439
|
)
|
455
|
-
|
456
440
|
try:
|
457
441
|
print(dummy_model_instance)
|
458
442
|
except Exception as e:
|
@@ -1130,7 +1114,10 @@ def test_apply_liger_kernel_to_instance_for_qwen3_moe():
|
|
1130
1114
|
pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
|
1131
1115
|
|
1132
1116
|
|
1133
|
-
@pytest.mark.skipif(
|
1117
|
+
@pytest.mark.skipif(
|
1118
|
+
transformer_version < version.parse("4.52.4"),
|
1119
|
+
reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
|
1120
|
+
)
|
1134
1121
|
def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation():
|
1135
1122
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
1136
1123
|
with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
|
@@ -1196,7 +1183,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation(
|
|
1196
1183
|
pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
|
1197
1184
|
|
1198
1185
|
|
1199
|
-
@pytest.mark.skipif(
|
1186
|
+
@pytest.mark.skipif(
|
1187
|
+
transformer_version < version.parse("4.52.4"),
|
1188
|
+
reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
|
1189
|
+
)
|
1200
1190
|
def test_apply_liger_kernel_to_instance_for_qwen2_vl():
|
1201
1191
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
1202
1192
|
with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
|
@@ -1262,7 +1252,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl():
|
|
1262
1252
|
pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
|
1263
1253
|
|
1264
1254
|
|
1265
|
-
@pytest.mark.skipif(
|
1255
|
+
@pytest.mark.skipif(
|
1256
|
+
transformer_version < version.parse("4.52.4"),
|
1257
|
+
reason="Qwen2-VL support is only compatible with transformers >= 4.52.4",
|
1258
|
+
)
|
1266
1259
|
def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
|
1267
1260
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
1268
1261
|
with patch("transformers.models.qwen2_vl.modeling_qwen2_vl"):
|
@@ -1310,7 +1303,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
|
|
1310
1303
|
pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
|
1311
1304
|
|
1312
1305
|
|
1313
|
-
@pytest.mark.skipif(
|
1306
|
+
@pytest.mark.skipif(
|
1307
|
+
transformer_version < version.parse("4.52.4"),
|
1308
|
+
reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
|
1309
|
+
)
|
1314
1310
|
def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
|
1315
1311
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
1316
1312
|
with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):
|
@@ -1376,7 +1372,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
|
|
1376
1372
|
pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
|
1377
1373
|
|
1378
1374
|
|
1379
|
-
@pytest.mark.skipif(
|
1375
|
+
@pytest.mark.skipif(
|
1376
|
+
transformer_version < version.parse("4.52.4"),
|
1377
|
+
reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
|
1378
|
+
)
|
1380
1379
|
def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generation():
|
1381
1380
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
1382
1381
|
with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):
|
@@ -1442,7 +1441,10 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generatio
|
|
1442
1441
|
pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
|
1443
1442
|
|
1444
1443
|
|
1445
|
-
@pytest.mark.skipif(
|
1444
|
+
@pytest.mark.skipif(
|
1445
|
+
transformer_version < version.parse("4.52.4"),
|
1446
|
+
reason="Qwen2.5-VL support is only compatible with transformers >= 4.52.4",
|
1447
|
+
)
|
1446
1448
|
def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_text():
|
1447
1449
|
# Ensure any monkey patching is cleaned up for subsequent tests
|
1448
1450
|
with patch("transformers.models.qwen2_5_vl.modeling_qwen2_5_vl"):
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.6.0.dev20250709030408 → liger_kernel_nightly-0.6.0.dev20250709091230}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|