liger-kernel-nightly 0.6.2.dev20250913213534__tar.gz → 0.6.2.dev20250919191028__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/PKG-INFO +2 -5
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/Examples.md +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/alignment/run_orpo.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/training.py +2 -2
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/training_multimodal.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/train.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/setup.py +1 -4
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/glm4v.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/glm4v_moe.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel_nightly.egg-info/PKG-INFO +2 -5
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel_nightly.egg-info/requires.txt +1 -4
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_monkey_patch.py +27 -27
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/Makefile +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/test/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: liger_kernel_nightly
|
|
3
|
-
Version: 0.6.2.dev20250913213534
|
|
3
|
+
Version: 0.6.2.dev20250919191028
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -35,9 +35,7 @@ Requires-Dist: triton>=2.3.1
|
|
|
35
35
|
Provides-Extra: dev
|
|
36
36
|
Requires-Dist: transformers>=4.49.0; extra == "dev"
|
|
37
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
|
38
|
-
Requires-Dist:
|
|
39
|
-
Requires-Dist: black>=24.4.2; extra == "dev"
|
|
40
|
-
Requires-Dist: isort>=5.13.2; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.12.0; extra == "dev"
|
|
41
39
|
Requires-Dist: pytest>=7.1.2; extra == "dev"
|
|
42
40
|
Requires-Dist: pytest-xdist; extra == "dev"
|
|
43
41
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
@@ -45,7 +43,6 @@ Requires-Dist: pytest-asyncio; extra == "dev"
|
|
|
45
43
|
Requires-Dist: pytest-rerunfailures; extra == "dev"
|
|
46
44
|
Requires-Dist: datasets>=2.19.2; extra == "dev"
|
|
47
45
|
Requires-Dist: seaborn; extra == "dev"
|
|
48
|
-
Requires-Dist: mkdocs; extra == "dev"
|
|
49
46
|
Requires-Dist: mkdocs-material; extra == "dev"
|
|
50
47
|
Requires-Dist: torchvision>=0.20; extra == "dev"
|
|
51
48
|
|
|
@@ -239,7 +239,7 @@ from liger_kernel.transformers.trainer import LigerORPOTrainer # noqa: F401
|
|
|
239
239
|
|
|
240
240
|
model = AutoModelForCausalLM.from_pretrained(
|
|
241
241
|
"meta-llama/Llama-3.2-1B-Instruct",
|
|
242
|
-
|
|
242
|
+
dtype=torch.bfloat16,
|
|
243
243
|
)
|
|
244
244
|
|
|
245
245
|
tokenizer = AutoTokenizer.from_pretrained(
|
|
@@ -9,7 +9,7 @@ from liger_kernel.transformers.trainer import LigerORPOTrainer # noqa: F401
|
|
|
9
9
|
|
|
10
10
|
model = AutoModelForCausalLM.from_pretrained(
|
|
11
11
|
"meta-llama/Llama-3.2-1B-Instruct",
|
|
12
|
-
|
|
12
|
+
dtype=torch.bfloat16,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
tokenizer = AutoTokenizer.from_pretrained(
|
|
@@ -48,7 +48,7 @@ def train():
|
|
|
48
48
|
custom_args.model_name,
|
|
49
49
|
trust_remote_code=True,
|
|
50
50
|
use_cache=False,
|
|
51
|
-
|
|
51
|
+
dtype=torch.bfloat16,
|
|
52
52
|
# These args will get passed to the appropriate apply_liger_kernel_to_* function
|
|
53
53
|
# to override the default settings
|
|
54
54
|
# cross_entropy=True,
|
|
@@ -59,7 +59,7 @@ def train():
|
|
|
59
59
|
custom_args.model_name,
|
|
60
60
|
trust_remote_code=True,
|
|
61
61
|
use_cache=False,
|
|
62
|
-
|
|
62
|
+
dtype=torch.bfloat16,
|
|
63
63
|
)
|
|
64
64
|
|
|
65
65
|
trainer = SFTTrainer(
|
|
@@ -56,7 +56,7 @@ def construct_model_and_processor(model_name: str, use_liger: bool) -> torch.nn.
|
|
|
56
56
|
model = Qwen2VLForConditionalGeneration.from_pretrained(
|
|
57
57
|
pretrained_model_name_or_path=model_name,
|
|
58
58
|
use_cache=False,
|
|
59
|
-
|
|
59
|
+
dtype=torch.bfloat16,
|
|
60
60
|
low_cpu_mem_usage=True,
|
|
61
61
|
attn_implementation="sdpa",
|
|
62
62
|
)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "liger_kernel_nightly"
|
|
7
|
-
version = "0.6.2.dev20250913213534"
|
|
7
|
+
version = "0.6.2.dev20250919191028"
|
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -33,9 +33,7 @@ def get_optional_dependencies():
|
|
|
33
33
|
"dev": [
|
|
34
34
|
"transformers>=4.49.0",
|
|
35
35
|
"matplotlib>=3.7.2",
|
|
36
|
-
"
|
|
37
|
-
"black>=24.4.2",
|
|
38
|
-
"isort>=5.13.2",
|
|
36
|
+
"ruff>=0.12.0",
|
|
39
37
|
"pytest>=7.1.2",
|
|
40
38
|
"pytest-xdist",
|
|
41
39
|
"pytest-cov",
|
|
@@ -43,7 +41,6 @@ def get_optional_dependencies():
|
|
|
43
41
|
"pytest-rerunfailures",
|
|
44
42
|
"datasets>=2.19.2",
|
|
45
43
|
"seaborn",
|
|
46
|
-
"mkdocs",
|
|
47
44
|
"mkdocs-material",
|
|
48
45
|
"torchvision>=0.20",
|
|
49
46
|
]
|
|
@@ -25,7 +25,7 @@ class LigerFusedLinearCrossEntropyLoss(torch.nn.Module):
|
|
|
25
25
|
assert reduction in {
|
|
26
26
|
"mean",
|
|
27
27
|
"sum",
|
|
28
|
-
"none",
|
|
28
|
+
"none",
|
|
29
29
|
}, f"reduction must be 'mean' or 'sum' or 'none'. Got: {reduction}"
|
|
30
30
|
assert softcap is None or softcap > 0, f"softcap must greater than 0.0 or None. Got: {softcap}"
|
|
31
31
|
self.ce_weight = ce_weight
|
|
@@ -70,7 +70,7 @@ def lce_forward(
|
|
|
70
70
|
>>> processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
|
|
71
71
|
>>> model = Glm4vForConditionalGeneration.from_pretrained(
|
|
72
72
|
pretrained_model_name_or_path=MODEL_PATH,
|
|
73
|
-
|
|
73
|
+
dtype=torch.bfloat16,
|
|
74
74
|
device_map="auto",
|
|
75
75
|
)
|
|
76
76
|
>>> inputs = processor.apply_chat_template(
|
|
@@ -75,7 +75,7 @@ def lce_forward(
|
|
|
75
75
|
>>> processor = AutoProcessor.from_pretrained(MODEL_PATH)
|
|
76
76
|
>>> model = Glm4vMoeForConditionalGeneration.from_pretrained(
|
|
77
77
|
pretrained_model_name_or_path=MODEL_PATH,
|
|
78
|
-
|
|
78
|
+
dtype="auto",
|
|
79
79
|
device_map="auto",
|
|
80
80
|
)
|
|
81
81
|
>>> inputs = processor.apply_chat_template(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: liger_kernel_nightly
|
|
3
|
-
Version: 0.6.2.dev20250913213534
|
|
3
|
+
Version: 0.6.2.dev20250919191028
|
|
4
4
|
Summary: Efficient Triton kernels for LLM Training
|
|
5
5
|
License: BSD 2-CLAUSE LICENSE
|
|
6
6
|
Copyright 2024 LinkedIn Corporation
|
|
@@ -35,9 +35,7 @@ Requires-Dist: triton>=2.3.1
|
|
|
35
35
|
Provides-Extra: dev
|
|
36
36
|
Requires-Dist: transformers>=4.49.0; extra == "dev"
|
|
37
37
|
Requires-Dist: matplotlib>=3.7.2; extra == "dev"
|
|
38
|
-
Requires-Dist:
|
|
39
|
-
Requires-Dist: black>=24.4.2; extra == "dev"
|
|
40
|
-
Requires-Dist: isort>=5.13.2; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.12.0; extra == "dev"
|
|
41
39
|
Requires-Dist: pytest>=7.1.2; extra == "dev"
|
|
42
40
|
Requires-Dist: pytest-xdist; extra == "dev"
|
|
43
41
|
Requires-Dist: pytest-cov; extra == "dev"
|
|
@@ -45,7 +43,6 @@ Requires-Dist: pytest-asyncio; extra == "dev"
|
|
|
45
43
|
Requires-Dist: pytest-rerunfailures; extra == "dev"
|
|
46
44
|
Requires-Dist: datasets>=2.19.2; extra == "dev"
|
|
47
45
|
Requires-Dist: seaborn; extra == "dev"
|
|
48
|
-
Requires-Dist: mkdocs; extra == "dev"
|
|
49
46
|
Requires-Dist: mkdocs-material; extra == "dev"
|
|
50
47
|
Requires-Dist: torchvision>=0.20; extra == "dev"
|
|
51
48
|
|
|
@@ -4,9 +4,7 @@ triton>=2.3.1
|
|
|
4
4
|
[dev]
|
|
5
5
|
transformers>=4.49.0
|
|
6
6
|
matplotlib>=3.7.2
|
|
7
|
-
|
|
8
|
-
black>=24.4.2
|
|
9
|
-
isort>=5.13.2
|
|
7
|
+
ruff>=0.12.0
|
|
10
8
|
pytest>=7.1.2
|
|
11
9
|
pytest-xdist
|
|
12
10
|
pytest-cov
|
|
@@ -14,6 +12,5 @@ pytest-asyncio
|
|
|
14
12
|
pytest-rerunfailures
|
|
15
13
|
datasets>=2.19.2
|
|
16
14
|
seaborn
|
|
17
|
-
mkdocs
|
|
18
15
|
mkdocs-material
|
|
19
16
|
torchvision>=0.20
|
|
@@ -338,7 +338,7 @@ def test_apply_liger_kernel_to_instance_for_llama():
|
|
|
338
338
|
with patch("transformers.models.llama.modeling_llama"):
|
|
339
339
|
# Instantiate a dummy model
|
|
340
340
|
config = transformers.models.llama.configuration_llama.LlamaConfig(
|
|
341
|
-
|
|
341
|
+
dtype=torch.bfloat16,
|
|
342
342
|
rms_norm_eps=1e-5,
|
|
343
343
|
hidden_size=32,
|
|
344
344
|
intermediate_size=64,
|
|
@@ -382,7 +382,7 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
|
382
382
|
|
|
383
383
|
# Instantiate a dummy model
|
|
384
384
|
config = transformers.models.mllama.configuration_mllama.MllamaConfig(
|
|
385
|
-
|
|
385
|
+
dtype=torch.bfloat16,
|
|
386
386
|
text_config=transformers.models.mllama.configuration_mllama.MllamaTextConfig(
|
|
387
387
|
rms_norm_eps=1e-5,
|
|
388
388
|
hidden_size=32,
|
|
@@ -533,7 +533,7 @@ def test_apply_liger_kernel_to_instance_for_llama4_for_causal_lm():
|
|
|
533
533
|
|
|
534
534
|
# Instantiate a dummy model
|
|
535
535
|
config = transformers.models.llama4.configuration_llama4.Llama4TextConfig(
|
|
536
|
-
|
|
536
|
+
dtype=torch.bfloat16,
|
|
537
537
|
rms_norm_eps=1e-5,
|
|
538
538
|
hidden_size=32,
|
|
539
539
|
intermediate_size=64,
|
|
@@ -573,9 +573,9 @@ def test_apply_liger_kernel_to_instance_for_llama4_for_conditional_generation():
|
|
|
573
573
|
|
|
574
574
|
# Instantiate a dummy model
|
|
575
575
|
config = transformers.models.llama4.configuration_llama4.Llama4Config(
|
|
576
|
-
|
|
576
|
+
dtype=torch.bfloat16,
|
|
577
577
|
text_config=transformers.models.llama4.configuration_llama4.Llama4TextConfig(
|
|
578
|
-
|
|
578
|
+
dtype=torch.bfloat16,
|
|
579
579
|
rms_norm_eps=1e-5,
|
|
580
580
|
hidden_size=32,
|
|
581
581
|
intermediate_size=64,
|
|
@@ -656,7 +656,7 @@ def test_apply_liger_kernel_to_instance_for_mistral():
|
|
|
656
656
|
with patch("transformers.models.mistral.modeling_mistral"):
|
|
657
657
|
# Instantiate a dummy model
|
|
658
658
|
config = transformers.models.mistral.configuration_mistral.MistralConfig(
|
|
659
|
-
|
|
659
|
+
dtype=torch.bfloat16,
|
|
660
660
|
rms_norm_eps=1e-5,
|
|
661
661
|
hidden_size=32,
|
|
662
662
|
intermediate_size=64,
|
|
@@ -695,7 +695,7 @@ def test_apply_liger_kernel_to_instance_for_mixtral():
|
|
|
695
695
|
with patch("transformers.models.mixtral.modeling_mixtral"):
|
|
696
696
|
# Instantiate a dummy model
|
|
697
697
|
config = transformers.models.mixtral.configuration_mixtral.MixtralConfig(
|
|
698
|
-
|
|
698
|
+
dtype=torch.bfloat16,
|
|
699
699
|
rms_norm_eps=1e-5,
|
|
700
700
|
hidden_size=32,
|
|
701
701
|
intermediate_size=64,
|
|
@@ -738,7 +738,7 @@ def test_apply_liger_kernel_to_instance_for_gemma():
|
|
|
738
738
|
with patch("transformers.models.gemma.modeling_gemma"):
|
|
739
739
|
# Instantiate a dummy model
|
|
740
740
|
config = transformers.models.gemma.configuration_gemma.GemmaConfig(
|
|
741
|
-
|
|
741
|
+
dtype=torch.bfloat16,
|
|
742
742
|
rms_norm_eps=1e-5,
|
|
743
743
|
hidden_size=32,
|
|
744
744
|
intermediate_size=64,
|
|
@@ -777,7 +777,7 @@ def test_apply_liger_kernel_to_instance_for_gemma2():
|
|
|
777
777
|
with patch("transformers.models.gemma2.modeling_gemma2"):
|
|
778
778
|
# Instantiate a dummy model
|
|
779
779
|
config = transformers.models.gemma2.configuration_gemma2.Gemma2Config(
|
|
780
|
-
|
|
780
|
+
dtype=torch.bfloat16,
|
|
781
781
|
rms_norm_eps=1e-5,
|
|
782
782
|
hidden_size=32,
|
|
783
783
|
intermediate_size=64,
|
|
@@ -827,7 +827,7 @@ def test_apply_liger_kernel_to_instance_for_paligemma():
|
|
|
827
827
|
|
|
828
828
|
# Instantiate a dummy model
|
|
829
829
|
config = transformers.models.paligemma.configuration_paligemma.PaliGemmaConfig(
|
|
830
|
-
|
|
830
|
+
dtype=torch.bfloat16,
|
|
831
831
|
text_config={
|
|
832
832
|
"num_hidden_layers": 2,
|
|
833
833
|
"rms_norm_eps": 1e-5,
|
|
@@ -883,7 +883,7 @@ def test_apply_liger_kernel_to_instance_for_gemma3_text():
|
|
|
883
883
|
|
|
884
884
|
# Instantiate a dummy model
|
|
885
885
|
config = transformers.models.gemma3.configuration_gemma3.Gemma3TextConfig(
|
|
886
|
-
|
|
886
|
+
dtype=torch.bfloat16,
|
|
887
887
|
rms_norm_eps=1e-5,
|
|
888
888
|
hidden_size=32,
|
|
889
889
|
intermediate_size=64,
|
|
@@ -939,7 +939,7 @@ def test_apply_liger_kernel_to_instance_for_gemma3_conditional_generation():
|
|
|
939
939
|
|
|
940
940
|
# Instantiate a dummy model
|
|
941
941
|
text_config = transformers.models.gemma3.configuration_gemma3.Gemma3TextConfig(
|
|
942
|
-
|
|
942
|
+
dtype=torch.bfloat16,
|
|
943
943
|
rms_norm_eps=1e-5,
|
|
944
944
|
hidden_size=32,
|
|
945
945
|
intermediate_size=64,
|
|
@@ -1026,7 +1026,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2():
|
|
|
1026
1026
|
with patch("transformers.models.qwen2.modeling_qwen2"):
|
|
1027
1027
|
# Instantiate a dummy model
|
|
1028
1028
|
config = transformers.models.qwen2.configuration_qwen2.Qwen2Config(
|
|
1029
|
-
|
|
1029
|
+
dtype=torch.bfloat16,
|
|
1030
1030
|
rms_norm_eps=1e-5,
|
|
1031
1031
|
hidden_size=32,
|
|
1032
1032
|
intermediate_size=64,
|
|
@@ -1068,7 +1068,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3():
|
|
|
1068
1068
|
|
|
1069
1069
|
# Instantiate a dummy model
|
|
1070
1070
|
config = transformers.models.qwen3.configuration_qwen3.Qwen3Config(
|
|
1071
|
-
|
|
1071
|
+
dtype=torch.bfloat16,
|
|
1072
1072
|
rms_norm_eps=1e-5,
|
|
1073
1073
|
hidden_size=32,
|
|
1074
1074
|
intermediate_size=64,
|
|
@@ -1110,7 +1110,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_moe():
|
|
|
1110
1110
|
|
|
1111
1111
|
# Instantiate a dummy model
|
|
1112
1112
|
config = transformers.models.qwen3_moe.configuration_qwen3_moe.Qwen3MoeConfig(
|
|
1113
|
-
|
|
1113
|
+
dtype=torch.bfloat16,
|
|
1114
1114
|
rms_norm_eps=1e-5,
|
|
1115
1115
|
hidden_size=32,
|
|
1116
1116
|
intermediate_size=64,
|
|
@@ -1158,7 +1158,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation(
|
|
|
1158
1158
|
|
|
1159
1159
|
# Instantiate a dummy model
|
|
1160
1160
|
config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLConfig(
|
|
1161
|
-
|
|
1161
|
+
dtype=torch.bfloat16,
|
|
1162
1162
|
rms_norm_eps=1e-5,
|
|
1163
1163
|
hidden_size=32,
|
|
1164
1164
|
intermediate_size=48,
|
|
@@ -1227,7 +1227,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl():
|
|
|
1227
1227
|
|
|
1228
1228
|
# Instantiate a dummy model
|
|
1229
1229
|
config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLConfig(
|
|
1230
|
-
|
|
1230
|
+
dtype=torch.bfloat16,
|
|
1231
1231
|
rms_norm_eps=1e-5,
|
|
1232
1232
|
hidden_size=32,
|
|
1233
1233
|
intermediate_size=48,
|
|
@@ -1294,7 +1294,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
|
|
|
1294
1294
|
|
|
1295
1295
|
# Instantiate a dummy model
|
|
1296
1296
|
config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLTextConfig(
|
|
1297
|
-
|
|
1297
|
+
dtype=torch.bfloat16,
|
|
1298
1298
|
rms_norm_eps=1e-5,
|
|
1299
1299
|
hidden_size=32,
|
|
1300
1300
|
intermediate_size=48,
|
|
@@ -1347,7 +1347,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
|
|
|
1347
1347
|
|
|
1348
1348
|
# Instantiate a dummy model
|
|
1349
1349
|
config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLConfig(
|
|
1350
|
-
|
|
1350
|
+
dtype=torch.bfloat16,
|
|
1351
1351
|
rms_norm_eps=1e-5,
|
|
1352
1352
|
hidden_size=32,
|
|
1353
1353
|
intermediate_size=48,
|
|
@@ -1416,7 +1416,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generatio
|
|
|
1416
1416
|
|
|
1417
1417
|
# Instantiate a dummy model
|
|
1418
1418
|
config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLConfig(
|
|
1419
|
-
|
|
1419
|
+
dtype=torch.bfloat16,
|
|
1420
1420
|
rms_norm_eps=1e-5,
|
|
1421
1421
|
hidden_size=32,
|
|
1422
1422
|
intermediate_size=48,
|
|
@@ -1483,7 +1483,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_text():
|
|
|
1483
1483
|
|
|
1484
1484
|
# Instantiate a dummy model
|
|
1485
1485
|
config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLTextConfig(
|
|
1486
|
-
|
|
1486
|
+
dtype=torch.bfloat16,
|
|
1487
1487
|
rms_norm_eps=1e-5,
|
|
1488
1488
|
hidden_size=32,
|
|
1489
1489
|
intermediate_size=48,
|
|
@@ -1528,7 +1528,7 @@ def test_apply_liger_kernel_to_instance_for_phi3():
|
|
|
1528
1528
|
with patch("transformers.models.phi3.modeling_phi3"):
|
|
1529
1529
|
# Instantiate a dummy model
|
|
1530
1530
|
config = transformers.models.phi3.configuration_phi3.Phi3Config(
|
|
1531
|
-
|
|
1531
|
+
dtype=torch.bfloat16,
|
|
1532
1532
|
rms_norm_eps=1e-5,
|
|
1533
1533
|
hidden_size=32,
|
|
1534
1534
|
intermediate_size=64,
|
|
@@ -1570,7 +1570,7 @@ def test_apply_liger_kernel_to_instance_for_olmo2():
|
|
|
1570
1570
|
|
|
1571
1571
|
# Instantiate a dummy model
|
|
1572
1572
|
config = transformers.models.olmo2.configuration_olmo2.Olmo2Config(
|
|
1573
|
-
|
|
1573
|
+
dtype=torch.bfloat16,
|
|
1574
1574
|
rms_norm_eps=1e-5,
|
|
1575
1575
|
hidden_size=32,
|
|
1576
1576
|
intermediate_size=64,
|
|
@@ -1616,7 +1616,7 @@ def test_apply_liger_kernel_to_instance_for_glm4():
|
|
|
1616
1616
|
|
|
1617
1617
|
# Instantiate a dummy model
|
|
1618
1618
|
config = transformers.models.glm4.configuration_glm4.Glm4Config(
|
|
1619
|
-
|
|
1619
|
+
dtype=torch.bfloat16,
|
|
1620
1620
|
rms_norm_eps=1e-5,
|
|
1621
1621
|
hidden_size=32,
|
|
1622
1622
|
intermediate_size=64,
|
|
@@ -1664,7 +1664,7 @@ def test_apply_liger_kernel_to_instance_for_glm4v():
|
|
|
1664
1664
|
|
|
1665
1665
|
# Instantiate a dummy model
|
|
1666
1666
|
config = transformers.models.glm4v.configuration_glm4v.Glm4vConfig(
|
|
1667
|
-
|
|
1667
|
+
dtype=torch.bfloat16,
|
|
1668
1668
|
text_config={
|
|
1669
1669
|
"num_hidden_layers": 2,
|
|
1670
1670
|
"rms_norm_eps": 1e-5,
|
|
@@ -1734,7 +1734,7 @@ def test_apply_liger_kernel_to_instance_for_glm4v_moe():
|
|
|
1734
1734
|
|
|
1735
1735
|
# Instantiate a dummy model
|
|
1736
1736
|
config = transformers.models.glm4v_moe.configuration_glm4v_moe.Glm4vMoeConfig(
|
|
1737
|
-
|
|
1737
|
+
dtype=torch.bfloat16,
|
|
1738
1738
|
hidden_size=32,
|
|
1739
1739
|
num_attention_heads=4,
|
|
1740
1740
|
num_key_value_heads=2,
|
|
@@ -1837,7 +1837,7 @@ def test_apply_liger_kernel_to_instance_for_smollm3():
|
|
|
1837
1837
|
with patch("transformers.models.smollm3.modeling_smollm3"):
|
|
1838
1838
|
# Instantiate a dummy model
|
|
1839
1839
|
config = transformers.models.smollm3.configuration_smollm3.SmolLM3Config(
|
|
1840
|
-
|
|
1840
|
+
dtype=torch.bfloat16,
|
|
1841
1841
|
rms_norm_eps=1e-5,
|
|
1842
1842
|
hidden_size=32,
|
|
1843
1843
|
intermediate_size=64,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250919191028}/NOTICE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|