liger-kernel-nightly 0.6.2.dev20250913213534__tar.gz → 0.6.2.dev20250916231145__tar.gz
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of liger-kernel-nightly might be problematic. Click here for more details.
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/Examples.md +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/alignment/run_orpo.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/training.py +2 -2
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/training_multimodal.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/train.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/glm4v.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/glm4v_moe.py +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_monkey_patch.py +27 -27
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/Makefile +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/setup.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/test/utils.py +0 -0
|
@@ -239,7 +239,7 @@ from liger_kernel.transformers.trainer import LigerORPOTrainer # noqa: F401
|
|
|
239
239
|
|
|
240
240
|
model = AutoModelForCausalLM.from_pretrained(
|
|
241
241
|
"meta-llama/Llama-3.2-1B-Instruct",
|
|
242
|
-
|
|
242
|
+
dtype=torch.bfloat16,
|
|
243
243
|
)
|
|
244
244
|
|
|
245
245
|
tokenizer = AutoTokenizer.from_pretrained(
|
|
@@ -9,7 +9,7 @@ from liger_kernel.transformers.trainer import LigerORPOTrainer # noqa: F401
|
|
|
9
9
|
|
|
10
10
|
model = AutoModelForCausalLM.from_pretrained(
|
|
11
11
|
"meta-llama/Llama-3.2-1B-Instruct",
|
|
12
|
-
|
|
12
|
+
dtype=torch.bfloat16,
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
tokenizer = AutoTokenizer.from_pretrained(
|
|
@@ -48,7 +48,7 @@ def train():
|
|
|
48
48
|
custom_args.model_name,
|
|
49
49
|
trust_remote_code=True,
|
|
50
50
|
use_cache=False,
|
|
51
|
-
|
|
51
|
+
dtype=torch.bfloat16,
|
|
52
52
|
# These args will get passed to the appropriate apply_liger_kernel_to_* function
|
|
53
53
|
# to override the default settings
|
|
54
54
|
# cross_entropy=True,
|
|
@@ -59,7 +59,7 @@ def train():
|
|
|
59
59
|
custom_args.model_name,
|
|
60
60
|
trust_remote_code=True,
|
|
61
61
|
use_cache=False,
|
|
62
|
-
|
|
62
|
+
dtype=torch.bfloat16,
|
|
63
63
|
)
|
|
64
64
|
|
|
65
65
|
trainer = SFTTrainer(
|
|
@@ -56,7 +56,7 @@ def construct_model_and_processor(model_name: str, use_liger: bool) -> torch.nn.
|
|
|
56
56
|
model = Qwen2VLForConditionalGeneration.from_pretrained(
|
|
57
57
|
pretrained_model_name_or_path=model_name,
|
|
58
58
|
use_cache=False,
|
|
59
|
-
|
|
59
|
+
dtype=torch.bfloat16,
|
|
60
60
|
low_cpu_mem_usage=True,
|
|
61
61
|
attn_implementation="sdpa",
|
|
62
62
|
)
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "liger_kernel_nightly"
|
|
7
|
-
version = "0.6.2.dev20250913213534"
|
|
7
|
+
version = "0.6.2.dev20250916231145"
|
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
@@ -25,7 +25,7 @@ class LigerFusedLinearCrossEntropyLoss(torch.nn.Module):
|
|
|
25
25
|
assert reduction in {
|
|
26
26
|
"mean",
|
|
27
27
|
"sum",
|
|
28
|
-
"none",
|
|
28
|
+
"none",
|
|
29
29
|
}, f"reduction must be 'mean' or 'sum' or 'none'. Got: {reduction}"
|
|
30
30
|
assert softcap is None or softcap > 0, f"softcap must greater than 0.0 or None. Got: {softcap}"
|
|
31
31
|
self.ce_weight = ce_weight
|
|
@@ -70,7 +70,7 @@ def lce_forward(
|
|
|
70
70
|
>>> processor = AutoProcessor.from_pretrained(MODEL_PATH, use_fast=True)
|
|
71
71
|
>>> model = Glm4vForConditionalGeneration.from_pretrained(
|
|
72
72
|
pretrained_model_name_or_path=MODEL_PATH,
|
|
73
|
-
|
|
73
|
+
dtype=torch.bfloat16,
|
|
74
74
|
device_map="auto",
|
|
75
75
|
)
|
|
76
76
|
>>> inputs = processor.apply_chat_template(
|
|
@@ -75,7 +75,7 @@ def lce_forward(
|
|
|
75
75
|
>>> processor = AutoProcessor.from_pretrained(MODEL_PATH)
|
|
76
76
|
>>> model = Glm4vMoeForConditionalGeneration.from_pretrained(
|
|
77
77
|
pretrained_model_name_or_path=MODEL_PATH,
|
|
78
|
-
|
|
78
|
+
dtype="auto",
|
|
79
79
|
device_map="auto",
|
|
80
80
|
)
|
|
81
81
|
>>> inputs = processor.apply_chat_template(
|
|
@@ -338,7 +338,7 @@ def test_apply_liger_kernel_to_instance_for_llama():
|
|
|
338
338
|
with patch("transformers.models.llama.modeling_llama"):
|
|
339
339
|
# Instantiate a dummy model
|
|
340
340
|
config = transformers.models.llama.configuration_llama.LlamaConfig(
|
|
341
|
-
|
|
341
|
+
dtype=torch.bfloat16,
|
|
342
342
|
rms_norm_eps=1e-5,
|
|
343
343
|
hidden_size=32,
|
|
344
344
|
intermediate_size=64,
|
|
@@ -382,7 +382,7 @@ def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
|
|
|
382
382
|
|
|
383
383
|
# Instantiate a dummy model
|
|
384
384
|
config = transformers.models.mllama.configuration_mllama.MllamaConfig(
|
|
385
|
-
|
|
385
|
+
dtype=torch.bfloat16,
|
|
386
386
|
text_config=transformers.models.mllama.configuration_mllama.MllamaTextConfig(
|
|
387
387
|
rms_norm_eps=1e-5,
|
|
388
388
|
hidden_size=32,
|
|
@@ -533,7 +533,7 @@ def test_apply_liger_kernel_to_instance_for_llama4_for_causal_lm():
|
|
|
533
533
|
|
|
534
534
|
# Instantiate a dummy model
|
|
535
535
|
config = transformers.models.llama4.configuration_llama4.Llama4TextConfig(
|
|
536
|
-
|
|
536
|
+
dtype=torch.bfloat16,
|
|
537
537
|
rms_norm_eps=1e-5,
|
|
538
538
|
hidden_size=32,
|
|
539
539
|
intermediate_size=64,
|
|
@@ -573,9 +573,9 @@ def test_apply_liger_kernel_to_instance_for_llama4_for_conditional_generation():
|
|
|
573
573
|
|
|
574
574
|
# Instantiate a dummy model
|
|
575
575
|
config = transformers.models.llama4.configuration_llama4.Llama4Config(
|
|
576
|
-
|
|
576
|
+
dtype=torch.bfloat16,
|
|
577
577
|
text_config=transformers.models.llama4.configuration_llama4.Llama4TextConfig(
|
|
578
|
-
|
|
578
|
+
dtype=torch.bfloat16,
|
|
579
579
|
rms_norm_eps=1e-5,
|
|
580
580
|
hidden_size=32,
|
|
581
581
|
intermediate_size=64,
|
|
@@ -656,7 +656,7 @@ def test_apply_liger_kernel_to_instance_for_mistral():
|
|
|
656
656
|
with patch("transformers.models.mistral.modeling_mistral"):
|
|
657
657
|
# Instantiate a dummy model
|
|
658
658
|
config = transformers.models.mistral.configuration_mistral.MistralConfig(
|
|
659
|
-
|
|
659
|
+
dtype=torch.bfloat16,
|
|
660
660
|
rms_norm_eps=1e-5,
|
|
661
661
|
hidden_size=32,
|
|
662
662
|
intermediate_size=64,
|
|
@@ -695,7 +695,7 @@ def test_apply_liger_kernel_to_instance_for_mixtral():
|
|
|
695
695
|
with patch("transformers.models.mixtral.modeling_mixtral"):
|
|
696
696
|
# Instantiate a dummy model
|
|
697
697
|
config = transformers.models.mixtral.configuration_mixtral.MixtralConfig(
|
|
698
|
-
|
|
698
|
+
dtype=torch.bfloat16,
|
|
699
699
|
rms_norm_eps=1e-5,
|
|
700
700
|
hidden_size=32,
|
|
701
701
|
intermediate_size=64,
|
|
@@ -738,7 +738,7 @@ def test_apply_liger_kernel_to_instance_for_gemma():
|
|
|
738
738
|
with patch("transformers.models.gemma.modeling_gemma"):
|
|
739
739
|
# Instantiate a dummy model
|
|
740
740
|
config = transformers.models.gemma.configuration_gemma.GemmaConfig(
|
|
741
|
-
|
|
741
|
+
dtype=torch.bfloat16,
|
|
742
742
|
rms_norm_eps=1e-5,
|
|
743
743
|
hidden_size=32,
|
|
744
744
|
intermediate_size=64,
|
|
@@ -777,7 +777,7 @@ def test_apply_liger_kernel_to_instance_for_gemma2():
|
|
|
777
777
|
with patch("transformers.models.gemma2.modeling_gemma2"):
|
|
778
778
|
# Instantiate a dummy model
|
|
779
779
|
config = transformers.models.gemma2.configuration_gemma2.Gemma2Config(
|
|
780
|
-
|
|
780
|
+
dtype=torch.bfloat16,
|
|
781
781
|
rms_norm_eps=1e-5,
|
|
782
782
|
hidden_size=32,
|
|
783
783
|
intermediate_size=64,
|
|
@@ -827,7 +827,7 @@ def test_apply_liger_kernel_to_instance_for_paligemma():
|
|
|
827
827
|
|
|
828
828
|
# Instantiate a dummy model
|
|
829
829
|
config = transformers.models.paligemma.configuration_paligemma.PaliGemmaConfig(
|
|
830
|
-
|
|
830
|
+
dtype=torch.bfloat16,
|
|
831
831
|
text_config={
|
|
832
832
|
"num_hidden_layers": 2,
|
|
833
833
|
"rms_norm_eps": 1e-5,
|
|
@@ -883,7 +883,7 @@ def test_apply_liger_kernel_to_instance_for_gemma3_text():
|
|
|
883
883
|
|
|
884
884
|
# Instantiate a dummy model
|
|
885
885
|
config = transformers.models.gemma3.configuration_gemma3.Gemma3TextConfig(
|
|
886
|
-
|
|
886
|
+
dtype=torch.bfloat16,
|
|
887
887
|
rms_norm_eps=1e-5,
|
|
888
888
|
hidden_size=32,
|
|
889
889
|
intermediate_size=64,
|
|
@@ -939,7 +939,7 @@ def test_apply_liger_kernel_to_instance_for_gemma3_conditional_generation():
|
|
|
939
939
|
|
|
940
940
|
# Instantiate a dummy model
|
|
941
941
|
text_config = transformers.models.gemma3.configuration_gemma3.Gemma3TextConfig(
|
|
942
|
-
|
|
942
|
+
dtype=torch.bfloat16,
|
|
943
943
|
rms_norm_eps=1e-5,
|
|
944
944
|
hidden_size=32,
|
|
945
945
|
intermediate_size=64,
|
|
@@ -1026,7 +1026,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2():
|
|
|
1026
1026
|
with patch("transformers.models.qwen2.modeling_qwen2"):
|
|
1027
1027
|
# Instantiate a dummy model
|
|
1028
1028
|
config = transformers.models.qwen2.configuration_qwen2.Qwen2Config(
|
|
1029
|
-
|
|
1029
|
+
dtype=torch.bfloat16,
|
|
1030
1030
|
rms_norm_eps=1e-5,
|
|
1031
1031
|
hidden_size=32,
|
|
1032
1032
|
intermediate_size=64,
|
|
@@ -1068,7 +1068,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3():
|
|
|
1068
1068
|
|
|
1069
1069
|
# Instantiate a dummy model
|
|
1070
1070
|
config = transformers.models.qwen3.configuration_qwen3.Qwen3Config(
|
|
1071
|
-
|
|
1071
|
+
dtype=torch.bfloat16,
|
|
1072
1072
|
rms_norm_eps=1e-5,
|
|
1073
1073
|
hidden_size=32,
|
|
1074
1074
|
intermediate_size=64,
|
|
@@ -1110,7 +1110,7 @@ def test_apply_liger_kernel_to_instance_for_qwen3_moe():
|
|
|
1110
1110
|
|
|
1111
1111
|
# Instantiate a dummy model
|
|
1112
1112
|
config = transformers.models.qwen3_moe.configuration_qwen3_moe.Qwen3MoeConfig(
|
|
1113
|
-
|
|
1113
|
+
dtype=torch.bfloat16,
|
|
1114
1114
|
rms_norm_eps=1e-5,
|
|
1115
1115
|
hidden_size=32,
|
|
1116
1116
|
intermediate_size=64,
|
|
@@ -1158,7 +1158,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_for_conditional_generation(
|
|
|
1158
1158
|
|
|
1159
1159
|
# Instantiate a dummy model
|
|
1160
1160
|
config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLConfig(
|
|
1161
|
-
|
|
1161
|
+
dtype=torch.bfloat16,
|
|
1162
1162
|
rms_norm_eps=1e-5,
|
|
1163
1163
|
hidden_size=32,
|
|
1164
1164
|
intermediate_size=48,
|
|
@@ -1227,7 +1227,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl():
|
|
|
1227
1227
|
|
|
1228
1228
|
# Instantiate a dummy model
|
|
1229
1229
|
config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLConfig(
|
|
1230
|
-
|
|
1230
|
+
dtype=torch.bfloat16,
|
|
1231
1231
|
rms_norm_eps=1e-5,
|
|
1232
1232
|
hidden_size=32,
|
|
1233
1233
|
intermediate_size=48,
|
|
@@ -1294,7 +1294,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_vl_text():
|
|
|
1294
1294
|
|
|
1295
1295
|
# Instantiate a dummy model
|
|
1296
1296
|
config = transformers.models.qwen2_vl.configuration_qwen2_vl.Qwen2VLTextConfig(
|
|
1297
|
-
|
|
1297
|
+
dtype=torch.bfloat16,
|
|
1298
1298
|
rms_norm_eps=1e-5,
|
|
1299
1299
|
hidden_size=32,
|
|
1300
1300
|
intermediate_size=48,
|
|
@@ -1347,7 +1347,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl():
|
|
|
1347
1347
|
|
|
1348
1348
|
# Instantiate a dummy model
|
|
1349
1349
|
config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLConfig(
|
|
1350
|
-
|
|
1350
|
+
dtype=torch.bfloat16,
|
|
1351
1351
|
rms_norm_eps=1e-5,
|
|
1352
1352
|
hidden_size=32,
|
|
1353
1353
|
intermediate_size=48,
|
|
@@ -1416,7 +1416,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_for_conditional_generatio
|
|
|
1416
1416
|
|
|
1417
1417
|
# Instantiate a dummy model
|
|
1418
1418
|
config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLConfig(
|
|
1419
|
-
|
|
1419
|
+
dtype=torch.bfloat16,
|
|
1420
1420
|
rms_norm_eps=1e-5,
|
|
1421
1421
|
hidden_size=32,
|
|
1422
1422
|
intermediate_size=48,
|
|
@@ -1483,7 +1483,7 @@ def test_apply_liger_kernel_to_instance_for_qwen2_5_vl_text():
|
|
|
1483
1483
|
|
|
1484
1484
|
# Instantiate a dummy model
|
|
1485
1485
|
config = transformers.models.qwen2_5_vl.configuration_qwen2_5_vl.Qwen2_5_VLTextConfig(
|
|
1486
|
-
|
|
1486
|
+
dtype=torch.bfloat16,
|
|
1487
1487
|
rms_norm_eps=1e-5,
|
|
1488
1488
|
hidden_size=32,
|
|
1489
1489
|
intermediate_size=48,
|
|
@@ -1528,7 +1528,7 @@ def test_apply_liger_kernel_to_instance_for_phi3():
|
|
|
1528
1528
|
with patch("transformers.models.phi3.modeling_phi3"):
|
|
1529
1529
|
# Instantiate a dummy model
|
|
1530
1530
|
config = transformers.models.phi3.configuration_phi3.Phi3Config(
|
|
1531
|
-
|
|
1531
|
+
dtype=torch.bfloat16,
|
|
1532
1532
|
rms_norm_eps=1e-5,
|
|
1533
1533
|
hidden_size=32,
|
|
1534
1534
|
intermediate_size=64,
|
|
@@ -1570,7 +1570,7 @@ def test_apply_liger_kernel_to_instance_for_olmo2():
|
|
|
1570
1570
|
|
|
1571
1571
|
# Instantiate a dummy model
|
|
1572
1572
|
config = transformers.models.olmo2.configuration_olmo2.Olmo2Config(
|
|
1573
|
-
|
|
1573
|
+
dtype=torch.bfloat16,
|
|
1574
1574
|
rms_norm_eps=1e-5,
|
|
1575
1575
|
hidden_size=32,
|
|
1576
1576
|
intermediate_size=64,
|
|
@@ -1616,7 +1616,7 @@ def test_apply_liger_kernel_to_instance_for_glm4():
|
|
|
1616
1616
|
|
|
1617
1617
|
# Instantiate a dummy model
|
|
1618
1618
|
config = transformers.models.glm4.configuration_glm4.Glm4Config(
|
|
1619
|
-
|
|
1619
|
+
dtype=torch.bfloat16,
|
|
1620
1620
|
rms_norm_eps=1e-5,
|
|
1621
1621
|
hidden_size=32,
|
|
1622
1622
|
intermediate_size=64,
|
|
@@ -1664,7 +1664,7 @@ def test_apply_liger_kernel_to_instance_for_glm4v():
|
|
|
1664
1664
|
|
|
1665
1665
|
# Instantiate a dummy model
|
|
1666
1666
|
config = transformers.models.glm4v.configuration_glm4v.Glm4vConfig(
|
|
1667
|
-
|
|
1667
|
+
dtype=torch.bfloat16,
|
|
1668
1668
|
text_config={
|
|
1669
1669
|
"num_hidden_layers": 2,
|
|
1670
1670
|
"rms_norm_eps": 1e-5,
|
|
@@ -1734,7 +1734,7 @@ def test_apply_liger_kernel_to_instance_for_glm4v_moe():
|
|
|
1734
1734
|
|
|
1735
1735
|
# Instantiate a dummy model
|
|
1736
1736
|
config = transformers.models.glm4v_moe.configuration_glm4v_moe.Glm4vMoeConfig(
|
|
1737
|
-
|
|
1737
|
+
dtype=torch.bfloat16,
|
|
1738
1738
|
hidden_size=32,
|
|
1739
1739
|
num_attention_heads=4,
|
|
1740
1740
|
num_key_value_heads=2,
|
|
@@ -1837,7 +1837,7 @@ def test_apply_liger_kernel_to_instance_for_smollm3():
|
|
|
1837
1837
|
with patch("transformers.models.smollm3.modeling_smollm3"):
|
|
1838
1838
|
# Instantiate a dummy model
|
|
1839
1839
|
config = transformers.models.smollm3.configuration_smollm3.SmolLM3Config(
|
|
1840
|
-
|
|
1840
|
+
dtype=torch.bfloat16,
|
|
1841
1841
|
rms_norm_eps=1e-5,
|
|
1842
1842
|
hidden_size=32,
|
|
1843
1843
|
intermediate_size=64,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{liger_kernel_nightly-0.6.2.dev20250913213534 → liger_kernel_nightly-0.6.2.dev20250916231145}/NOTICE
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|