liger-kernel-nightly 0.6.2.dev20251010184612__tar.gz → 0.6.2.dev20251011154226__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/pyproject.toml +1 -1
- liger_kernel_nightly-0.6.2.dev20251011154226/src/liger_kernel/transformers/model/falcon_h1.py +108 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/monkey_patch.py +8 -4
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel_nightly.egg-info/SOURCES.txt +1 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/bf16/test_mini_models.py +60 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/bf16/test_mini_models_multimodal.py +4 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/bf16/test_mini_models_with_logits.py +59 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/fp32/test_mini_models.py +56 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/fp32/test_mini_models_multimodal.py +10 -4
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/fp32/test_mini_models_with_logits.py +56 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_monkey_patch.py +51 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/utils.py +12 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/Makefile +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/setup.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/glm4v.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/internvl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251010184612 → liger_kernel_nightly-0.6.2.dev20251011154226}/test/triton/test_triton_monkey_patch.py +0 -0
pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "liger_kernel_nightly"
-version = "0.6.2.dev20251010184612"
+version = "0.6.2.dev20251011154226"
 description = "Efficient Triton kernels for LLM Training"
 urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
 readme = { file = "README.md", content-type = "text/markdown" }
src/liger_kernel/transformers/model/falcon_h1.py (new file)
@@ -0,0 +1,108 @@
+from typing import TYPE_CHECKING
+from typing import Optional
+from typing import Union
+
+import torch
+
+from transformers.modeling_outputs import CausalLMOutputWithPast
+
+if TYPE_CHECKING:
+    from transformers.models.falcon_h1.modeling_falcon_h1 import FalconHybridMambaAttentionDynamicCache
+
+from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
+
+
+def lce_forward(
+    self,
+    input_ids: torch.LongTensor = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    position_ids: Optional[torch.LongTensor] = None,
+    past_key_values: Optional["FalconHybridMambaAttentionDynamicCache"] = None,
+    inputs_embeds: Optional[torch.FloatTensor] = None,
+    labels: Optional[torch.LongTensor] = None,
+    use_cache: Optional[bool] = None,
+    output_attentions: Optional[bool] = None,
+    output_hidden_states: Optional[bool] = None,
+    cache_position: Optional[torch.LongTensor] = None,
+    logits_to_keep: Union[int, torch.Tensor] = 0,
+    skip_logits: Optional[bool] = None,
+    **kwargs,
+) -> Union[tuple, CausalLMOutputWithPast]:
+    r"""
+    labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
+        Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+        config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
+        (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+
+    Example:
+
+    ```python
+    >>> from transformers import AutoTokenizer, FalconH1ForCausalLM
+
+    >>> model = FalconH1ForCausalLM.from_pretrained("...")
+    >>> tokenizer = AutoTokenizer.from_pretrained("...")
+
+    >>> prompt = "Hey, are you conscious? Can you talk to me?"
+    >>> inputs = tokenizer(prompt, return_tensors="pt")
+
+    >>> # Generate
+    >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
+    >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
+    "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
+    ```"""
+    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+    output_hidden_states = (
+        output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+    )
+
+    # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
+    outputs = self.model(
+        input_ids=input_ids,
+        attention_mask=attention_mask,
+        position_ids=position_ids,
+        past_key_values=past_key_values,
+        inputs_embeds=inputs_embeds,
+        use_cache=use_cache,
+        output_attentions=output_attentions,
+        output_hidden_states=output_hidden_states,
+        cache_position=cache_position,
+        **kwargs,
+    )
+
+    hidden_states = outputs[0]
+    # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
+    slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
+    kept_hidden_states = hidden_states[:, slice_indices, :]
+
+    shift_labels = kwargs.pop("shift_labels", None)
+    logits = None
+    loss = None
+    # if in training mode, don't materialize logits
+    if skip_logits and labels is None:
+        raise ValueError("skip_logits is True, but labels and shift_labels are None")
+
+    if skip_logits is None:
+        # By default, if in training mode, don't materialize logits
+        skip_logits = self.training and labels is not None
+
+    if skip_logits:
+        loss = LigerForCausalLMLoss(
+            hidden_states=kept_hidden_states,
+            lm_head_weight=self.lm_head.weight,
+            labels=labels,
+            shift_labels=shift_labels,
+            hidden_size=self.config.hidden_size,
+            **kwargs,
+        )
+    else:
+        logits = self.lm_head(kept_hidden_states)
+        if labels is not None or shift_labels is not None:
+            loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)
+
+    return CausalLMOutputWithPast(
+        loss=loss,
+        logits=logits,
+        past_key_values=outputs.past_key_values,
+        hidden_states=outputs.hidden_states,
+        attentions=outputs.attentions,
+    )
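A note on the `logits_to_keep` handling above: an integer value is turned into a tail slice, so only the last n hidden-state positions are projected through `lm_head`, and 0 keeps the full sequence because `slice(-0, None)` is `slice(0, None)`. A minimal, self-contained sketch of that slicing behaviour (tensor shapes are illustrative, not taken from the diff):

import torch

def kept_positions(hidden_states: torch.Tensor, logits_to_keep) -> torch.Tensor:
    # Same expression as in lce_forward above: ints become a tail slice,
    # tensors are used directly as an index along the sequence dimension.
    slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep
    return hidden_states[:, slice_indices, :]

hidden = torch.randn(1, 16, 256)  # (batch, seq_len, hidden_size)
print(kept_positions(hidden, 4).shape)                      # torch.Size([1, 4, 256]), last 4 positions
print(kept_positions(hidden, 0).shape)                      # torch.Size([1, 16, 256]), 0 keeps everything
print(kept_positions(hidden, torch.tensor([0, 15])).shape)  # torch.Size([1, 2, 256]), explicit indices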
src/liger_kernel/transformers/monkey_patch.py
@@ -15,6 +15,7 @@ from liger_kernel.transformers.cross_entropy import LigerCrossEntropyLoss
 from liger_kernel.transformers.functional import liger_cross_entropy
 from liger_kernel.transformers.geglu import LigerGEGLUMLP
 from liger_kernel.transformers.layer_norm import LigerLayerNorm
+from liger_kernel.transformers.model.falcon_h1 import lce_forward as falcon_h1_lce_forward
 from liger_kernel.transformers.model.gemma import lce_forward as gemma_lce_forward
 from liger_kernel.transformers.model.gemma import lce_forward_deprecated as gemma_lce_forward_deprecated
 from liger_kernel.transformers.model.gemma2 import lce_forward as gemma2_lce_forward
@@ -2109,8 +2110,8 @@ def apply_liger_kernel_to_internvl(
 
 def apply_liger_kernel_to_falcon_h1(
     rope: bool = True,
-    cross_entropy: bool =
-    fused_linear_cross_entropy: bool =
+    cross_entropy: bool = False,
+    fused_linear_cross_entropy: bool = True,
     rms_norm: bool = True,
     swiglu: bool = False,
     model: PreTrainedModel = None,
@@ -2144,7 +2145,7 @@ def apply_liger_kernel_to_falcon_h1(
         logger.info("Apply liger RMSNorm")
         modeling_falcon_h1.FalconH1RMSNorm = LigerRMSNorm
     if swiglu:
-
+        logger.warning("LigerSwiGLUMLP is not available for Falcon-H1 models. There will be no effect.")
 
     if cross_entropy:
         logger.info("Apply liger cross entropy")
@@ -2153,7 +2154,10 @@ def apply_liger_kernel_to_falcon_h1(
         nn.functional.cross_entropy = liger_cross_entropy
 
     if fused_linear_cross_entropy:
-
+        if model is not None:
+            model.forward = MethodType(falcon_h1_lce_forward, model)
+        else:
+            modeling_falcon_h1.FalconH1ForCausalLM.forward = falcon_h1_lce_forward
 
     if model is not None:
         # The model instance already exists, so we need to additionally patch the
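Taken together, the monkey-patch changes mean the helper now routes Falcon-H1 training through the fused linear cross-entropy path by default (cross_entropy=False, fused_linear_cross_entropy=True), replacing FalconH1ForCausalLM.forward at the class level, or the bound forward of an existing instance when a model is passed. A minimal usage sketch, assuming transformers>=4.53 and a Triton-capable GPU; the checkpoint id below is a placeholder, not taken from this diff:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

from liger_kernel.transformers import apply_liger_kernel_to_falcon_h1

checkpoint = "tiiuae/Falcon-H1-0.5B-Base"  # placeholder id; any FalconH1ForCausalLM checkpoint works

# Class-level patch: call before from_pretrained so new instances pick up the Liger
# forward, RMSNorm, and rope replacements.
apply_liger_kernel_to_falcon_h1()

model = AutoModelForCausalLM.from_pretrained(checkpoint).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

batch = tokenizer("Liger kernels keep memory flat.", return_tensors="pt").to("cuda")
labels = batch["input_ids"].clone()

model.train()
out = model(**batch, labels=labels)
# In training mode with labels, skip_logits defaults to True, so out.logits is None and
# out.loss comes from LigerForCausalLMLoss without materializing the full logits tensor.
print(out.loss, out.logits)

# Instance-level patch: for an already-instantiated model, pass it explicitly so its
# bound forward is replaced via MethodType (the new branch added above).
apply_liger_kernel_to_falcon_h1(model=model)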
src/liger_kernel_nightly.egg-info/SOURCES.txt
@@ -187,6 +187,7 @@ src/liger_kernel/transformers/tvd.py
 src/liger_kernel/transformers/experimental/__init__.py
 src/liger_kernel/transformers/experimental/embedding.py
 src/liger_kernel/transformers/model/__init__.py
+src/liger_kernel/transformers/model/falcon_h1.py
 src/liger_kernel/transformers/model/gemma.py
 src/liger_kernel/transformers/model/gemma2.py
 src/liger_kernel/transformers/model/gemma3.py
test/convergence/bf16/test_mini_models.py
@@ -18,6 +18,7 @@ from transformers.models.phi3 import Phi3ForCausalLM
 from transformers.models.qwen2 import Qwen2Config
 from transformers.models.qwen2 import Qwen2ForCausalLM
 
+from liger_kernel.transformers import apply_liger_kernel_to_falcon_h1
 from liger_kernel.transformers import apply_liger_kernel_to_gemma
 from liger_kernel.transformers import apply_liger_kernel_to_gemma2
 from liger_kernel.transformers import apply_liger_kernel_to_gemma3_text
@@ -45,6 +46,7 @@ from test.utils import MiniModelConfig
 from test.utils import assert_verbose_allclose
 from test.utils import get_logprobs
 from test.utils import get_topk
+from test.utils import revert_liger_kernel_to_falcon_h1
 from test.utils import revert_liger_kernel_to_gemma
 from test.utils import revert_liger_kernel_to_gemma2
 from test.utils import revert_liger_kernel_to_gemma3_text
@@ -201,6 +203,15 @@ try:
 except ImportError:
     INTERNVL_AVAILABLE = False
 
+try:
+    # FalconH1 is only available in transformers>=4.53.0
+    from transformers.models.falcon_h1.configuration_falcon_h1 import FalconH1Config
+    from transformers.models.falcon_h1.modeling_falcon_h1 import FalconH1ForCausalLM
+
+    FALCONH1_AVAILABLE = True
+except ImportError:
+    FALCONH1_AVAILABLE = False
+
 from liger_kernel.utils import infer_device
 
 device = infer_device()
@@ -1065,6 +1076,36 @@ if INTERNVL_AVAILABLE:
         ),
     )
 
+if FALCONH1_AVAILABLE:
+    MINI_MODEL_SETUPS["mini_falcon_h1"] = MiniModelConfig(
+        liger_kernel_patch_func=apply_liger_kernel_to_falcon_h1,
+        liger_kernel_patch_revert_func=revert_liger_kernel_to_falcon_h1,
+        model_class=FalconH1ForCausalLM,
+        mini_model_config=FalconH1Config(
+            model_type="falcon_h1",
+            vocab_size=32000,
+            hidden_size=256,  # 4096
+            num_hidden_layers=4,  # 24
+            num_attention_heads=4,  # 32
+            num_key_value_heads=2,  # 8
+            intermediate_size=1024,  # 11008
+            hidden_act="silu",
+            max_position_embeddings=4096,
+            initializer_range=0.02,
+            rms_norm_eps=1e-6,
+            use_cache=True,
+            pad_token_id=0,
+            bos_token_id=1,
+            eos_token_id=2,
+            tie_word_embeddings=False,
+            mamba_d_ssm=128,  # 1024
+            mamba_n_heads=16,  # 128
+            mamba_d_state=32,  # 245
+            mamba_d_conv=2,  # 4
+            attn_implementation="eager",
+        ),
+    )
+
 
 def create_model(model_name="mini_llama4"):
     """
@@ -1574,6 +1615,25 @@ def run_mini_model(
                 ),
             ],
         ),
+        pytest.param(
+            "mini_falcon_h1",
+            32,
+            1e-5,
+            torch.bfloat16,
+            1e-2,
+            1e-2,
+            1e-1,
+            1e-2,
+            1e-2,
+            1e-2,
+            marks=[
+                pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
+                pytest.mark.skipif(
+                    not FALCONH1_AVAILABLE,
+                    reason="FalconH1 not available in this version of transformers",
+                ),
+            ],
+        ),
     ],
 )
 def test_mini_model(
test/convergence/bf16/test_mini_models_multimodal.py
@@ -1066,6 +1066,10 @@ def run_mini_model_multimodal(
                     not MLLAMA_AVAILABLE,
                     reason="Mllama not available in this version of transformers",
                 ),
+                pytest.mark.skipif(
+                    version.parse("4.51.0") > version.parse(transformers.__version__),
+                    reason="MllamaForConditionalGeneration doesn't accecpt `skip_logits` kwargs",
+                ),
             ],
         ),
         pytest.param(
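The added skip condition gates the Mllama multimodal case on the installed transformers version using packaging's version parser. A standalone sketch of the same check (illustrative only):

import transformers
from packaging import version

# True when the installed transformers is older than 4.51.0, i.e. when
# MllamaForConditionalGeneration does not yet accept the skip_logits kwarg.
too_old = version.parse("4.51.0") > version.parse(transformers.__version__)
print(too_old)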
@@ -18,6 +18,7 @@ from transformers.models.phi3 import Phi3ForCausalLM
|
|
|
18
18
|
from transformers.models.qwen2 import Qwen2Config
|
|
19
19
|
from transformers.models.qwen2 import Qwen2ForCausalLM
|
|
20
20
|
|
|
21
|
+
from liger_kernel.transformers import apply_liger_kernel_to_falcon_h1
|
|
21
22
|
from liger_kernel.transformers import apply_liger_kernel_to_gemma
|
|
22
23
|
from liger_kernel.transformers import apply_liger_kernel_to_gemma2
|
|
23
24
|
from liger_kernel.transformers import apply_liger_kernel_to_gemma3_text
|
|
@@ -45,6 +46,7 @@ from test.utils import MiniModelConfig
|
|
|
45
46
|
from test.utils import assert_verbose_allclose
|
|
46
47
|
from test.utils import get_logprobs
|
|
47
48
|
from test.utils import get_topk
|
|
49
|
+
from test.utils import revert_liger_kernel_to_falcon_h1
|
|
48
50
|
from test.utils import revert_liger_kernel_to_gemma
|
|
49
51
|
from test.utils import revert_liger_kernel_to_gemma2
|
|
50
52
|
from test.utils import revert_liger_kernel_to_gemma3_text
|
|
@@ -201,6 +203,15 @@ try:
|
|
|
201
203
|
except ImportError:
|
|
202
204
|
INTERNVL_AVAILABLE = False
|
|
203
205
|
|
|
206
|
+
try:
|
|
207
|
+
# FalconH1 is only available in transformers>=4.53.0
|
|
208
|
+
from transformers.models.falcon_h1.configuration_falcon_h1 import FalconH1Config
|
|
209
|
+
from transformers.models.falcon_h1.modeling_falcon_h1 import FalconH1ForCausalLM
|
|
210
|
+
|
|
211
|
+
FALCONH1_AVAILABLE = True
|
|
212
|
+
except ImportError:
|
|
213
|
+
FALCONH1_AVAILABLE = False
|
|
214
|
+
|
|
204
215
|
from liger_kernel.utils import infer_device
|
|
205
216
|
|
|
206
217
|
device = infer_device()
|
|
@@ -1063,6 +1074,35 @@ if INTERNVL_AVAILABLE:
|
|
|
1063
1074
|
),
|
|
1064
1075
|
)
|
|
1065
1076
|
|
|
1077
|
+
if FALCONH1_AVAILABLE:
|
|
1078
|
+
MINI_MODEL_SETUPS["mini_falcon_h1"] = MiniModelConfig(
|
|
1079
|
+
liger_kernel_patch_func=apply_liger_kernel_to_falcon_h1,
|
|
1080
|
+
liger_kernel_patch_revert_func=revert_liger_kernel_to_falcon_h1,
|
|
1081
|
+
model_class=FalconH1ForCausalLM,
|
|
1082
|
+
mini_model_config=FalconH1Config(
|
|
1083
|
+
model_type="falcon_h1",
|
|
1084
|
+
vocab_size=32000,
|
|
1085
|
+
hidden_size=256, # 4096
|
|
1086
|
+
num_hidden_layers=4, # 24
|
|
1087
|
+
num_attention_heads=4, # 32
|
|
1088
|
+
num_key_value_heads=2, # 8
|
|
1089
|
+
intermediate_size=1024, # 11008
|
|
1090
|
+
hidden_act="silu",
|
|
1091
|
+
max_position_embeddings=4096,
|
|
1092
|
+
initializer_range=0.02,
|
|
1093
|
+
rms_norm_eps=1e-6,
|
|
1094
|
+
use_cache=True,
|
|
1095
|
+
pad_token_id=0,
|
|
1096
|
+
bos_token_id=1,
|
|
1097
|
+
eos_token_id=2,
|
|
1098
|
+
tie_word_embeddings=False,
|
|
1099
|
+
mamba_d_ssm=128, # 1024
|
|
1100
|
+
mamba_n_heads=16, # 128
|
|
1101
|
+
mamba_d_state=32, # 245
|
|
1102
|
+
mamba_d_conv=2, # 4
|
|
1103
|
+
),
|
|
1104
|
+
)
|
|
1105
|
+
|
|
1066
1106
|
|
|
1067
1107
|
def create_model(model_name="mini_llama3"):
|
|
1068
1108
|
"""
|
|
@@ -1547,6 +1587,25 @@ def run_mini_model(
                 ),
             ],
         ),
+        pytest.param(
+            "mini_falcon_h1",
+            32,
+            1e-5,
+            torch.bfloat16,
+            1e-2,
+            1e-2,
+            1e-1,
+            1e-2,
+            1e-2,
+            1e-2,
+            marks=[
+                pytest.mark.skipif(not supports_bfloat16(), reason="bfloat16 not supported on this GPU"),
+                pytest.mark.skipif(
+                    not FALCONH1_AVAILABLE,
+                    reason="FalconH1 not available in this version of transformers",
+                ),
+            ],
+        ),
     ],
 )
 def test_mini_model(
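The bfloat16 parameters above are gated both on the FalconH1 import guard and on a hardware capability check. A rough, hypothetical sketch of what such a supports_bfloat16() check can look like; the real helper lives in test/utils.py and may differ in detail:

import torch

from liger_kernel.utils import infer_device


def supports_bfloat16_sketch() -> bool:
    # Hypothetical re-implementation for illustration only.
    device = infer_device()
    if device == "cuda":
        return torch.cuda.is_bf16_supported()
    if device == "xpu":
        return True  # assumption: recent Intel GPUs handle bf16
    return False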
@@ -18,6 +18,7 @@ from transformers.models.phi3 import Phi3ForCausalLM
 from transformers.models.qwen2 import Qwen2Config
 from transformers.models.qwen2 import Qwen2ForCausalLM
 
+from liger_kernel.transformers import apply_liger_kernel_to_falcon_h1
 from liger_kernel.transformers import apply_liger_kernel_to_gemma
 from liger_kernel.transformers import apply_liger_kernel_to_gemma2
 from liger_kernel.transformers import apply_liger_kernel_to_gemma3_text
@@ -45,6 +46,7 @@ from test.utils import MiniModelConfig
 from test.utils import assert_verbose_allclose
 from test.utils import get_logprobs
 from test.utils import get_topk
+from test.utils import revert_liger_kernel_to_falcon_h1
 from test.utils import revert_liger_kernel_to_gemma
 from test.utils import revert_liger_kernel_to_gemma2
 from test.utils import revert_liger_kernel_to_gemma3_text
@@ -200,6 +202,15 @@ try:
 except ImportError:
     INTERNVL_AVAILABLE = False
 
+try:
+    # FalconH1 is only available in transformers>=4.53.0
+    from transformers.models.falcon_h1.configuration_falcon_h1 import FalconH1Config
+    from transformers.models.falcon_h1.modeling_falcon_h1 import FalconH1ForCausalLM
+
+    FALCONH1_AVAILABLE = True
+except ImportError:
+    FALCONH1_AVAILABLE = False
+
 from liger_kernel.utils import infer_device
 
 device = infer_device()
@@ -1061,6 +1072,35 @@ if INTERNVL_AVAILABLE:
         ),
     )
 
+if FALCONH1_AVAILABLE:
+    MINI_MODEL_SETUPS["mini_falcon_h1"] = MiniModelConfig(
+        liger_kernel_patch_func=apply_liger_kernel_to_falcon_h1,
+        liger_kernel_patch_revert_func=revert_liger_kernel_to_falcon_h1,
+        model_class=FalconH1ForCausalLM,
+        mini_model_config=FalconH1Config(
+            model_type="falcon_h1",
+            vocab_size=32000,
+            hidden_size=256,  # 4096
+            num_hidden_layers=4,  # 24
+            num_attention_heads=4,  # 32
+            num_key_value_heads=2,  # 8
+            intermediate_size=1024,  # 11008
+            hidden_act="silu",
+            max_position_embeddings=4096,
+            initializer_range=0.02,
+            rms_norm_eps=1e-6,
+            use_cache=True,
+            pad_token_id=0,
+            bos_token_id=1,
+            eos_token_id=2,
+            tie_word_embeddings=False,
+            mamba_d_ssm=128,  # 1024
+            mamba_n_heads=16,  # 128
+            mamba_d_state=32,  # 245
+            mamba_d_conv=2,  # 4
+        ),
+    )
+
 
 def create_model(model_name="mini_llama3"):
     """
@@ -1432,6 +1472,22 @@ def run_mini_model(
                 reason="InternVL not available in this version of transformers",
             ),
         ),
+        pytest.param(
+            "mini_falcon_h1",
+            32,
+            1e-5,
+            torch.float32,
+            1e-8,
+            1e-4,
+            4e-2,
+            1e-5,
+            5e-3,
+            1e-5,
+            marks=pytest.mark.skipif(
+                not FALCONH1_AVAILABLE,
+                reason="FalconH1 not available in this version of transformers",
+            ),
+        ),
     ],
 )
 def test_mini_model(
@@ -1061,10 +1061,16 @@ def run_mini_model_multimodal(
             1e-5,
             5e-3,
             1e-5,
-            marks=
-
-
-
+            marks=[
+                pytest.mark.skipif(
+                    not MLLAMA_AVAILABLE,
+                    reason="Mllama not available in this version of transformers",
+                ),
+                pytest.mark.skipif(
+                    version.parse("4.51.0") > version.parse(transformers.__version__),
+                    reason="MllamaForConditionalGeneration doesn't accecpt `skip_logits` kwargs",
+                ),
+            ],
         ),
         pytest.param(
             "mini_paligemma",
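The second skip condition added above compares the installed transformers version against 4.51.0 with packaging.version, so the Mllama case only runs on 4.51.0 or newer. The equivalent standalone check, for reference:

import transformers
from packaging import version

# Mirrors the skipif condition: skipped when transformers < 4.51.0,
# i.e. the test runs only on versions that accept `skip_logits`.
runs = version.parse(transformers.__version__) >= version.parse("4.51.0")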
@@ -18,6 +18,7 @@ from transformers.models.phi3 import Phi3ForCausalLM
 from transformers.models.qwen2 import Qwen2Config
 from transformers.models.qwen2 import Qwen2ForCausalLM
 
+from liger_kernel.transformers import apply_liger_kernel_to_falcon_h1
 from liger_kernel.transformers import apply_liger_kernel_to_gemma
 from liger_kernel.transformers import apply_liger_kernel_to_gemma2
 from liger_kernel.transformers import apply_liger_kernel_to_gemma3_text
@@ -45,6 +46,7 @@ from test.utils import MiniModelConfig
 from test.utils import assert_verbose_allclose
 from test.utils import get_logprobs
 from test.utils import get_topk
+from test.utils import revert_liger_kernel_to_falcon_h1
 from test.utils import revert_liger_kernel_to_gemma
 from test.utils import revert_liger_kernel_to_gemma2
 from test.utils import revert_liger_kernel_to_gemma3_text
@@ -200,6 +202,15 @@ try:
 except ImportError:
     INTERNVL_AVAILABLE = False
 
+try:
+    # FalconH1 is only available in transformers>=4.53.0
+    from transformers.models.falcon_h1.configuration_falcon_h1 import FalconH1Config
+    from transformers.models.falcon_h1.modeling_falcon_h1 import FalconH1ForCausalLM
+
+    FALCONH1_AVAILABLE = True
+except ImportError:
+    FALCONH1_AVAILABLE = False
+
 from liger_kernel.utils import infer_device
 
 device = infer_device()
@@ -1061,6 +1072,35 @@ if INTERNVL_AVAILABLE:
         ),
     )
 
+if FALCONH1_AVAILABLE:
+    MINI_MODEL_SETUPS["mini_falcon_h1"] = MiniModelConfig(
+        liger_kernel_patch_func=apply_liger_kernel_to_falcon_h1,
+        liger_kernel_patch_revert_func=revert_liger_kernel_to_falcon_h1,
+        model_class=FalconH1ForCausalLM,
+        mini_model_config=FalconH1Config(
+            model_type="falcon_h1",
+            vocab_size=32000,
+            hidden_size=256,  # 4096
+            num_hidden_layers=4,  # 24
+            num_attention_heads=4,  # 32
+            num_key_value_heads=2,  # 8
+            intermediate_size=1024,  # 11008
+            hidden_act="silu",
+            max_position_embeddings=4096,
+            initializer_range=0.02,
+            rms_norm_eps=1e-6,
+            use_cache=True,
+            pad_token_id=0,
+            bos_token_id=1,
+            eos_token_id=2,
+            tie_word_embeddings=False,
+            mamba_d_ssm=128,  # 1024
+            mamba_n_heads=16,  # 128
+            mamba_d_state=32,  # 245
+            mamba_d_conv=2,  # 4
+        ),
+    )
+
 
 def create_model(model_name="mini_llama3"):
     """
@@ -1398,6 +1438,22 @@ def run_mini_model(
                 reason="InternVL not available in this version of transformers",
             ),
         ),
+        pytest.param(
+            "mini_falcon_h1",
+            32,
+            1e-5,
+            torch.float32,
+            1e-8,
+            1e-4,
+            4e-2,
+            1e-5,
+            5e-3,
+            1e-5,
+            marks=pytest.mark.skipif(
+                not FALCONH1_AVAILABLE,
+                reason="FalconH1 not available in this version of transformers",
+            ),
+        ),
     ],
 )
 def test_mini_model(
@@ -32,6 +32,7 @@ SUPPORTED_TRANSFORMER_VERSION = "4.46.1"
 
 # Import forward functions based on transformer version
 if transformer_version >= version.parse(SUPPORTED_TRANSFORMER_VERSION):
+    from liger_kernel.transformers.model.falcon_h1 import lce_forward as falcon_h1_lce_forward
     from liger_kernel.transformers.model.gemma import lce_forward as gemma_lce_forward
     from liger_kernel.transformers.model.gemma2 import lce_forward as gemma2_lce_forward
     from liger_kernel.transformers.model.llama import lce_forward as llama_lce_forward
@@ -156,6 +157,15 @@ def is_paligemma_available():
         return False
 
 
+def is_falcon_h1_available():
+    try:
+        import transformers.models.falcon_h1  # noqa: F401
+
+        return True
+    except ImportError:
+        return False
+
+
 def test_import_from_root():
     try:
         from liger_kernel.transformers import AutoLigerKernelForCausalLM  # noqa: F401
@@ -383,6 +393,47 @@ def test_apply_liger_kernel_to_instance_for_llama():
         pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
 
 
+@pytest.mark.skipif(not is_falcon_h1_available(), reason="falcon_h1 module not available")
+def test_apply_liger_kernel_to_falcon_h1_for_causal_lm():
+    with patch("transformers.models.falcon_h1.modeling_falcon_h1"):
+        from transformers.models.falcon_h1.modeling_falcon_h1 import FalconH1ForCausalLM
+
+        # Instantiate a dummy model
+        config = transformers.models.falcon_h1.configuration_falcon_h1.FalconH1Config(
+            hidden_size=256,
+            num_hidden_layers=2,
+            num_attention_heads=4,
+            num_key_value_heads=2,
+            intermediate_size=1024,
+        )
+        dummy_model_instance = FalconH1ForCausalLM(config)
+
+        # Check that model instance variables are not yet patched with Liger modules
+        assert inspect.getsource(dummy_model_instance.forward) != inspect.getsource(falcon_h1_lce_forward)
+        assert inspect.getsource(dummy_model_instance.model.final_layernorm.forward) != inspect.getsource(
+            LigerRMSNorm.forward
+        )
+        for layer in dummy_model_instance.model.layers:
+            assert inspect.getsource(layer.input_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
+            assert inspect.getsource(layer.pre_ff_layernorm.forward) != inspect.getsource(LigerRMSNorm.forward)
+
+        # Test applying kernels to the model instance
+        _apply_liger_kernel_to_instance(model=dummy_model_instance)
+
+        # Check that the model's instance variables were correctly patched with Liger modules
+        assert inspect.getsource(dummy_model_instance.forward) == inspect.getsource(falcon_h1_lce_forward)
+        assert inspect.getsource(dummy_model_instance.model.final_layernorm.forward) == inspect.getsource(
+            LigerRMSNorm.forward
+        )
+        for layer in dummy_model_instance.model.layers:
+            assert inspect.getsource(layer.input_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
+            assert inspect.getsource(layer.pre_ff_layernorm.forward) == inspect.getsource(LigerRMSNorm.forward)
+        try:
+            print(dummy_model_instance)
+        except Exception as e:
+            pytest.fail(f"An exception occured in extra_expr: {type(e).__name__} - {e}")
+
+
 @pytest.mark.skipif(not is_mllama_available(), reason="mllama module not available")
 def test_apply_liger_kernel_to_instance_for_mllama_for_conditional_generation():
     # Ensure any monkey patching is cleaned up for subsequent tests
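The new test patches a single FalconH1ForCausalLM instance rather than the whole module. A condensed sketch of that instance-level flow, assuming the usual import path for _apply_liger_kernel_to_instance; the config values are the same tiny ones the test uses:

from transformers.models.falcon_h1.configuration_falcon_h1 import FalconH1Config
from transformers.models.falcon_h1.modeling_falcon_h1 import FalconH1ForCausalLM

from liger_kernel.transformers.monkey_patch import _apply_liger_kernel_to_instance

# Tiny configuration for a fast, CPU-friendly instantiation.
config = FalconH1Config(
    hidden_size=256,
    num_hidden_layers=2,
    num_attention_heads=4,
    num_key_value_heads=2,
    intermediate_size=1024,
)
model = FalconH1ForCausalLM(config)

# Swaps the forward/RMSNorm implementations on this instance only, dispatching on
# model.config.model_type ("falcon_h1"); other FalconH1 models stay untouched.
_apply_liger_kernel_to_instance(model=model)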
@@ -582,6 +582,18 @@ def revert_liger_kernel_to_internvl(model_config: MiniModelConfig):
     print("Liger kernel patches have been reverted.")
 
 
+def revert_liger_kernel_to_falcon_h1(model_config: MiniModelConfig):
+    """
+    Revert all Liger kernel patches applied to FalconH1.
+    """
+
+    from transformers.models.falcon_h1 import modeling_falcon_h1
+
+    importlib.reload(modeling_falcon_h1)
+    model_config.model_class = modeling_falcon_h1.FalconH1ForCausalLM
+    print("Liger kernel patches have been reverted.")
+
+
 class HFAlignmentLoss:
     def __init__(
         self,
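The revert helper restores the stock classes by reloading the Hugging Face module, which is also how a quick sanity check can confirm the patch/revert cycle outside the test suite. A minimal sketch of that apply/reload cycle, assuming transformers>=4.53.0:

import importlib

from transformers.models.falcon_h1 import modeling_falcon_h1

from liger_kernel.transformers import apply_liger_kernel_to_falcon_h1

apply_liger_kernel_to_falcon_h1()                      # patch in Liger kernels
patched_cls = modeling_falcon_h1.FalconH1ForCausalLM   # class with patched methods

importlib.reload(modeling_falcon_h1)                   # revert: re-executes the module
clean_cls = modeling_falcon_h1.FalconH1ForCausalLM     # stock HF class again
assert patched_cls is not clean_cls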
File without changes