liger-kernel-nightly 0.5.9.dev20250519015630__tar.gz → 0.5.9.dev20250519035525__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/pyproject.toml +1 -1
- liger_kernel_nightly-0.5.9.dev20250519035525/src/liger_kernel/transformers/fsdp.py +55 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/llama.py +56 -3
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/monkey_patch.py +66 -13
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/trainer/orpo_trainer.py +1 -53
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/utils.py +11 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel_nightly.egg-info/SOURCES.txt +1 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/Makefile +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/setup.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.9.dev20250519015630"
|
7
|
+
version = "0.5.9.dev20250519035525"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -0,0 +1,55 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from typing import Callable
|
3
|
+
|
4
|
+
from torch.distributed.fsdp import FullyShardedDataParallel
|
5
|
+
|
6
|
+
|
7
|
+
class _FSDPForwardRedirection:
|
8
|
+
"""
|
9
|
+
Modified based on
|
10
|
+
https://github.com/Lightning-AI/pytorch-lightning/blob/d3f9c83d6efa4f1def36aa6c199600946cdb9117/src/lightning/pytorch/strategies/strategy.py#L601-L648
|
11
|
+
Redirect a method call through FullyShardedDataParallel.forward so that the FSDP module's root pre-forward and
|
12
|
+
post-forward can be properly executed around the method call.
|
13
|
+
This is needed in cases where we call a submodule of a FSDP module. For instance, when we want to call only
|
14
|
+
the `LlamaModel` part out of a FSDP-wrapped `LlamaForCausalLM` to get the hidden states without involving
|
15
|
+
GPU-memory-heavy `lm_head` and cross entropy computation, doing this directly (i.e. `model.model.forward()`)
|
16
|
+
will not work because the first `nn.Embedding` layer is not independently wrapped as a FSDP module (because of
|
17
|
+
the transformer-based wrapping policy), and not calling it through FSDP root module forward will not all-gather
|
18
|
+
its parameter, thus resulting in "RuntimeError: 'weight' must be 2-D" error. Similarly, if we want to call just
|
19
|
+
the `lm_head` part of a model, we need this trick too to properly get its params all-gathered.
|
20
|
+
"""
|
21
|
+
|
22
|
+
def __call__(
|
23
|
+
self,
|
24
|
+
wrapper_module: FullyShardedDataParallel,
|
25
|
+
method: Callable,
|
26
|
+
*args: Any,
|
27
|
+
**kwargs: Any,
|
28
|
+
):
|
29
|
+
"""Reroutes a method call through the `wrapper_module`'s `forward` method.
|
30
|
+
Args:
|
31
|
+
wrapper_module: The module that has `original_module` wrapped.
|
32
|
+
original_module: The module that was wrapped inside `wrapper_module`.
|
33
|
+
method_name: The name of the method that should be called on the `original_module` after inputs get
|
34
|
+
redirected through the `wrapper_module`'s `forward` method.
|
35
|
+
*args: The positional arguments to the method `method_name`. They will get passed to a patched
|
36
|
+
`forward` method instead.
|
37
|
+
**kwargs: The keyword arguments to the method `method_name`. They will get passed to a patched
|
38
|
+
`forward` method instead.
|
39
|
+
"""
|
40
|
+
assert isinstance(wrapper_module, FullyShardedDataParallel)
|
41
|
+
original_module = wrapper_module._fsdp_wrapped_module
|
42
|
+
original_forward = original_module.forward
|
43
|
+
|
44
|
+
def wrapped_forward(*_args: Any, **_kwargs: Any) -> Any:
|
45
|
+
# Unpatch ourselves immediately before calling the method `method_name`
|
46
|
+
# because itself may want to call the real `forward`
|
47
|
+
original_module.forward = original_forward # type: ignore[method-assign]
|
48
|
+
# Call the actual method e.g. `.training_step(...)`
|
49
|
+
out = method(*_args, **_kwargs)
|
50
|
+
return out
|
51
|
+
|
52
|
+
# Patch the original_module's forward so we can redirect the arguments back to the real method
|
53
|
+
original_module.forward = wrapped_forward # type: ignore[method-assign]
|
54
|
+
wrapper_output = wrapper_module(*args, **kwargs)
|
55
|
+
return wrapper_output
|
@@ -7,16 +7,22 @@ from typing import Union
|
|
7
7
|
import torch
|
8
8
|
import torch.nn.functional as F
|
9
9
|
|
10
|
+
from torch.distributed.fsdp import FullyShardedDataParallel
|
10
11
|
from torch.nn import CrossEntropyLoss
|
11
12
|
from transformers.modeling_outputs import CausalLMOutputWithPast
|
12
13
|
from transformers.utils.deprecation import deprecate_kwarg
|
13
14
|
|
15
|
+
from liger_kernel.transformers.fsdp import _FSDPForwardRedirection
|
14
16
|
from liger_kernel.transformers.fused_linear_cross_entropy import LigerFusedLinearCrossEntropyLoss
|
15
17
|
from liger_kernel.transformers.model.loss_utils import LigerForCausalLMLoss
|
18
|
+
from liger_kernel.utils import PEFT_AVAILABLE
|
16
19
|
|
17
20
|
if TYPE_CHECKING:
|
18
21
|
from transformers.cache_utils import Cache
|
19
22
|
|
23
|
+
if PEFT_AVAILABLE:
|
24
|
+
from peft.utils.other import ModulesToSaveWrapper
|
25
|
+
|
20
26
|
|
21
27
|
def lce_forward_deprecated(
|
22
28
|
self,
|
@@ -213,12 +219,12 @@ def lce_forward(
|
|
213
219
|
loss = None
|
214
220
|
# if in training mode, don't materialize logits
|
215
221
|
if self.training and (labels is not None or shift_labels is not None):
|
216
|
-
loss = LigerForCausalLMLoss(
|
222
|
+
loss = lce_maybe_trainable_lm_head(
|
223
|
+
self,
|
217
224
|
hidden_states=kept_hidden_states,
|
218
|
-
|
225
|
+
hidden_size=self.config.hidden_size,
|
219
226
|
labels=labels,
|
220
227
|
shift_labels=shift_labels,
|
221
|
-
hidden_size=self.config.hidden_size,
|
222
228
|
**loss_kwargs,
|
223
229
|
)
|
224
230
|
|
@@ -243,3 +249,50 @@ def lce_forward(
|
|
243
249
|
hidden_states=outputs.hidden_states,
|
244
250
|
attentions=outputs.attentions,
|
245
251
|
)
|
252
|
+
|
253
|
+
|
254
|
+
def lce_maybe_trainable_lm_head(self, hidden_states, hidden_size, labels, shift_labels, **loss_kwargs):
|
255
|
+
lm_head = self.lm_head
|
256
|
+
|
257
|
+
# Unwrap the module if lm_head has been added as trainable module in PEFT LoRA configuration,
|
258
|
+
# i.e. listed in the modules_to_save field of LoraConfig, so the lm_head weights are read
|
259
|
+
# from the unwrapped module.
|
260
|
+
# See https://huggingface.co/docs/peft/package_reference/lora for reference.
|
261
|
+
if PEFT_AVAILABLE and isinstance(lm_head, ModulesToSaveWrapper):
|
262
|
+
lm_head = lm_head.modules_to_save.default
|
263
|
+
|
264
|
+
# If FSDP is used and lm_head is trainable, e.g., during full fine-tuning or with LoRA,
|
265
|
+
# reading the lm_head module weights and calling the kernel must be done within FSDP forward pass
|
266
|
+
# so the module entire parameters are summoned and kept in memory during the kernel execution.
|
267
|
+
if isinstance(lm_head, FullyShardedDataParallel):
|
268
|
+
return _FSDPForwardRedirection()(
|
269
|
+
lm_head,
|
270
|
+
_liger_for_causal_lm_loss,
|
271
|
+
lm_head.module,
|
272
|
+
hidden_states,
|
273
|
+
hidden_size,
|
274
|
+
labels,
|
275
|
+
shift_labels,
|
276
|
+
**loss_kwargs,
|
277
|
+
)
|
278
|
+
|
279
|
+
# FSDP is not used so we can read the lm_head weights and call the kernel directly
|
280
|
+
return _liger_for_causal_lm_loss(
|
281
|
+
lm_head=self.lm_head,
|
282
|
+
hidden_states=hidden_states,
|
283
|
+
hidden_size=hidden_size,
|
284
|
+
labels=labels,
|
285
|
+
shift_labels=shift_labels,
|
286
|
+
**loss_kwargs,
|
287
|
+
)
|
288
|
+
|
289
|
+
|
290
|
+
def _liger_for_causal_lm_loss(lm_head, hidden_states, hidden_size, labels, shift_labels, **loss_kwargs):
|
291
|
+
return LigerForCausalLMLoss(
|
292
|
+
hidden_states=hidden_states,
|
293
|
+
lm_head_weight=lm_head.weight,
|
294
|
+
labels=labels,
|
295
|
+
hidden_size=hidden_size,
|
296
|
+
shift_labels=shift_labels,
|
297
|
+
**loss_kwargs,
|
298
|
+
)
|
@@ -35,6 +35,13 @@ from liger_kernel.transformers.swiglu import LigerBlockSparseTop2MLP
|
|
35
35
|
from liger_kernel.transformers.swiglu import LigerPhi3SwiGLUMLP
|
36
36
|
from liger_kernel.transformers.swiglu import LigerSwiGLUMLP
|
37
37
|
|
38
|
+
try:
|
39
|
+
import peft
|
40
|
+
|
41
|
+
PEFT_AVAILABLE = True
|
42
|
+
except ImportError:
|
43
|
+
PEFT_AVAILABLE = False
|
44
|
+
|
38
45
|
transformer_version = version.parse(transformers.__version__)
|
39
46
|
|
40
47
|
logger = logging.getLogger(__name__)
|
@@ -48,22 +55,68 @@ def _bind_method_to_module(module, method_name: str, new_method: Callable):
|
|
48
55
|
|
49
56
|
|
50
57
|
def _patch_rms_norm_module(module, offset=0.0, eps=1e-6, casting_mode="llama", in_place=True):
|
51
|
-
module.offset = offset
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
+
# Check if the module is a PEFT ModulesToSaveWrapper
|
59
|
+
# If it is, we need to patch the modules_to_save.default and original_modules
|
60
|
+
if PEFT_AVAILABLE and isinstance(module, peft.utils.other.ModulesToSaveWrapper):
|
61
|
+
module.modules_to_save.default.offset = offset
|
62
|
+
module.modules_to_save.default.casting_mode = casting_mode
|
63
|
+
module.modules_to_save.default.variance_epsilon = (
|
64
|
+
getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
|
65
|
+
)
|
66
|
+
module.modules_to_save.default.in_place = in_place
|
67
|
+
module.original_module.offset = offset
|
68
|
+
module.original_module.casting_mode = casting_mode
|
69
|
+
module.original_module.variance_epsilon = (
|
70
|
+
getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
|
71
|
+
)
|
72
|
+
module.original_module.in_place = in_place
|
73
|
+
_bind_method_to_module(module.modules_to_save.default, "forward", LigerRMSNorm.forward)
|
74
|
+
_bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerRMSNorm.extra_repr)
|
75
|
+
_bind_method_to_module(module.original_module, "forward", LigerRMSNorm.forward)
|
76
|
+
_bind_method_to_module(module.original_module, "extra_repr", LigerRMSNorm.extra_repr)
|
77
|
+
module.modules_to_save.default.__class__.__name__ = LigerRMSNorm.__name__
|
78
|
+
module.original_module.__class__.__name__ = LigerRMSNorm.__name__
|
79
|
+
else:
|
80
|
+
module.offset = offset
|
81
|
+
module.casting_mode = casting_mode
|
82
|
+
module.variance_epsilon = getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
|
83
|
+
module.in_place = in_place
|
84
|
+
_bind_method_to_module(module, "forward", LigerRMSNorm.forward)
|
85
|
+
_bind_method_to_module(module, "extra_repr", LigerRMSNorm.extra_repr)
|
86
|
+
module.__class__.__name__ = LigerRMSNorm.__name__
|
58
87
|
|
59
88
|
|
60
89
|
def _patch_layer_norm_module(module, eps=1e-6):
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
90
|
+
# Check if the module is a PEFT ModulesToSaveWrapper
|
91
|
+
# If it is, we need to patch the modules_to_save.default and original_modules
|
92
|
+
if PEFT_AVAILABLE and isinstance(module, peft.utils.other.ModulesToSaveWrapper):
|
93
|
+
module.hidden_size = module.normalized_shape
|
94
|
+
_bind_method_to_module(module, "forward", LigerLayerNorm.forward)
|
95
|
+
_bind_method_to_module(module, "extra_repr", LigerLayerNorm.extra_repr)
|
96
|
+
module.modules_to_save.default.variance_epsilon = (
|
97
|
+
getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
|
98
|
+
)
|
99
|
+
module.original_module.hidden_size = getattr(module, "hidden_size", None) or getattr(
|
100
|
+
module, "normalized_shape", None
|
101
|
+
)
|
102
|
+
module.original_module.variance_epsilon = (
|
103
|
+
getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
|
104
|
+
)
|
105
|
+
module.original_module.hidden_size = getattr(module, "hidden_size", None) or getattr(
|
106
|
+
module, "normalized_shape", None
|
107
|
+
)
|
108
|
+
_bind_method_to_module(module.modules_to_save.default, "forward", LigerRMSNorm.forward)
|
109
|
+
_bind_method_to_module(module.modules_to_save.default, "extra_repr", LigerRMSNorm.extra_repr)
|
110
|
+
_bind_method_to_module(module.original_module, "forward", LigerRMSNorm.forward)
|
111
|
+
_bind_method_to_module(module.original_module, "extra_repr", LigerRMSNorm.extra_repr)
|
112
|
+
module.modules_to_save.default.__class__.__name__ = LigerLayerNorm.__name__
|
113
|
+
module.original_module.__class__.__name__ = LigerLayerNorm.__name__
|
114
|
+
else:
|
115
|
+
module.variance_epsilon = getattr(module, "variance_epsilon", None) or getattr(module, "eps", None) or eps
|
116
|
+
module.hidden_size = getattr(module, "hidden_size", None) or getattr(module, "normalized_shape", None)
|
117
|
+
_bind_method_to_module(module, "forward", LigerLayerNorm.forward)
|
118
|
+
_bind_method_to_module(module, "extra_repr", LigerLayerNorm.extra_repr)
|
119
|
+
module.__class__.__name__ = LigerLayerNorm.__name__
|
67
120
|
|
68
121
|
|
69
122
|
def _patch_swiglu_module(module, liger_module):
|
@@ -1,5 +1,3 @@
|
|
1
|
-
from typing import Any
|
2
|
-
from typing import Callable
|
3
1
|
from typing import Dict
|
4
2
|
from typing import List
|
5
3
|
from typing import Literal
|
@@ -13,57 +11,7 @@ from torch.distributed.fsdp import FullyShardedDataParallel
|
|
13
11
|
from trl.trainer import ORPOTrainer
|
14
12
|
|
15
13
|
from liger_kernel.chunked_loss import LigerFusedLinearORPOLoss
|
16
|
-
|
17
|
-
|
18
|
-
class _FSDPForwardRedirection:
|
19
|
-
"""
|
20
|
-
Modified based on
|
21
|
-
https://github.com/Lightning-AI/pytorch-lightning/blob/d3f9c83d6efa4f1def36aa6c199600946cdb9117/src/lightning/pytorch/strategies/strategy.py#L601-L648
|
22
|
-
Redirect a method call through FullyShardedDataParallel.forward so that the FSDP module's root pre-forward and
|
23
|
-
post-forward can be properly executed around the method call.
|
24
|
-
This is needed in cases where we call a submodule of a FSDP module. For instance, when we want to call only
|
25
|
-
the `LlamaModel` part out of a FSDP-wrapped `LlamaForCausalLM` to get the hidden states without involving
|
26
|
-
GPU-memory-heavy `lm_head` and cross entropy computation, doing this directly (i.e. `model.model.forward()`)
|
27
|
-
will not work because the first `nn.Embedding` layer is not independently wrapped as a FSDP module (because of
|
28
|
-
the transformer-based wrapping policy), and not calling it through FSDP root module forward will not all-gather
|
29
|
-
its parameter, thus resulting in "RuntimeError: 'weight' must be 2-D" error. Similarly, if we want to call just
|
30
|
-
the `lm_head` part of a model, we need this trick too to properly get its params all-gathered.
|
31
|
-
"""
|
32
|
-
|
33
|
-
def __call__(
|
34
|
-
self,
|
35
|
-
wrapper_module: FullyShardedDataParallel,
|
36
|
-
method: Callable,
|
37
|
-
*args: Any,
|
38
|
-
**kwargs: Any,
|
39
|
-
):
|
40
|
-
"""Reroutes a method call through the `wrapper_module`'s `forward` method.
|
41
|
-
Args:
|
42
|
-
wrapper_module: The module that has `original_module` wrapped.
|
43
|
-
original_module: The module that was wrapped inside `wrapper_module`.
|
44
|
-
method_name: The name of the method that should be called on the `original_module` after inputs get
|
45
|
-
redirected through the `wrapper_module`'s `forward` method.
|
46
|
-
*args: The positional arguments to the method `method_name`. They will get passed to a patched
|
47
|
-
`forward` method instead.
|
48
|
-
**kwargs: The keyword arguments to the method `method_name`. They will get passed to a patched
|
49
|
-
`forward` method instead.
|
50
|
-
"""
|
51
|
-
assert isinstance(wrapper_module, FullyShardedDataParallel)
|
52
|
-
original_module = wrapper_module._fsdp_wrapped_module
|
53
|
-
original_forward = original_module.forward
|
54
|
-
|
55
|
-
def wrapped_forward(*_args: Any, **_kwargs: Any) -> Any:
|
56
|
-
# Unpatch ourselves immediately before calling the method `method_name`
|
57
|
-
# because itself may want to call the real `forward`
|
58
|
-
original_module.forward = original_forward # type: ignore[method-assign]
|
59
|
-
# Call the actual method e.g. `.training_step(...)`
|
60
|
-
out = method(*_args, **_kwargs)
|
61
|
-
return out
|
62
|
-
|
63
|
-
# Patch the original_module's forward so we can redirect the arguments back to the real method
|
64
|
-
original_module.forward = wrapped_forward # type: ignore[method-assign]
|
65
|
-
wrapper_output = wrapper_module(*args, **kwargs)
|
66
|
-
return wrapper_output
|
14
|
+
from liger_kernel.transformers.fsdp import _FSDPForwardRedirection
|
67
15
|
|
68
16
|
|
69
17
|
class LigerORPOTrainer(ORPOTrainer):
|
@@ -1,6 +1,17 @@
|
|
1
|
+
try:
|
2
|
+
import peft # noqa: F401
|
3
|
+
|
4
|
+
PEFT_AVAILABLE = True
|
5
|
+
except ImportError:
|
6
|
+
PEFT_AVAILABLE = False
|
7
|
+
|
1
8
|
import torch
|
2
9
|
|
3
10
|
|
11
|
+
def is_peft_available():
|
12
|
+
return PEFT_AVAILABLE
|
13
|
+
|
14
|
+
|
4
15
|
def infer_device():
|
5
16
|
"""
|
6
17
|
Get current device name based on available devices
|
@@ -146,6 +146,7 @@ src/liger_kernel/transformers/__init__.py
|
|
146
146
|
src/liger_kernel/transformers/auto_model.py
|
147
147
|
src/liger_kernel/transformers/cross_entropy.py
|
148
148
|
src/liger_kernel/transformers/dyt.py
|
149
|
+
src/liger_kernel/transformers/fsdp.py
|
149
150
|
src/liger_kernel/transformers/functional.py
|
150
151
|
src/liger_kernel/transformers/fused_linear_cross_entropy.py
|
151
152
|
src/liger_kernel/transformers/fused_linear_jsd.py
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.5.9.dev20250519015630 → liger_kernel_nightly-0.5.9.dev20250519035525}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|