liger-kernel-nightly 0.5.9.dev20250512213150__tar.gz → 0.5.9.dev20250515034325__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/PKG-INFO +1 -1
- liger_kernel_nightly-0.5.9.dev20250515034325/examples/medusa/requirements.txt +3 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/scripts/llama3_8b_medusa.sh +2 -5
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/train.py +36 -38
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- liger_kernel_nightly-0.5.9.dev20250512213150/examples/medusa/requirements.txt +0 -3
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.gitignore +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/.idea/workspace.xml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/LICENSE +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/Makefile +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/NOTICE +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/index.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/docs/license.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/setup.cfg +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/setup.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/gema3_rms.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/test/utils.py +0 -0
@@ -22,9 +22,6 @@ export MEDUSA_LR_MULTIPLIER=4.0
|
|
22
22
|
accelerate launch --config_file fsdp/acc-fsdp.conf \
|
23
23
|
--num_machines $NUM_NODES \
|
24
24
|
--num_processes $WORLD_SIZE \
|
25
|
-
--main_process_ip $MASTER_ADDR \
|
26
|
-
--main_process_port $MASTER_PORT \
|
27
|
-
--machine_rank $RANK \
|
28
25
|
train.py \
|
29
26
|
--bf16 True \
|
30
27
|
--output_dir $OUTPUT_DIR \
|
@@ -32,7 +29,7 @@ accelerate launch --config_file fsdp/acc-fsdp.conf \
|
|
32
29
|
--per_device_train_batch_size $LOCAL_TRAIN_BATCH_SIZE \
|
33
30
|
--per_device_eval_batch_size 1 \
|
34
31
|
--gradient_accumulation_steps $GRADIENT_ACCUMULATION_STEPS \
|
35
|
-
--
|
32
|
+
--eval_strategy "no" \
|
36
33
|
--save_strategy "no" \
|
37
34
|
--prediction_loss_only \
|
38
35
|
--learning_rate $LR \
|
@@ -53,4 +50,4 @@ accelerate launch --config_file fsdp/acc-fsdp.conf \
|
|
53
50
|
--medusa_lr_multiplier $MEDUSA_LR_MULTIPLIER \
|
54
51
|
--medusa_only_heads False \
|
55
52
|
--medusa_return True \
|
56
|
-
--use_liger True
|
53
|
+
--use_liger True
|
@@ -32,21 +32,18 @@ from callback import EfficiencyCallback
|
|
32
32
|
from medusa_util import add_medusa_heads
|
33
33
|
from safetensors.torch import save_file
|
34
34
|
from sklearn.model_selection import train_test_split
|
35
|
-
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP
|
36
|
-
from torch.distributed.fsdp.fully_sharded_data_parallel import FullStateDictConfig
|
37
|
-
from torch.distributed.fsdp.fully_sharded_data_parallel import StateDictType
|
38
35
|
from torch.utils.data import Dataset
|
39
36
|
from transformers import Trainer
|
40
37
|
from transformers.trainer_pt_utils import LabelSmoother
|
41
38
|
|
42
|
-
from liger_kernel.transformers import
|
39
|
+
from liger_kernel.transformers import AutoLigerKernelForCausalLM
|
43
40
|
|
44
41
|
IGNORE_TOKEN_ID = LabelSmoother.ignore_index
|
45
42
|
|
46
43
|
|
47
44
|
@dataclass
|
48
45
|
class ModelArguments:
|
49
|
-
model_name_or_path: Optional[str] = field(default="meta-llama/Meta-Llama-3-8B")
|
46
|
+
model_name_or_path: Optional[str] = field(default="meta-llama/Meta-Llama-3-8B-Instruct")
|
50
47
|
|
51
48
|
|
52
49
|
@dataclass
|
@@ -310,29 +307,36 @@ def train():
|
|
310
307
|
print(tokenizer(["This is a test", "secondary"], padding=True))
|
311
308
|
print(tokenizer.apply_chat_template([{"role": "user", "content": "This is a test"}]))
|
312
309
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
# config=config,
|
317
|
-
cache_dir=training_args.cache_dir,
|
318
|
-
torch_dtype=torch.bfloat16,
|
319
|
-
)
|
310
|
+
def _model_loader():
|
311
|
+
# we use a customized model loader to inject medusa heads to FSDP-wrapped model variables properly.
|
312
|
+
# see https://github.com/linkedin/Liger-Kernel/issues/309#issuecomment-2455077623 for details.
|
320
313
|
|
321
|
-
|
322
|
-
|
314
|
+
# Load model
|
315
|
+
if training_args.use_liger:
|
316
|
+
model_builder = AutoLigerKernelForCausalLM.from_pretrained
|
317
|
+
else:
|
318
|
+
model_builder = transformers.AutoModelForCausalLM.from_pretrained
|
319
|
+
model = model_builder(
|
320
|
+
model_args.model_name_or_path,
|
321
|
+
cache_dir=training_args.cache_dir,
|
322
|
+
torch_dtype=torch.bfloat16,
|
323
|
+
)
|
323
324
|
|
324
|
-
|
325
|
-
|
326
|
-
|
325
|
+
# Freeze the base model
|
326
|
+
for param in model.base_model.parameters():
|
327
|
+
param.requires_grad = False
|
328
|
+
|
329
|
+
# Inject Medusa heads
|
330
|
+
add_medusa_heads(
|
331
|
+
model,
|
332
|
+
training_args.medusa_num_heads,
|
333
|
+
training_args.medusa_num_layers,
|
334
|
+
training_args.medusa_return,
|
335
|
+
training_args.medusa_only_heads,
|
336
|
+
training_args.use_liger,
|
337
|
+
)
|
338
|
+
return model
|
327
339
|
|
328
|
-
add_medusa_heads(
|
329
|
-
model,
|
330
|
-
training_args.medusa_num_heads,
|
331
|
-
training_args.medusa_num_layers,
|
332
|
-
training_args.medusa_return,
|
333
|
-
training_args.medusa_only_heads,
|
334
|
-
training_args.use_liger,
|
335
|
-
)
|
336
340
|
# Format output dir
|
337
341
|
training_args.output_dir = f"{training_args.output_dir}_medusa_mlp_{model_args.model_name_or_path.split('/')[-1]}_medusa_{training_args.medusa_num_heads}_lr_{training_args.learning_rate}_layers_{training_args.medusa_num_layers}"
|
338
342
|
|
@@ -341,7 +345,7 @@ def train():
|
|
341
345
|
|
342
346
|
# Start trainner
|
343
347
|
trainer = Trainer(
|
344
|
-
|
348
|
+
model_init=_model_loader,
|
345
349
|
tokenizer=tokenizer,
|
346
350
|
args=training_args,
|
347
351
|
callbacks=[EfficiencyCallback()],
|
@@ -355,17 +359,11 @@ def train():
|
|
355
359
|
|
356
360
|
if training_args.medusa_return and training_args.medusa_only_heads:
|
357
361
|
# Save only the updated head without saving the backbone model
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
with FSDP.state_dict_type(
|
364
|
-
model,
|
365
|
-
StateDictType.FULL_STATE_DICT,
|
366
|
-
FullStateDictConfig(offload_to_cpu=True),
|
367
|
-
):
|
368
|
-
state_dict = lm_head.state_dict()
|
362
|
+
state_dict = {
|
363
|
+
k.replace("medusa_head.", ""): v.to(torch.bfloat16)
|
364
|
+
for k, v in trainer.accelerator.get_state_dict(trainer.model).items()
|
365
|
+
if "medusa_head" in k
|
366
|
+
}
|
369
367
|
|
370
368
|
# Save Medusa heads
|
371
369
|
if local_rank == 0:
|
@@ -373,9 +371,9 @@ def train():
|
|
373
371
|
state_dict,
|
374
372
|
os.path.join(training_args.output_dir, "medusa_lm_head.safetensors"),
|
375
373
|
)
|
374
|
+
trainer.accelerator.wait_for_everyone()
|
376
375
|
else:
|
377
376
|
# Save the whole model weight
|
378
|
-
trainer.accelerator.state.fsdp_plugin.set_state_dict_type("FULL_STATE_DICT")
|
379
377
|
trainer.save_model(training_args.output_dir)
|
380
378
|
|
381
379
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.5.9.dev20250512213150"
|
7
|
+
version = "0.5.9.dev20250515034325"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.5.9.dev20250512213150 → liger_kernel_nightly-0.5.9.dev20250515034325}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|