liger-kernel-nightly 0.6.2.dev20251014053719__tar.gz → 0.6.2.dev20251016055812__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- liger_kernel_nightly-0.6.2.dev20251016055812/.github/workflows/docs.yml +64 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/acknowledgement.md +0 -1
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/cross_entropy.py +4 -1
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/fused_linear_cross_entropy.py +14 -10
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- liger_kernel_nightly-0.6.2.dev20251014053719/.github/workflows/docs.yml +0 -33
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/Makefile +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/setup.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/falcon_h1.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/glm4v.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/glm4v_moe.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/internvl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/OpenGVLab/InternVL3-1B-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_poly_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/test/utils.py +0 -0
@@ -0,0 +1,64 @@
|
|
1
|
+
name: Publish documentation
|
2
|
+
on:
|
3
|
+
push:
|
4
|
+
branches:
|
5
|
+
- main
|
6
|
+
paths:
|
7
|
+
- 'docs/**'
|
8
|
+
- 'mkdocs.yml'
|
9
|
+
|
10
|
+
permissions:
|
11
|
+
contents: write
|
12
|
+
jobs:
|
13
|
+
deploy:
|
14
|
+
runs-on: ubuntu-latest
|
15
|
+
steps:
|
16
|
+
- uses: actions/checkout@v4
|
17
|
+
- name: Configure Git Credentials
|
18
|
+
run: |
|
19
|
+
git config user.name github-actions[bot]
|
20
|
+
git config user.email 41898282+github-actions[bot]@users.noreply.github.com
|
21
|
+
- uses: actions/setup-python@v5
|
22
|
+
with:
|
23
|
+
python-version: 3.x
|
24
|
+
- run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
|
25
|
+
- uses: actions/cache@v4
|
26
|
+
with:
|
27
|
+
key: mkdocs-material-${{ env.cache_id }}
|
28
|
+
path: .cache
|
29
|
+
restore-keys: |
|
30
|
+
mkdocs-material-
|
31
|
+
- run: pip install mkdocs-material mkdocstrings[python]
|
32
|
+
# ====== Backup the benchmarks from gh-pages ======
|
33
|
+
# This is necessary because the benchmarks are not included in the documentation build process.
|
34
|
+
# So we need to backup the benchmarks from gh-pages and restore them after the documentation is built.
|
35
|
+
- name: Backup benchmarks from gh-pages
|
36
|
+
run: |
|
37
|
+
git fetch origin gh-pages
|
38
|
+
# create worktree bound to local gh-pages, tracking origin/gh-pages
|
39
|
+
git branch -f gh-pages origin/gh-pages || true
|
40
|
+
mkdir -p ghp && git worktree add ghp gh-pages || true
|
41
|
+
if [ -d ghp/benchmarks ]; then
|
42
|
+
tar -C ghp -czf /tmp/benchmarks.tgz benchmarks
|
43
|
+
fi
|
44
|
+
# IMPORTANT: remove worktree so gh-pages isn't checked out anywhere
|
45
|
+
git worktree remove ghp --force || true
|
46
|
+
echo "Backed up benchmarks from gh-pages"
|
47
|
+
# ====== Deploy the documentation ======
|
48
|
+
- name: Deploy documentation
|
49
|
+
run: mkdocs gh-deploy --force
|
50
|
+
# ====== Restore the benchmarks onto gh-pages ======
|
51
|
+
# This is necessary because the benchmarks are not included in the documentation build process.
|
52
|
+
# So we need to restore the benchmarks onto gh-pages after the documentation is built.
|
53
|
+
- name: Restore benchmarks onto gh-pages
|
54
|
+
run: |
|
55
|
+
# Refresh remote tracking and recreate a clean worktree
|
56
|
+
git fetch origin gh-pages
|
57
|
+
git worktree add -B gh-pages ghp origin/gh-pages
|
58
|
+
if [ -f /tmp/benchmarks.tgz ]; then
|
59
|
+
tar -C ghp -xzf /tmp/benchmarks.tgz
|
60
|
+
git -C ghp add -A
|
61
|
+
git -C ghp commit -m "Restore benchmarks after gh-deploy" || echo "No changes"
|
62
|
+
git -C ghp push origin gh-pages
|
63
|
+
fi
|
64
|
+
git worktree remove ghp --force || true
|
@@ -9,7 +9,6 @@
|
|
9
9
|
We referenced or used the following projects:
|
10
10
|
|
11
11
|
|
12
|
-
|
13
12
|
| # | Project | Description | Location | License |
|
14
13
|
|---|----------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------|
|
15
14
|
| 1 | [Unsloth](https://github.com/unslothai/unsloth/blob/fd753fed99ed5f10ef8a9b7139588d9de9ddecfb/unsloth/kernels/utils.py#L43) | `calculate_settings` to determine block size and warp; We reuse it for Norm and MLP | [Liger Kernel Utils](https://github.com/linkedin/Liger-Kernel/blob/e249eee723978bf8610ff1ea2297d048a2417e20/src/liger_kernel/ops/utils.py#L23) | [Apache](https://github.com/unslothai/unsloth/blob/fd753fed99ed5f10ef8a9b7139588d9de9ddecfb/LICENSE) |
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.6.2.dev20251014053719"
|
7
|
+
version = "0.6.2.dev20251016055812"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -414,6 +414,8 @@ class LigerCrossEntropyFunction(torch.autograd.Function):
|
|
414
414
|
Returns:
|
415
415
|
tuple: A tuple with the compouted losses with respect to loss and z loss. The elements are tensors or None.
|
416
416
|
"""
|
417
|
+
input_requires_grad = _input.requires_grad
|
418
|
+
|
417
419
|
loss, z_loss, _input = cross_entropy_forward(
|
418
420
|
_input,
|
419
421
|
target,
|
@@ -428,7 +430,8 @@ class LigerCrossEntropyFunction(torch.autograd.Function):
|
|
428
430
|
# TODO: investigation
|
429
431
|
# If we don't detach the _input tensor, the memory will double
|
430
432
|
# Not sure why but seems that there will be a time both grad and value exist but in different location
|
431
|
-
|
433
|
+
if input_requires_grad:
|
434
|
+
ctx.save_for_backward(_input.detach())
|
432
435
|
ctx.return_z_loss = return_z_loss
|
433
436
|
|
434
437
|
return loss, z_loss
|
@@ -31,6 +31,8 @@ def fused_linear_cross_entropy_forward(
|
|
31
31
|
assert isinstance(return_z_loss, bool), f"return_z_loss must be True or False. Got: {return_z_loss}"
|
32
32
|
device = _input.device
|
33
33
|
|
34
|
+
input_requires_grad = _input.requires_grad
|
35
|
+
|
34
36
|
# inputs have shape: BT x H
|
35
37
|
# materialized activations will have shape: BT x V
|
36
38
|
# the increase in memory = BT x V
|
@@ -49,12 +51,13 @@ def fused_linear_cross_entropy_forward(
|
|
49
51
|
grad_input = torch.zeros_like(_input, device=device)
|
50
52
|
|
51
53
|
# we use fp32 for loss and gradients accumulator
|
52
|
-
if
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
54
|
+
if input_requires_grad:
|
55
|
+
if accum_dtype is None:
|
56
|
+
grad_weight = torch.zeros_like(weight, device=device) if weight.requires_grad else None
|
57
|
+
grad_bias = torch.zeros_like(bias, device=device) if bias is not None else None
|
58
|
+
else:
|
59
|
+
grad_weight = torch.zeros_like(weight, dtype=accum_dtype, device=device) if weight.requires_grad else None
|
60
|
+
grad_bias = torch.zeros_like(bias, dtype=accum_dtype, device=device) if bias is not None else None
|
58
61
|
|
59
62
|
loss_1d = torch.zeros(BT, dtype=torch.float32, device=device)
|
60
63
|
z_loss_1d = torch.zeros(BT, dtype=_input.dtype, device=_input.device) if return_z_loss else None
|
@@ -150,7 +153,7 @@ def fused_linear_cross_entropy_forward(
|
|
150
153
|
RETURN_Z_LOSS=return_z_loss,
|
151
154
|
HAS_WEIGHT=True if ce_weight is not None else False,
|
152
155
|
HAS_SOFTCAPPING=True if softcap is not None else False,
|
153
|
-
HAS_GRADIENTS=_input.requires_grad,
|
156
|
+
HAS_GRADIENTS=input_requires_grad,
|
154
157
|
BLOCK_SIZE=BLOCK_SIZE,
|
155
158
|
num_warps=32 if not is_hip() else 16,
|
156
159
|
)
|
@@ -172,12 +175,13 @@ def fused_linear_cross_entropy_forward(
|
|
172
175
|
scaling_factors_expanded = scaling_factors.unsqueeze(-1) # chunk_size x 1
|
173
176
|
grad_logits_chunk = grad_logits_chunk * scaling_factors_expanded
|
174
177
|
|
175
|
-
|
178
|
+
if input_requires_grad:
|
179
|
+
grad_input[start_idx:end_idx] = grad_logits_chunk @ weight
|
176
180
|
|
177
|
-
if grad_weight is not None and _input.requires_grad:
|
181
|
+
if grad_weight is not None and input_requires_grad:
|
178
182
|
grad_weight += torch.mm(grad_logits_chunk.t(), _input_chunk).float()
|
179
183
|
|
180
|
-
if bias is not None and _input.requires_grad:
|
184
|
+
if bias is not None and input_requires_grad:
|
181
185
|
torch.add(
|
182
186
|
input=grad_bias,
|
183
187
|
other=grad_logits_chunk.sum(dim=0),
|
@@ -1,33 +0,0 @@
|
|
1
|
-
name: Publish documentation
|
2
|
-
on:
|
3
|
-
push:
|
4
|
-
branches:
|
5
|
-
- main
|
6
|
-
paths:
|
7
|
-
- 'docs/**'
|
8
|
-
- 'mkdocs.yml'
|
9
|
-
|
10
|
-
permissions:
|
11
|
-
contents: write
|
12
|
-
jobs:
|
13
|
-
deploy:
|
14
|
-
if: False
|
15
|
-
runs-on: ubuntu-latest
|
16
|
-
steps:
|
17
|
-
- uses: actions/checkout@v4
|
18
|
-
- name: Configure Git Credentials
|
19
|
-
run: |
|
20
|
-
git config user.name github-actions[bot]
|
21
|
-
git config user.email 41898282+github-actions[bot]@users.noreply.github.com
|
22
|
-
- uses: actions/setup-python@v5
|
23
|
-
with:
|
24
|
-
python-version: 3.x
|
25
|
-
- run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV
|
26
|
-
- uses: actions/cache@v4
|
27
|
-
with:
|
28
|
-
key: mkdocs-material-${{ env.cache_id }}
|
29
|
-
path: .cache
|
30
|
-
restore-keys: |
|
31
|
-
mkdocs-material-
|
32
|
-
- run: pip install mkdocs-material mkdocstrings[python]
|
33
|
-
- run: mkdocs gh-deploy --force
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.6.2.dev20251014053719 → liger_kernel_nightly-0.6.2.dev20251016055812}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|