liger-kernel-nightly 0.6.2.dev20250823034010__tar.gz → 0.6.2.dev20250826142826__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/pyproject.toml +1 -1
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/fused_linear_cross_entropy.py +15 -2
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/loss_utils.py +1 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_fused_linear_cross_entropy.py +38 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/Makefile +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/data/all_benchmark_data.csv +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/setup.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/experimental/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/functional.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/llama4_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/glm4v.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/model/smollm3.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel_nightly.egg-info/SOURCES.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/bf16/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/fp32/test_mini_models.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_fused_add_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/triton/test_triton_monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/test/utils.py +0 -0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.6.2.dev20250823034010"
|
7
|
+
version = "0.6.2.dev20250826142826"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|
@@ -101,8 +101,21 @@ def fused_linear_cross_entropy_forward(
|
|
101
101
|
# Compute softmax to get predicted probabilities
|
102
102
|
probs = torch.softmax(logits_for_softmax, dim=-1)
|
103
103
|
|
104
|
-
# Get
|
105
|
-
|
104
|
+
# Get predicted probabilities for token scaling, handling ignored targets
|
105
|
+
valid_target_mask = target_chunk != ignore_index
|
106
|
+
valid_targets = target_chunk[valid_target_mask]
|
107
|
+
|
108
|
+
if len(valid_targets) > 0:
|
109
|
+
# Gather probabilities only for valid targets
|
110
|
+
valid_probs = probs[valid_target_mask]
|
111
|
+
pred_probs_valid = torch.gather(valid_probs, -1, valid_targets.unsqueeze(-1)).squeeze(-1)
|
112
|
+
|
113
|
+
# Create full tensor with zeros for ignored targets
|
114
|
+
pred_probs = torch.zeros_like(target_chunk, dtype=probs.dtype, device=probs.device)
|
115
|
+
pred_probs[valid_target_mask] = pred_probs_valid
|
116
|
+
else:
|
117
|
+
# All targets are ignored
|
118
|
+
pred_probs = torch.zeros_like(target_chunk, dtype=probs.dtype, device=probs.device)
|
106
119
|
|
107
120
|
# Store the scaling factors
|
108
121
|
scaling_factors = pred_probs.detach() # Detach to ensure no gradient flow
|
@@ -578,3 +578,41 @@ def test_correctness_token_scaling_module():
|
|
578
578
|
|
579
579
|
# Check that gradients are close
|
580
580
|
assert torch.allclose(x1.grad, x2.grad, atol=1e-5, rtol=1e-5)
|
581
|
+
|
582
|
+
|
583
|
+
def test_token_scaling_with_ignore_index():
|
584
|
+
"""Test token scaling when some targets have ignore_index values."""
|
585
|
+
B, T, H, V = 2, 4, 8, 1000
|
586
|
+
dtype = torch.float32
|
587
|
+
|
588
|
+
# Create inputs
|
589
|
+
_input = torch.randn(B * T, H, device=device, dtype=dtype, requires_grad=True)
|
590
|
+
|
591
|
+
# Create targets with some ignore_index values (-100)
|
592
|
+
target = torch.tensor([0, 100, -100, 500, -100, 999], device=device, dtype=torch.long)
|
593
|
+
_input = torch.randn(6, H, device=device, dtype=dtype, requires_grad=True) # Adjust input size
|
594
|
+
|
595
|
+
# Create weights
|
596
|
+
weight = torch.randn(V, H, device=device, dtype=dtype)
|
597
|
+
bias = torch.randn(V, device=device, dtype=dtype)
|
598
|
+
|
599
|
+
# Test using functional API with token scaling
|
600
|
+
loss_scaled = liger_fused_linear_cross_entropy(
|
601
|
+
input=_input,
|
602
|
+
weight=weight,
|
603
|
+
target=target,
|
604
|
+
bias=bias,
|
605
|
+
ignore_index=-100,
|
606
|
+
reduction="sum",
|
607
|
+
use_token_scaling=True,
|
608
|
+
)
|
609
|
+
|
610
|
+
# This should not raise any CUDA errors
|
611
|
+
assert loss_scaled.numel() == 1 # Should return a scalar for sum reduction
|
612
|
+
assert not torch.isnan(loss_scaled) # Should not be NaN
|
613
|
+
assert not torch.isinf(loss_scaled) # Should not be infinite
|
614
|
+
|
615
|
+
# Test gradients
|
616
|
+
loss_scaled.backward()
|
617
|
+
assert _input.grad is not None
|
618
|
+
assert not torch.isnan(_input.grad).any() # Gradients should not be NaN
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{liger_kernel_nightly-0.6.2.dev20250823034010 → liger_kernel_nightly-0.6.2.dev20250826142826}/NOTICE
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|