liger-kernel-nightly 0.6.0.dev20250718050347__tar.gz → 0.6.0.dev20250718080702__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/data/all_benchmark_data.csv +72 -0
- liger_kernel_nightly-0.6.0.dev20250718080702/benchmark/scripts/benchmark_fused_add_rms_norm.py +201 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/pyproject.toml +1 -1
- liger_kernel_nightly-0.6.0.dev20250718080702/src/liger_kernel/ops/fused_add_rms_norm.py +412 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/__init__.py +5 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/functional.py +5 -0
- liger_kernel_nightly-0.6.0.dev20250718080702/src/liger_kernel/transformers/fused_add_rms_norm.py +39 -0
- liger_kernel_nightly-0.6.0.dev20250718080702/src/liger_kernel/transformers/model/smollm3.py +189 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/monkey_patch.py +73 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel_nightly.egg-info/PKG-INFO +1 -1
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel_nightly.egg-info/SOURCES.txt +5 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/bf16/test_mini_models.py +65 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/bf16/test_mini_models_with_logits.py +64 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/fp32/test_mini_models.py +62 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/fp32/test_mini_models_with_logits.py +62 -0
- liger_kernel_nightly-0.6.0.dev20250718080702/test/transformers/test_fused_add_rms_norm.py +219 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_monkey_patch.py +51 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/utils.py +12 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/pull_request_template.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/workflows/amd-ci.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/workflows/benchmark.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/workflows/docs.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/workflows/intel-ci.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/workflows/nvi-ci.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/workflows/publish-nightly.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.github/workflows/publish-release.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/.gitignore +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/LICENSE +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/Makefile +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/NOTICE +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/benchmarks_visualizer.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_distill_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_dyt.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_embedding.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_geglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_rope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_softmax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_sparse_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/benchmark_tvd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/benchmark/scripts/utils.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/dev/fmt-requirements.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/dev/modal/benchmarks.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/dev/modal/tests.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/dev/modal/tests_bwd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/Examples.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/Getting-Started.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/High-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/Low-Level-APIs.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/acknowledgement.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/contributing.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/images/banner.GIF +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/images/compose.gif +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/images/e2e-memory.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/images/e2e-tps.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/images/logo-banner.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/images/patch.gif +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/images/post-training.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/index.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/docs/license.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/alignment/accelerate_config.yaml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/alignment/run_orpo.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/callback.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/config/fsdp_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/img/gemma_7b_mem.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/img/gemma_7b_tp.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/img/llama_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/img/llama_tps.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/img/qwen_tps.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/launch_on_modal.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/run_benchmarks.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/run_gemma.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/run_llama.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/run_qwen.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/run_qwen2_vl.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/training.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/huggingface/training_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/lightning/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/lightning/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/lightning/training.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/callback.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/medusa_util.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/requirements.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/examples/medusa/train.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/licenses/LICENSE-Apache-2.0 +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/licenses/LICENSE-MIT-AutoAWQ +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/licenses/LICENSE-MIT-llmc +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/licenses/LICENSE-MIT-triton +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/mkdocs.yml +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/setup.cfg +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/setup.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/README.md +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/cosine_similarity_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/functional.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/env_report.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/dyt.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/geglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/rope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/softmax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/tvd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/ops/utils.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/auto_model.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/dyt.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/fsdp.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/geglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/group_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/kl_div.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/gemma.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/gemma2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/gemma3.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/glm4.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/llama.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/llama4.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/llava.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/mistral.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/mixtral.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/mllama.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/olmo2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/paligemma.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/phi3.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/qwen2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/qwen3.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/rope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/softmax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/swiglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/transformers/tvd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/triton/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/triton/monkey_patch.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel/utils.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/test_cosine_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/test_cpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/test_dpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/test_jsd_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/test_kto_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/test_orpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/chunked_loss/test_simpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/conftest.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/bf16/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/fp32/__init__.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/fake_configs/meta-llama/Llama-4-Scout-17B-16E-Instruct/tokenizer_config.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/tiny_shakespeare.txt +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_auto_model.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_dyt.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_embedding.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_flex_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_fused_linear_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_fused_neighborhood_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_geglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_group_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_grpo_loss.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_jsd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_kl_div.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_layer_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_mm_int8int2.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_multi_token_attention.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_qwen2vl_mrope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_rms_norm.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_rope.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_softmax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_sparsemax.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_swiglu.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_trainer_integration.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_transformers.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/transformers/test_tvd.py +0 -0
- {liger_kernel_nightly-0.6.0.dev20250718050347 → liger_kernel_nightly-0.6.0.dev20250718080702}/test/triton/test_triton_monkey_patch.py +0 -0
@@ -1493,3 +1493,75 @@ distill_cosine_loss,torch,full,memory,MB,BT,B x T,1024,7566.2822265625,7566.2822
|
|
1493
1493
|
distill_cosine_loss,torch,full,memory,MB,BT,B x T,2048,11590.3134765625,11590.3134765625,11590.3134765625,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
|
1494
1494
|
distill_cosine_loss,torch,full,memory,MB,BT,B x T,4096,19654.375,19654.375,19654.375,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
|
1495
1495
|
distill_cosine_loss,torch,full,memory,MB,BT,B x T,8192,35782.5,35782.5,35782.5,"{""H"": 4096, ""V"": 128256, ""mode"": ""forward"", ""dtype"": ""torch.bfloat16"", ""bias"": false, ""weight_hard_loss"": 0.5, ""weight_soft_loss"": 0.5, ""ignore_index"": -100}",NVIDIA A100-SXM4-80GB,2025-06-27 09:23:28,0.5.10
|
1496
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,1024,0.01759999990463257,0.017311999574303627,0.017920000478625298,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
|
1497
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,2048,0.02924799919128418,0.028863999992609024,0.029983999207615852,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
|
1498
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,4096,0.05129599943757057,0.050624001771211624,0.05209600180387497,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
|
1499
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,8192,0.09344000369310379,0.09296000003814697,0.09382399916648865,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
|
1500
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,16384,0.1791680008172989,0.17814399302005768,0.1796800047159195,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
|
1501
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,forward,speed,ms,H,hidden size,32768,0.43830400705337524,0.43744000792503357,0.43929600715637207,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:20,0.6.0
|
1502
|
+
fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,1024,0.060095999389886856,0.059808000922203064,0.06054399907588959,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
|
1503
|
+
fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,2048,0.09084799885749817,0.09027200192213058,0.09161599725484848,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
|
1504
|
+
fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,4096,0.17820799350738525,0.17744000256061554,0.17897599935531616,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
|
1505
|
+
fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,8192,0.312608003616333,0.3118720054626465,0.31324800848960876,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
|
1506
|
+
fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,16384,0.574944019317627,0.5740479826927185,0.5756288051605225,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
|
1507
|
+
fused_add_rms_norm,huggingface,forward,speed,ms,H,hidden size,32768,1.0943039655685425,1.0934272289276123,1.0951999425888062,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:23,0.6.0
|
1508
|
+
fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,1024,0.0352960005402565,0.03481600061058998,0.03811199963092804,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
|
1509
|
+
fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,2048,0.05430399999022484,0.05392000079154968,0.05503999814391136,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
|
1510
|
+
fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,4096,0.10592000186443329,0.1054655984044075,0.10630399733781815,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
|
1511
|
+
fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,8192,0.19679999351501465,0.19631999731063843,0.19724799692630768,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
|
1512
|
+
fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,16384,0.37436801195144653,0.3733760118484497,0.3752320110797882,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
|
1513
|
+
fused_add_rms_norm,liger_rms_norm,forward,speed,ms,H,hidden size,32768,0.7376000285148621,0.7361343741416931,0.7391359806060791,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:26,0.6.0
|
1514
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,1024,0.3147200047969818,0.30796160697937014,0.32764801383018494,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
|
1515
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,2048,0.3089919984340668,0.30374398827552795,0.3226880133152008,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
|
1516
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,4096,0.30691200494766235,0.3023296058177948,0.3205504059791565,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
|
1517
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,8192,0.3246079981327057,0.3185984075069428,0.33656961321830753,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
|
1518
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,16384,0.6010559797286987,0.5996800065040588,0.6026239991188049,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
|
1519
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,speed,ms,H,hidden size,32768,1.8402559757232666,1.8322880268096924,1.8461120128631592,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:30,0.6.0
|
1520
|
+
fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,1024,0.23878400027751923,0.23545600473880768,0.2507520020008087,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
|
1521
|
+
fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,2048,0.34513600170612335,0.34377598762512207,0.34678399562835693,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
|
1522
|
+
fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,4096,0.6330879926681519,0.631712019443512,0.6345599889755249,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
|
1523
|
+
fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,8192,1.1185599565505981,1.1172800064086914,1.1196800470352173,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
|
1524
|
+
fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,16384,2.0697600841522217,2.0678528785705566,2.0713536739349365,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
|
1525
|
+
fused_add_rms_norm,huggingface,full,speed,ms,H,hidden size,32768,3.9561920166015625,3.953824043273926,3.9581120014190674,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:33,0.6.0
|
1526
|
+
fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,1024,0.38916800916194916,0.3824320137500763,0.4037184059619903,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
|
1527
|
+
fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,2048,0.3890720009803772,0.38193280100822447,0.4032831907272339,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
|
1528
|
+
fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,4096,0.39715200662612915,0.3928639888763428,0.41097599267959595,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
|
1529
|
+
fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,8192,0.6275200247764587,0.6259520053863525,0.6287999749183655,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
|
1530
|
+
fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,16384,1.202239990234375,1.199679970741272,1.2048959732055664,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
|
1531
|
+
fused_add_rms_norm,liger_rms_norm,full,speed,ms,H,hidden size,32768,2.7738559246063232,2.7705343723297116,2.777868890762329,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:36,0.6.0
|
1532
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,1024,0.15619200468063354,0.15376000106334686,0.1661248028278351,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
|
1533
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,2048,0.15825600177049637,0.15600000321865082,0.16911999881267548,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
|
1534
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,4096,0.16700799763202667,0.16502399742603302,0.1709440052509308,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
|
1535
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,8192,0.1712000072002411,0.1700800061225891,0.17215999960899353,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
|
1536
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,16384,0.42505601048469543,0.4233280122280121,0.42691200971603394,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
|
1537
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,backward,speed,ms,H,hidden size,32768,1.4057759642601013,1.3944000005722046,1.4099839925765991,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:39,0.6.0
|
1538
|
+
fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,1024,0.1520960032939911,0.15136000514030457,0.1528960019350052,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
|
1539
|
+
fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,2048,0.2533760070800781,0.2524160146713257,0.25436800718307495,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
|
1540
|
+
fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,4096,0.4551039934158325,0.4540799856185913,0.45612800121307373,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
|
1541
|
+
fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,8192,0.8053439855575562,0.8038079738616943,0.806656002998352,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
|
1542
|
+
fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,16384,1.4933120012283325,1.492095947265625,1.49452805519104,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
|
1543
|
+
fused_add_rms_norm,huggingface,backward,speed,ms,H,hidden size,32768,2.8600640296936035,2.8583295822143557,2.8612607955932616,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:42,0.6.0
|
1544
|
+
fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,1024,0.20175999402999878,0.199072003364563,0.2154303938150406,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1545
|
+
fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,2048,0.20263999700546265,0.20000000298023224,0.21675519943237304,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1546
|
+
fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,4096,0.25276800990104675,0.2515519857406616,0.2539199888706207,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1547
|
+
fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,8192,0.4322720021009445,0.43088001012802124,0.4336000084877014,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1548
|
+
fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,16384,0.8288000226020813,0.8266303777694701,0.8311295866966247,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1549
|
+
fused_add_rms_norm,liger_rms_norm,backward,speed,ms,H,hidden size,32768,2.03987193107605,2.0360767364501955,2.0436416149139403,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1550
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,1024,72.546875,72.546875,72.546875,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1551
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,2048,145.0859375,145.0859375,145.0859375,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1552
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,4096,290.1640625,290.1640625,290.1640625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1553
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,8192,580.3203125,580.3203125,580.3203125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1554
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,16384,1160.6328125,1160.6328125,1160.6328125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1555
|
+
fused_add_rms_norm,liger_fused_add_rms_norm,full,memory,MB,H,hidden size,32768,2321.2578125,2321.2578125,2321.2578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1556
|
+
fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,1024,104.03173828125,104.03173828125,104.03173828125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1557
|
+
fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,2048,208.05517578125,208.05517578125,208.05517578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1558
|
+
fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,4096,416.10205078125,416.10205078125,416.10205078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1559
|
+
fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,8192,832.19580078125,832.19580078125,832.19580078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1560
|
+
fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,16384,1664.3125,1664.3125,1664.3125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1561
|
+
fused_add_rms_norm,huggingface,full,memory,MB,H,hidden size,32768,3328.625,3328.625,3328.625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1562
|
+
fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,1024,104.03564453125,104.03564453125,104.03564453125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1563
|
+
fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,2048,208.06298828125,208.06298828125,208.06298828125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1564
|
+
fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,4096,416.11767578125,416.11767578125,416.11767578125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1565
|
+
fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,8192,832.22705078125,832.22705078125,832.22705078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1566
|
+
fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,16384,1544.44580078125,1544.44580078125,1544.44580078125,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
1567
|
+
fused_add_rms_norm,liger_rms_norm,full,memory,MB,H,hidden size,32768,2960.8837890625,2960.8837890625,2960.8837890625,"{""M"": 2048, ""dtype"": ""torch.float32"", ""eps"": 1e-06}",NVIDIA H100 NVL,2025-07-16 07:04:45,0.6.0
|
liger_kernel_nightly-0.6.0.dev20250718080702/benchmark/scripts/benchmark_fused_add_rms_norm.py
ADDED
@@ -0,0 +1,201 @@
|
|
1
|
+
import torch
|
2
|
+
import torch.nn as nn
|
3
|
+
import triton
|
4
|
+
|
5
|
+
from utils import QUANTILES
|
6
|
+
from utils import SingleBenchmarkRunInput
|
7
|
+
from utils import SingleBenchmarkRunOutput
|
8
|
+
from utils import _test_memory
|
9
|
+
from utils import parse_benchmark_script_args
|
10
|
+
from utils import run_benchmarks
|
11
|
+
|
12
|
+
from liger_kernel.transformers.fused_add_rms_norm import LigerFusedAddRMSNorm
|
13
|
+
from liger_kernel.transformers.rms_norm import LigerRMSNorm
|
14
|
+
from liger_kernel.utils import infer_device
|
15
|
+
|
16
|
+
device = infer_device()
|
17
|
+
|
18
|
+
|
19
|
+
class NaiveAddRMSNorm(nn.Module):
    """Plain-PyTorch residual add followed by RMS normalization.

    Used as the unfused reference implementation in this benchmark.
    """

    def __init__(self, hidden_size, eps=1e-6):
        """Create a learnable scale vector of length ``hidden_size`` filled with ones.

        Args:
            hidden_size: Length of the last (normalized) dimension.
            eps: Constant added to the mean square before the reciprocal square root.
        """
        super().__init__()
        self.variance_epsilon = eps
        self.weight = nn.Parameter(torch.ones(hidden_size))

    def forward(self, hidden_states, residual):
        """Add ``residual`` to ``hidden_states`` and RMS-normalize the sum.

        Returns:
            A pair ``(normalized, summed)``: the weighted normalized sum and the
            raw sum, the latter cast back to the dtype of ``hidden_states``.
        """
        out_dtype = hidden_states.dtype
        # Both operands are promoted to float32 before the addition and statistics.
        summed = hidden_states.to(torch.float32) + residual.to(torch.float32)
        mean_square = summed.pow(2).mean(-1, keepdim=True)
        normed = summed * torch.rsqrt(mean_square + self.variance_epsilon)
        return self.weight * normed.to(out_dtype), summed.to(out_dtype)
|
37
|
+
|
38
|
+
|
39
|
+
class AddLigerRMSNorm(nn.Module):
    """Residual add in PyTorch followed by the LigerRMSNorm kernel.

    Benchmark counterpart of NaiveAddRMSNorm: the addition is still a separate
    PyTorch op, only the normalization is delegated to ``LigerRMSNorm``.
    """

    def __init__(self, hidden_size, eps=1e-6):
        """Build the scale parameter and the wrapped ``LigerRMSNorm`` module.

        Args:
            hidden_size: Length of the last (normalized) dimension.
            eps: Epsilon forwarded to ``LigerRMSNorm``.
        """
        super().__init__()
        self.variance_epsilon = eps
        self.weight = nn.Parameter(torch.ones(hidden_size))
        self.rms_norm = LigerRMSNorm(hidden_size, eps, in_place=False)

    def forward(self, hidden_states, residual):
        """Add ``residual`` to ``hidden_states`` and normalize the sum.

        Returns:
            A pair ``(normalized, summed)``; ``summed`` is cast back to the dtype
            of ``hidden_states``.
        """
        out_dtype = hidden_states.dtype
        # The addition is carried out in float32, mirroring NaiveAddRMSNorm.
        summed = hidden_states.to(torch.float32) + residual.to(torch.float32)
        normed = self.rms_norm(summed)
        # The kernel output is additionally multiplied by this module's own weight.
        return self.weight * normed.to(out_dtype), summed.to(out_dtype)
|
57
|
+
|
58
|
+
|
59
|
+
def bench_speed_fused_residual_rms_norm(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput:
    """Time one provider of residual-add + RMS norm for a single hidden size.

    Args:
        input: Benchmark request. ``input.x`` is the hidden size,
            ``input.kernel_provider`` selects the implementation,
            ``input.kernel_operation_mode`` is ``"forward"``, ``"backward"`` or
            ``"full"``, and ``input.extra_benchmark_config`` supplies ``"M"``
            (number of rows), ``"eps"`` and ``"dtype"``.

    Returns:
        The three timing values reported by ``triton.testing.do_bench`` for
        ``QUANTILES``, stored as ``y_20``/``y_50``/``y_80``.

    Raises:
        ValueError: If the provider or the operation mode is not supported.
            Previously an unknown provider silently timed a no-op in forward
            mode, and an unknown mode failed with ``UnboundLocalError``.
    """
    N = input.x
    provider = input.kernel_provider
    mode = input.kernel_operation_mode

    supported_modes = ("forward", "backward", "full")
    if mode not in supported_modes:
        raise ValueError(f"Unsupported kernel_operation_mode {mode!r}; expected one of {supported_modes}")

    extra_benchmark_config = input.extra_benchmark_config
    M = extra_benchmark_config["M"]
    eps = extra_benchmark_config["eps"]
    dtype = extra_benchmark_config["dtype"]

    x_shape = (M, N)

    # All three implementations are instantiated, as before, so that measurement
    # conditions stay the same whichever provider is selected.
    modules = {
        # Fused Add RMS Norm
        "liger_fused_add_rms_norm": LigerFusedAddRMSNorm(hidden_size=N, eps=eps).to(device),
        # Naive implementation
        "huggingface": NaiveAddRMSNorm(hidden_size=N, eps=eps).to(device),
        # LigerRMSNorm without fused residual addition
        "liger_rms_norm": AddLigerRMSNorm(hidden_size=N, eps=eps).to(device),
    }
    if provider not in modules:
        raise ValueError(f"Unsupported kernel_provider {provider!r}; expected one of {sorted(modules)}")
    module = modules[provider]

    x = torch.randn(x_shape, dtype=dtype, device=device)
    r = torch.randn(x_shape, dtype=dtype, device=device)
    dy = torch.randn_like(x)
    ds = torch.randn_like(r)
    x.requires_grad_(True)
    r.requires_grad_(True)

    def y_fwd():
        return module(x, r)

    if mode == "forward":
        bench_fn = y_fwd
    elif mode == "backward":
        # Run the forward pass once outside the timed region; retain_graph lets
        # the same graph be differentiated on every timed repetition.
        y, s = y_fwd()

        def bench_fn():
            torch.autograd.backward((y, s), (dy, ds), retain_graph=True)

    else:  # mode == "full"

        def bench_fn():
            y, s = y_fwd()
            torch.autograd.backward((y, s), (dy, ds))

    # NOTE(review): the unpacking order assumes QUANTILES is ordered as
    # (0.5, 0.2, 0.8) — confirm against utils.QUANTILES.
    ms_50, ms_20, ms_80 = triton.testing.do_bench(
        bench_fn,
        grad_to_none=[x, r],
        rep=500,
        quantiles=QUANTILES,
    )

    return SingleBenchmarkRunOutput(
        y_20=ms_20,
        y_50=ms_50,
        y_80=ms_80,
    )
|
129
|
+
|
130
|
+
|
131
|
+
def bench_memory_fused_residual_rms_norm(input: SingleBenchmarkRunInput) -> SingleBenchmarkRunOutput:
    """Measure memory of a forward + backward pass for one provider and hidden size.

    Args:
        input: Benchmark request. ``input.x`` is the hidden size,
            ``input.kernel_provider`` selects the implementation, and
            ``input.extra_benchmark_config`` supplies ``"M"`` (number of rows),
            ``"eps"`` and ``"dtype"``.

    Returns:
        The three values returned by ``_test_memory`` for ``QUANTILES``, stored
        as ``y_20``/``y_50``/``y_80``.

    Raises:
        ValueError: If the provider is not supported. Previously this surfaced
            as a ``TypeError`` while unpacking ``None``.
    """
    N = input.x
    provider = input.kernel_provider

    extra_benchmark_config = input.extra_benchmark_config
    M = extra_benchmark_config["M"]
    eps = extra_benchmark_config["eps"]
    dtype = extra_benchmark_config["dtype"]

    x_shape = (M, N)

    # All three implementations are instantiated, as before, so the measured
    # footprint stays comparable with previously recorded results.
    modules = {
        "liger_fused_add_rms_norm": LigerFusedAddRMSNorm(hidden_size=N, eps=eps).to(device),
        "huggingface": NaiveAddRMSNorm(hidden_size=N, eps=eps).to(device),
        "liger_rms_norm": AddLigerRMSNorm(hidden_size=N, eps=eps).to(device),
    }
    if provider not in modules:
        raise ValueError(f"Unsupported kernel_provider {provider!r}; expected one of {sorted(modules)}")
    module = modules[provider]

    x = torch.randn(x_shape, dtype=dtype, device=device)
    r = torch.randn(x_shape, dtype=dtype, device=device)
    dy = torch.randn_like(x)
    ds = torch.randn_like(r)
    x.requires_grad_(True)
    r.requires_grad_(True)

    def full():
        # One complete training-style step: forward, then backward through both outputs.
        y, s = module(x, r)
        torch.autograd.backward((y, s), (dy, ds))

    # NOTE(review): the unpacking order assumes QUANTILES is ordered as
    # (0.5, 0.2, 0.8) — confirm against utils.QUANTILES.
    mem_50, mem_20, mem_80 = _test_memory(full, quantiles=QUANTILES)

    return SingleBenchmarkRunOutput(
        y_20=mem_20,
        y_50=mem_50,
        y_80=mem_80,
    )
|
173
|
+
|
174
|
+
|
175
|
+
if __name__ == "__main__":
    # Script entry point: run the speed sweep first, then the memory sweep,
    # both sharing the same kernel/provider/size configuration.
    args = parse_benchmark_script_args()

    hidden_sizes = [2**exponent for exponent in range(10, 16)]
    providers = ["liger_fused_add_rms_norm", "huggingface", "liger_rms_norm"]

    common_configs = {
        "kernel_name": "fused_add_rms_norm",
        "x_name": "H",
        "x_label": "hidden size",
        "x_values": hidden_sizes,
        "kernel_providers": providers,
        "extra_benchmark_configs": [{"M": 2048, "dtype": torch.float32, "eps": 1e-6}],
        "overwrite": args.overwrite,
    }

    # (benchmark function, operation modes, metric name, metric unit)
    sweeps = (
        (bench_speed_fused_residual_rms_norm, ["forward", "full", "backward"], "speed", "ms"),
        (bench_memory_fused_residual_rms_norm, ["full"], "memory", "MB"),
    )
    for bench_fn, operation_modes, metric, unit in sweeps:
        run_benchmarks(
            bench_test_fn=bench_fn,
            kernel_operation_modes=operation_modes,
            metric_name=metric,
            metric_unit=unit,
            **common_configs,
        )
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4
4
|
|
5
5
|
[project]
|
6
6
|
name = "liger_kernel_nightly"
|
7
|
-
version = "0.6.0.dev20250718050347"
|
7
|
+
version = "0.6.0.dev20250718080702"
|
8
8
|
description = "Efficient Triton kernels for LLM Training"
|
9
9
|
urls = { "Homepage" = "https://github.com/linkedin/Liger-Kernel" }
|
10
10
|
readme = { file = "README.md", content-type = "text/markdown" }
|