liger-kernel-nightly 0.5.10.dev20250523162037__tar.gz → 0.5.10.dev20250526105714__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (264) hide show
  1. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/PKG-INFO +3 -1
  2. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/README.md +2 -0
  3. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/benchmarks_visualizer.py +125 -16
  4. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/data/all_benchmark_data.csv +168 -24
  5. liger_kernel_nightly-0.5.10.dev20250526105714/benchmark/scripts/benchmark_multi_token_attention.py +218 -0
  6. liger_kernel_nightly-0.5.10.dev20250526105714/benchmark/scripts/benchmark_softmax.py +140 -0
  7. liger_kernel_nightly-0.5.10.dev20250526105714/benchmark/scripts/benchmark_sparse_multi_token_attention.py +254 -0
  8. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/utils.py +7 -0
  9. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/mkdocs.yml +2 -3
  10. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/pyproject.toml +1 -1
  11. liger_kernel_nightly-0.5.10.dev20250526105714/src/liger_kernel/ops/multi_token_attention.py +207 -0
  12. liger_kernel_nightly-0.5.10.dev20250526105714/src/liger_kernel/ops/softmax.py +201 -0
  13. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/sparsemax.py +62 -50
  14. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/functional.py +34 -0
  15. liger_kernel_nightly-0.5.10.dev20250526105714/src/liger_kernel/transformers/multi_token_attention.py +64 -0
  16. liger_kernel_nightly-0.5.10.dev20250526105714/src/liger_kernel/transformers/softmax.py +12 -0
  17. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel_nightly.egg-info/PKG-INFO +3 -1
  18. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel_nightly.egg-info/SOURCES.txt +9 -0
  19. liger_kernel_nightly-0.5.10.dev20250526105714/test/transformers/test_multi_token_attention.py +324 -0
  20. liger_kernel_nightly-0.5.10.dev20250526105714/test/transformers/test_softmax.py +103 -0
  21. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/ISSUE_TEMPLATE/bug_report.yaml +0 -0
  22. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/ISSUE_TEMPLATE/feature_request.yaml +0 -0
  23. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/pull_request_template.md +0 -0
  24. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/workflows/amd-ci.yml +0 -0
  25. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/workflows/docs.yml +0 -0
  26. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/workflows/intel-ci.yml +0 -0
  27. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/workflows/nvi-ci.yml +0 -0
  28. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/workflows/publish-nightly.yml +0 -0
  29. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.github/workflows/publish-release.yml +0 -0
  30. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.gitignore +0 -0
  31. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/.idea/workspace.xml +0 -0
  32. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/LICENSE +0 -0
  33. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/Makefile +0 -0
  34. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/NOTICE +0 -0
  35. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/README.md +0 -0
  36. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/__init__.py +0 -0
  37. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/__init__.py +0 -0
  38. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_cpo_loss.py +0 -0
  39. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_cross_entropy.py +0 -0
  40. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_distill_jsd_loss.py +0 -0
  41. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_dpo_loss.py +0 -0
  42. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_dyt.py +0 -0
  43. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_embedding.py +0 -0
  44. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_fused_linear_cross_entropy.py +0 -0
  45. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_fused_linear_jsd.py +0 -0
  46. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_geglu.py +0 -0
  47. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_group_norm.py +0 -0
  48. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_jsd.py +0 -0
  49. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_kl_div.py +0 -0
  50. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_kto_loss.py +0 -0
  51. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_layer_norm.py +0 -0
  52. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_orpo_loss.py +0 -0
  53. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_qwen2vl_mrope.py +0 -0
  54. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_rms_norm.py +0 -0
  55. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_rope.py +0 -0
  56. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_simpo_loss.py +0 -0
  57. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_sparsemax.py +0 -0
  58. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_swiglu.py +0 -0
  59. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/benchmark/scripts/benchmark_tvd.py +0 -0
  60. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/dev/fmt-requirements.txt +0 -0
  61. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/dev/modal/tests.py +0 -0
  62. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/dev/modal/tests_bwd.py +0 -0
  63. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/Examples.md +0 -0
  64. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/Getting-Started.md +0 -0
  65. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/High-Level-APIs.md +0 -0
  66. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/Low-Level-APIs.md +0 -0
  67. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/acknowledgement.md +0 -0
  68. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/contributing.md +0 -0
  69. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/images/banner.GIF +0 -0
  70. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/images/compose.gif +0 -0
  71. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/images/e2e-memory.png +0 -0
  72. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/images/e2e-tps.png +0 -0
  73. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/images/logo-banner.png +0 -0
  74. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/images/patch.gif +0 -0
  75. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/images/post-training.png +0 -0
  76. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/index.md +0 -0
  77. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/docs/license.md +0 -0
  78. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/alignment/accelerate_config.yaml +0 -0
  79. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/alignment/run_orpo.py +0 -0
  80. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/README.md +0 -0
  81. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/callback.py +0 -0
  82. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/config/fsdp_config.json +0 -0
  83. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/img/gemma_7b_mem.png +0 -0
  84. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/img/gemma_7b_tp.png +0 -0
  85. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/img/llama_mem_alloc.png +0 -0
  86. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/img/llama_tps.png +0 -0
  87. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/img/qwen_mem_alloc.png +0 -0
  88. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/img/qwen_tps.png +0 -0
  89. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/launch_on_modal.py +0 -0
  90. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/requirements.txt +0 -0
  91. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/run_benchmarks.sh +0 -0
  92. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/run_gemma.sh +0 -0
  93. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/run_llama.sh +0 -0
  94. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/run_qwen.sh +0 -0
  95. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/run_qwen2_vl.sh +0 -0
  96. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/training.py +0 -0
  97. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/huggingface/training_multimodal.py +0 -0
  98. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/lightning/README.md +0 -0
  99. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/lightning/requirements.txt +0 -0
  100. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/lightning/training.py +0 -0
  101. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/README.md +0 -0
  102. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/callback.py +0 -0
  103. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/docs/images/Memory_Stage1_num_head_3.png +0 -0
  104. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/docs/images/Memory_Stage1_num_head_5.png +0 -0
  105. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/docs/images/Memory_Stage2_num_head_3.png +0 -0
  106. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/docs/images/Memory_Stage2_num_head_5.png +0 -0
  107. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/docs/images/Throughput_Stage1_num_head_3.png +0 -0
  108. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/docs/images/Throughput_Stage1_num_head_5.png +0 -0
  109. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/docs/images/Throughput_Stage2_num_head_3.png +0 -0
  110. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/docs/images/Throughput_Stage2_num_head_5.png +0 -0
  111. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/fsdp/acc-fsdp.conf +0 -0
  112. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/medusa_util.py +0 -0
  113. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/requirements.txt +0 -0
  114. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/scripts/llama3_8b_medusa.sh +0 -0
  115. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/examples/medusa/train.py +0 -0
  116. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/licenses/LICENSE-Apache-2.0 +0 -0
  117. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/licenses/LICENSE-MIT-AutoAWQ +0 -0
  118. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/licenses/LICENSE-MIT-Efficient-Cross-Entropy +0 -0
  119. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/licenses/LICENSE-MIT-llmc +0 -0
  120. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/licenses/LICENSE-MIT-triton +0 -0
  121. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/setup.cfg +0 -0
  122. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/setup.py +0 -0
  123. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/__init__.py +0 -0
  124. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/README.md +0 -0
  125. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/__init__.py +0 -0
  126. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/cpo_loss.py +0 -0
  127. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/dpo_loss.py +0 -0
  128. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/functional.py +0 -0
  129. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/fused_linear_distillation.py +0 -0
  130. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/fused_linear_ppo.py +0 -0
  131. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/fused_linear_preference.py +0 -0
  132. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/fused_linear_unpaired_preference.py +0 -0
  133. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/grpo_loss.py +0 -0
  134. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/jsd_loss.py +0 -0
  135. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/kto_loss.py +0 -0
  136. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/orpo_loss.py +0 -0
  137. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/chunked_loss/simpo_loss.py +0 -0
  138. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/env_report.py +0 -0
  139. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/__init__.py +0 -0
  140. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/cross_entropy.py +0 -0
  141. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/dyt.py +0 -0
  142. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/experimental/embedding.py +0 -0
  143. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/experimental/mm_int8int2.py +0 -0
  144. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/fused_linear_cross_entropy.py +0 -0
  145. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/fused_linear_jsd.py +0 -0
  146. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/geglu.py +0 -0
  147. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/group_norm.py +0 -0
  148. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/grpo_loss.py +0 -0
  149. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/jsd.py +0 -0
  150. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/kl_div.py +0 -0
  151. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/layer_norm.py +0 -0
  152. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/qwen2vl_mrope.py +0 -0
  153. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/rms_norm.py +0 -0
  154. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/rope.py +0 -0
  155. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/swiglu.py +0 -0
  156. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/tvd.py +0 -0
  157. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/ops/utils.py +0 -0
  158. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/__init__.py +0 -0
  159. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/auto_model.py +0 -0
  160. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/cross_entropy.py +0 -0
  161. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/dyt.py +0 -0
  162. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/experimental/embedding.py +0 -0
  163. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/fsdp.py +0 -0
  164. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/fused_linear_cross_entropy.py +0 -0
  165. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/fused_linear_jsd.py +0 -0
  166. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/geglu.py +0 -0
  167. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/gema3_rms.py +0 -0
  168. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/group_norm.py +0 -0
  169. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/grpo_loss.py +0 -0
  170. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/jsd.py +0 -0
  171. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/kl_div.py +0 -0
  172. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/layer_norm.py +0 -0
  173. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/__init__.py +0 -0
  174. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/gemma.py +0 -0
  175. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/gemma2.py +0 -0
  176. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/gemma3.py +0 -0
  177. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/glm4.py +0 -0
  178. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/llama.py +0 -0
  179. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/llava.py +0 -0
  180. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/loss_utils.py +0 -0
  181. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/mistral.py +0 -0
  182. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/mixtral.py +0 -0
  183. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/mllama.py +0 -0
  184. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/olmo2.py +0 -0
  185. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/paligemma.py +0 -0
  186. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/phi3.py +0 -0
  187. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/qwen2.py +0 -0
  188. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/qwen2_5_vl.py +0 -0
  189. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/qwen2_vl.py +0 -0
  190. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/qwen3.py +0 -0
  191. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/model/qwen3_moe.py +0 -0
  192. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/monkey_patch.py +0 -0
  193. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/qwen2vl_mrope.py +0 -0
  194. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/rms_norm.py +0 -0
  195. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/rope.py +0 -0
  196. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/sparsemax.py +0 -0
  197. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/swiglu.py +0 -0
  198. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/trainer/__init__.py +0 -0
  199. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/trainer/orpo_trainer.py +0 -0
  200. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/trainer_integration.py +0 -0
  201. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/transformers/tvd.py +0 -0
  202. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/triton/__init__.py +0 -0
  203. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/triton/monkey_patch.py +0 -0
  204. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel/utils.py +0 -0
  205. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel_nightly.egg-info/dependency_links.txt +0 -0
  206. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel_nightly.egg-info/requires.txt +0 -0
  207. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/src/liger_kernel_nightly.egg-info/top_level.txt +0 -0
  208. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/__init__.py +0 -0
  209. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/chunked_loss/__init__.py +0 -0
  210. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/chunked_loss/test_cpo_loss.py +0 -0
  211. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/chunked_loss/test_dpo_loss.py +0 -0
  212. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/chunked_loss/test_grpo_loss.py +0 -0
  213. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/chunked_loss/test_jsd_loss.py +0 -0
  214. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/chunked_loss/test_kto_loss.py +0 -0
  215. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/chunked_loss/test_orpo_loss.py +0 -0
  216. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/chunked_loss/test_simpo_loss.py +0 -0
  217. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/conftest.py +0 -0
  218. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/__init__.py +0 -0
  219. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/bf16/__init__.py +0 -0
  220. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/bf16/test_mini_models.py +0 -0
  221. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/bf16/test_mini_models_multimodal.py +0 -0
  222. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/bf16/test_mini_models_with_logits.py +0 -0
  223. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/fp32/__init__.py +0 -0
  224. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/fp32/test_mini_models.py +0 -0
  225. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/fp32/test_mini_models_multimodal.py +0 -0
  226. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/convergence/fp32/test_mini_models_with_logits.py +0 -0
  227. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/fake_configs/Google/Gemma3/gemma-3-4b-it/tokenizer_config.json +0 -0
  228. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/fake_configs/Google/Paligemma/paligemma-3b-pt-224/tokenizer_config.json +0 -0
  229. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/preprocessor_config.json +0 -0
  230. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/processor_config.json +0 -0
  231. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/fake_configs/Llava/llava-1.5-7b-hf/tokenizer_config.json +0 -0
  232. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/fake_configs/Qwen/Qwen2-VL-7B-Instruct/tokenizer_config.json +0 -0
  233. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/fake_configs/Qwen/Qwen2.5-VL-7B-Instruct/tokenizer_config.json +0 -0
  234. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/fake_configs/meta-llama/Llama-3.2-11B-Vision-Instruct/tokenizer_config.json +0 -0
  235. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/scripts/generate_tokenized_dataset.py +0 -0
  236. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/tiny_shakespeare.txt +0 -0
  237. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/tiny_shakespeare_tokenized/data-00000-of-00001.arrow +0 -0
  238. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/tiny_shakespeare_tokenized/dataset_info.json +0 -0
  239. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/resources/tiny_shakespeare_tokenized/state.json +0 -0
  240. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_auto_model.py +0 -0
  241. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_cross_entropy.py +0 -0
  242. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_dyt.py +0 -0
  243. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_embedding.py +0 -0
  244. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_flex_attention.py +0 -0
  245. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_fused_linear_cross_entropy.py +0 -0
  246. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_fused_linear_jsd.py +0 -0
  247. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_geglu.py +0 -0
  248. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_group_norm.py +0 -0
  249. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_grpo_loss.py +0 -0
  250. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_jsd.py +0 -0
  251. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_kl_div.py +0 -0
  252. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_layer_norm.py +0 -0
  253. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_mm_int8int2.py +0 -0
  254. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_monkey_patch.py +0 -0
  255. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_qwen2vl_mrope.py +0 -0
  256. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_rms_norm.py +0 -0
  257. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_rope.py +0 -0
  258. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_sparsemax.py +0 -0
  259. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_swiglu.py +0 -0
  260. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_trainer_integration.py +0 -0
  261. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_transformers.py +0 -0
  262. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/transformers/test_tvd.py +0 -0
  263. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/triton/test_triton_monkey_patch.py +0 -0
  264. {liger_kernel_nightly-0.5.10.dev20250523162037 → liger_kernel_nightly-0.5.10.dev20250526105714}/test/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: liger_kernel_nightly
3
- Version: 0.5.10.dev20250523162037
3
+ Version: 0.5.10.dev20250526105714
4
4
  Summary: Efficient Triton kernels for LLM Training
5
5
  License: BSD 2-CLAUSE LICENSE
6
6
  Copyright 2024 LinkedIn Corporation
@@ -111,6 +111,8 @@ Requires-Dist: mkdocs-material; extra == "dev"
111
111
 
112
112
  We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, KTO, JSD, and many more. Check out [how we optimize the memory](https://x.com/hsu_byron/status/1866577403918917655).
113
113
 
114
+ You can view the documentation site for additional installation instructions, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
115
+
114
116
  ## Supercharge Your Model with Liger Kernel
115
117
 
116
118
  ![Banner](https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/banner.GIF)
@@ -63,6 +63,8 @@
63
63
 
64
64
  We've also added optimized Post-Training kernels that deliver **up to 80% memory savings** for alignment and distillation tasks. We support losses like DPO, CPO, ORPO, SimPO, KTO, JSD, and many more. Check out [how we optimize the memory](https://x.com/hsu_byron/status/1866577403918917655).
65
65
 
66
+ You can view the documentation site for additional installation instructions, usage examples, and API references: https://linkedin.github.io/Liger-Kernel/
67
+
66
68
  ## Supercharge Your Model with Liger Kernel
67
69
 
68
70
  ![Banner](https://raw.githubusercontent.com/linkedin/Liger-Kernel/main/docs/images/banner.GIF)
@@ -22,6 +22,9 @@ class VisualizationsConfig:
22
22
  kernel_name (str): Kernel name to benchmark. (Will run `scripts/benchmark_{kernel_name}.py`)
23
23
  metric_name (str): Metric name to visualize (speed/memory)
24
24
  kernel_operation_mode (str): Kernel operation mode to visualize (forward/backward/full). Defaults to "full"
25
+ extra_config_filter (str, optional): A string to filter extra_benchmark_config.
26
+ Can be a substring to match or a 'key=value' pair (e.g., "'H': 4096").
27
+ Defaults to None, which means the first available config will be used if multiple exist.
25
28
  display (bool): Display the visualization. Defaults to False
26
29
  overwrite (bool): Overwrite existing visualization, if none exist this flag has no effect as ones are always created and saved. Defaults to False
27
30
 
@@ -30,6 +33,7 @@ class VisualizationsConfig:
30
33
  kernel_name: str
31
34
  metric_name: str
32
35
  kernel_operation_mode: str = "full"
36
+ extra_config_filter: str | None = None
33
37
  display: bool = False
34
38
  overwrite: bool = False
35
39
 
@@ -55,6 +59,14 @@ def parse_args() -> VisualizationsConfig:
55
59
  default=None,
56
60
  help="Kernel operation modes to visualize (forward/backward/full). If not provided, generate for all available modes.",
57
61
  )
62
+ parser.add_argument(
63
+ "--extra-config-filter",
64
+ type=str,
65
+ default=None,
66
+ help="A string to filter extra_benchmark_config. "
67
+ "Can be a substring to match or a JSON-like 'key=value' pair (e.g., \"'H': 4096\" or \"H=4096\" for simple cases). "
68
+ "Defaults to None (first available config if multiple exist).",
69
+ )
58
70
  parser.add_argument("--display", action="store_true", help="Display the visualization")
59
71
  parser.add_argument(
60
72
  "--overwrite",
@@ -81,19 +93,101 @@ def load_data(config: VisualizationsConfig) -> pd.DataFrame:
81
93
  df = pd.read_csv(DATA_PATH)
82
94
  df["extra_benchmark_config"] = df["extra_benchmark_config_str"].apply(json.loads)
83
95
 
84
- filtered_df = df[
96
+ base_filtered_df = df[
85
97
  (df["kernel_name"] == config.kernel_name)
86
98
  & (df["metric_name"] == config.metric_name)
87
99
  & (df["kernel_operation_mode"] == config.kernel_operation_mode)
88
- # Use this to filter by extra benchmark configuration property
89
- # & (data['extra_benchmark_config'].apply(lambda x: x.get('H') == 4096))
90
- # FIXME: maybe add a way to filter using some configuration, except of hardcoding it
91
100
  ]
92
101
 
93
- if filtered_df.empty:
94
- raise ValueError("No data found for the given filters")
102
+ if base_filtered_df.empty:
103
+ raise ValueError(
104
+ f"No data found for kernel_name='{config.kernel_name}', "
105
+ f"metric_name='{config.metric_name}', "
106
+ f"kernel_operation_mode='{config.kernel_operation_mode}'."
107
+ )
95
108
 
96
- return filtered_df
109
+ unique_extra_configs_str = base_filtered_df["extra_benchmark_config_str"].unique()
110
+ selected_extra_config_str = None
111
+
112
+ if len(unique_extra_configs_str) == 0:
113
+ print(
114
+ "Warning: No extra_benchmark_config found for the initial filters. "
115
+ "Proceeding with all data from initial filter."
116
+ )
117
+ return base_filtered_df
118
+
119
+ if config.extra_config_filter:
120
+ matched_configs = []
121
+ try:
122
+ if "=" in config.extra_config_filter:
123
+ key_filter, value_filter = config.extra_config_filter.split("=", 1)
124
+ for cfg_str in unique_extra_configs_str:
125
+ cfg_json = json.loads(cfg_str)
126
+ if str(cfg_json.get(key_filter.strip("'\" "))) == value_filter.strip("'\" "):
127
+ matched_configs.append(cfg_str)
128
+ if not matched_configs:
129
+ matched_configs = [
130
+ cfg_str for cfg_str in unique_extra_configs_str if config.extra_config_filter in cfg_str
131
+ ]
132
+ except Exception as e:
133
+ print(
134
+ f"Note: Could not parse extra_config_filter '{config.extra_config_filter}' as key=value ({e}), using substring match."
135
+ )
136
+ matched_configs = [cfg_str for cfg_str in unique_extra_configs_str if config.extra_config_filter in cfg_str]
137
+
138
+ if matched_configs:
139
+ if len(matched_configs) > 1:
140
+ print(
141
+ f"Warning: Multiple extra_benchmark_configs match filter '{config.extra_config_filter}': {matched_configs}. "
142
+ f"Using the first one: {matched_configs[0]}"
143
+ )
144
+ selected_extra_config_str = matched_configs[0]
145
+ else:
146
+ print(
147
+ f"Warning: No extra_benchmark_config matches filter '{config.extra_config_filter}'. "
148
+ f"Available configs for {config.kernel_name} ({config.metric_name}, {config.kernel_operation_mode}): {list(unique_extra_configs_str)}"
149
+ )
150
+ if len(unique_extra_configs_str) > 0:
151
+ selected_extra_config_str = unique_extra_configs_str[0]
152
+ print(f"Defaulting to the first available extra_benchmark_config: {selected_extra_config_str}")
153
+ else:
154
+ raise ValueError("No extra_benchmark_config available to select after failed filter attempt.")
155
+
156
+ elif len(unique_extra_configs_str) > 1:
157
+ selected_extra_config_str = unique_extra_configs_str[0]
158
+ print(
159
+ f"Warning: Multiple extra_benchmark_configs found for {config.kernel_name} ({config.metric_name}, {config.kernel_operation_mode})."
160
+ )
161
+ print(f"Defaulting to use: {selected_extra_config_str}")
162
+ print(f"Available configs: {list(unique_extra_configs_str)}")
163
+ print(
164
+ "Use the --extra-config-filter argument to select a specific one "
165
+ "(e.g., --extra-config-filter \"'H': 4096\" or a substring like \"'seq_len': 512\")."
166
+ )
167
+ elif len(unique_extra_configs_str) == 1:
168
+ selected_extra_config_str = unique_extra_configs_str[0]
169
+ print(f"Using unique extra_benchmark_config: {selected_extra_config_str}")
170
+
171
+ if selected_extra_config_str:
172
+ final_filtered_df = base_filtered_df[
173
+ base_filtered_df["extra_benchmark_config_str"] == selected_extra_config_str
174
+ ]
175
+ else:
176
+ print("Warning: Could not select an extra_benchmark_config. Using data from initial filter if any.")
177
+ final_filtered_df = base_filtered_df
178
+
179
+ if final_filtered_df.empty:
180
+ raise ValueError(
181
+ f"No data found after attempting to filter by extra_benchmark_config. "
182
+ f"Selected/Defaulted extra_config_str: {selected_extra_config_str}"
183
+ if selected_extra_config_str
184
+ else "No specific extra_config was selected."
185
+ )
186
+
187
+ print(
188
+ f"Plotting data for extra_benchmark_config: {json.loads(selected_extra_config_str if selected_extra_config_str else '{}')}"
189
+ )
190
+ return final_filtered_df
97
191
 
98
192
 
99
193
  def plot_data(df: pd.DataFrame, config: VisualizationsConfig):
@@ -103,6 +197,10 @@ def plot_data(df: pd.DataFrame, config: VisualizationsConfig):
103
197
  df (pd.DataFrame): Filtered benchmark dataframe.
104
198
  config (VisualizationsConfig): Configuration object for the visualizations script.
105
199
  """
200
+ for col in ["y_value_20", "y_value_50", "y_value_80"]:
201
+ if col in df.columns:
202
+ df[col] = pd.to_numeric(df[col], errors="coerce")
203
+
106
204
  xlabel = df["x_label"].iloc[0]
107
205
  ylabel = f"{config.metric_name} ({df['metric_unit'].iloc[0]})"
108
206
  # Sort by "kernel_provider" to ensure consistent color assignment
@@ -110,15 +208,26 @@ def plot_data(df: pd.DataFrame, config: VisualizationsConfig):
110
208
 
111
209
  plt.figure(figsize=(10, 6))
112
210
  sns.set(style="whitegrid")
113
- ax = sns.lineplot(
114
- data=df,
115
- x="x_value",
116
- y="y_value_50",
117
- hue="kernel_provider",
118
- marker="o",
119
- palette="tab10",
120
- errorbar=("ci", None),
121
- )
211
+ try:
212
+ ax = sns.lineplot(
213
+ data=df,
214
+ x="x_value",
215
+ y="y_value_50",
216
+ hue="kernel_provider",
217
+ marker="o",
218
+ palette="tab10",
219
+ errorbar=("ci", None),
220
+ )
221
+ except Exception:
222
+ ax = sns.lineplot(
223
+ data=df,
224
+ x="x_value",
225
+ y="y_value_50",
226
+ hue="kernel_provider",
227
+ marker="o",
228
+ palette="tab10",
229
+ errorbar=None,
230
+ )
122
231
 
123
232
  # Seaborn can't plot pre-computed error bars, so we need to do it manually
124
233
  lines = ax.get_lines()
@@ -853,27 +853,171 @@ sparsemax,torch,full,memory,MB,V,feature size,4096,328.046875,328.046875,328.046
853
853
  sparsemax,torch,full,memory,MB,V,feature size,8192,704.00048828125,704.00048828125,704.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8
854
854
  sparsemax,torch,full,memory,MB,V,feature size,16384,1408.00048828125,1408.00048828125,1408.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8
855
855
  sparsemax,torch,full,memory,MB,V,feature size,32768,2816.00048828125,2816.00048828125,2816.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-28 00:38:26,0.5.8
856
- sparsemax,liger,forward,memory,MB,V,feature size,1024,56.0078125,56.0078125,56.0078125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
857
- sparsemax,liger,forward,memory,MB,V,feature size,2048,112.015625,112.015625,112.015625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
858
- sparsemax,liger,forward,memory,MB,V,feature size,4096,224.03125,224.03125,224.03125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
859
- sparsemax,liger,forward,memory,MB,V,feature size,8192,768.00048828125,768.00048828125,768.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
860
- sparsemax,liger,forward,memory,MB,V,feature size,16384,1536.00048828125,1536.00048828125,1536.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
861
- sparsemax,liger,forward,memory,MB,V,feature size,32768,3072.00048828125,3072.00048828125,3072.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
862
- sparsemax,torch,forward,memory,MB,V,feature size,1024,82.03515625,82.03515625,82.03515625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
863
- sparsemax,torch,forward,memory,MB,V,feature size,2048,164.0390625,164.0390625,164.0390625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
864
- sparsemax,torch,forward,memory,MB,V,feature size,4096,328.046875,328.046875,328.046875,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
865
- sparsemax,torch,forward,memory,MB,V,feature size,8192,704.00048828125,704.00048828125,704.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
866
- sparsemax,torch,forward,memory,MB,V,feature size,16384,1408.00048828125,1408.00048828125,1408.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
867
- sparsemax,torch,forward,memory,MB,V,feature size,32768,2816.00048828125,2816.00048828125,2816.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:39,0.5.8
868
- sparsemax,liger,backward,memory,MB,V,feature size,1024,56.0078125,56.0078125,56.0078125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:40,0.5.8
869
- sparsemax,liger,backward,memory,MB,V,feature size,2048,112.015625,112.015625,112.015625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:40,0.5.8
870
- sparsemax,liger,backward,memory,MB,V,feature size,4096,224.03125,224.03125,224.03125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:40,0.5.8
871
- sparsemax,liger,backward,memory,MB,V,feature size,8192,768.00048828125,768.00048828125,768.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:40,0.5.8
872
- sparsemax,liger,backward,memory,MB,V,feature size,16384,1536.00048828125,1536.00048828125,1536.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:40,0.5.8
873
- sparsemax,liger,backward,memory,MB,V,feature size,32768,3072.00048828125,3072.00048828125,3072.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:40,0.5.8
874
- sparsemax,torch,backward,memory,MB,V,feature size,1024,82.03515625,82.03515625,82.03515625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:41,0.5.8
875
- sparsemax,torch,backward,memory,MB,V,feature size,2048,164.0390625,164.0390625,164.0390625,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:41,0.5.8
876
- sparsemax,torch,backward,memory,MB,V,feature size,4096,328.046875,328.046875,328.046875,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:41,0.5.8
877
- sparsemax,torch,backward,memory,MB,V,feature size,8192,704.00048828125,704.00048828125,704.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:41,0.5.8
878
- sparsemax,torch,backward,memory,MB,V,feature size,16384,1408.00048828125,1408.00048828125,1408.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:41,0.5.8
879
- sparsemax,torch,backward,memory,MB,V,feature size,32768,2816.00048828125,2816.00048828125,2816.00048828125,"{""B"": 4, ""T"": 512, ""dim"": -1, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-05-15 02:04:41,0.5.8
856
+ multi_token_attention,liger,forward,speed,ms,L,sequence length,32,0.01740800030529499,0.01740800030529499,0.018432000651955605,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1
857
+ multi_token_attention,liger,forward,speed,ms,L,sequence length,64,0.018432000651955605,0.01740800030529499,0.01945599913597107,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1
858
+ multi_token_attention,liger,forward,speed,ms,L,sequence length,128,0.023552000522613525,0.02252800017595291,0.02364799939095974,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1
859
+ multi_token_attention,liger,forward,speed,ms,L,sequence length,256,0.043007999658584595,0.04198399931192398,0.043007999658584595,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1
860
+ multi_token_attention,liger,forward,speed,ms,L,sequence length,512,0.12595200538635254,0.12492799758911133,0.12595200538635254,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1
861
+ multi_token_attention,liger,forward,speed,ms,L,sequence length,1024,0.5283839702606201,0.5253120064735413,0.5294079780578613,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:10,0.1.1
862
+ multi_token_attention,torch,forward,speed,ms,L,sequence length,32,0.2467840015888214,0.24063999950885773,0.2529279887676239,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1
863
+ multi_token_attention,torch,forward,speed,ms,L,sequence length,64,0.24166400730609894,0.23756800591945648,0.24883200228214264,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1
864
+ multi_token_attention,torch,forward,speed,ms,L,sequence length,128,0.24268800020217896,0.2385600060224533,0.24985599517822266,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1
865
+ multi_token_attention,torch,forward,speed,ms,L,sequence length,256,0.24166400730609894,0.23873919248580933,0.24782079458236694,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1
866
+ multi_token_attention,torch,forward,speed,ms,L,sequence length,512,0.31334400177001953,0.3102720081806183,0.3213888108730316,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1
867
+ multi_token_attention,torch,forward,speed,ms,L,sequence length,1024,0.719871997833252,0.7167999744415283,0.7260159850120544,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:11,0.1.1
868
+ multi_token_attention,liger,full,speed,ms,L,sequence length,32,0.9349120259284973,0.6543359756469727,0.9494400024414062,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1
869
+ multi_token_attention,liger,full,speed,ms,L,sequence length,64,0.6215680241584778,0.5631999969482422,0.8916991949081421,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1
870
+ multi_token_attention,liger,full,speed,ms,L,sequence length,128,0.5406720042228699,0.5335040092468262,0.550003170967102,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1
871
+ multi_token_attention,liger,full,speed,ms,L,sequence length,256,0.5631999969482422,0.5560320019721985,0.5674688220024109,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1
872
+ multi_token_attention,liger,full,speed,ms,L,sequence length,512,0.6430720090866089,0.6420480012893677,0.6430720090866089,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1
873
+ multi_token_attention,liger,full,speed,ms,L,sequence length,1024,2.4780800342559814,2.4770560264587402,2.479987144470215,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:12,0.1.1
874
+ multi_token_attention,torch,full,speed,ms,L,sequence length,32,0.795199990272522,0.78438401222229,0.8038399815559387,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1
875
+ multi_token_attention,torch,full,speed,ms,L,sequence length,64,0.7362560033798218,0.6504960060119629,0.7464960217475891,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1
876
+ multi_token_attention,torch,full,speed,ms,L,sequence length,128,0.7680000066757202,0.6437439918518066,0.8105729818344116,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1
877
+ multi_token_attention,torch,full,speed,ms,L,sequence length,256,0.7685279846191406,0.7586879730224609,0.783519983291626,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1
878
+ multi_token_attention,torch,full,speed,ms,L,sequence length,512,0.9676799774169922,0.9625599980354309,0.9751039743423462,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1
879
+ multi_token_attention,torch,full,speed,ms,L,sequence length,1024,2.772480010986328,2.7688961029052734,2.7842559814453125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:13,0.1.1
880
+ multi_token_attention,liger,backward,speed,ms,L,sequence length,32,0.334879994392395,0.3222528100013733,0.6912000179290771,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1
881
+ multi_token_attention,liger,backward,speed,ms,L,sequence length,64,0.23756800591945648,0.228166401386261,0.2629631757736206,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1
882
+ multi_token_attention,liger,backward,speed,ms,L,sequence length,128,0.29785600304603577,0.2519040107727051,0.3081727921962738,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1
883
+ multi_token_attention,liger,backward,speed,ms,L,sequence length,256,0.2590720057487488,0.24391679465770721,0.30832639336586,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1
884
+ multi_token_attention,liger,backward,speed,ms,L,sequence length,512,0.5171200037002563,0.5169600248336792,0.5181440114974976,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1
885
+ multi_token_attention,liger,backward,speed,ms,L,sequence length,1024,1.9578880071640015,1.9568639993667603,1.9615744352340698,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:14,0.1.1
886
+ multi_token_attention,torch,backward,speed,ms,L,sequence length,32,0.09830400347709656,0.08908800035715103,0.20353920757770538,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
887
+ multi_token_attention,torch,backward,speed,ms,L,sequence length,64,0.06348799914121628,0.062463998794555664,0.06348799914121628,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
888
+ multi_token_attention,torch,backward,speed,ms,L,sequence length,128,0.09011200070381165,0.08908800035715103,0.09011200070381165,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
889
+ multi_token_attention,torch,backward,speed,ms,L,sequence length,256,0.16383999586105347,0.16383999586105347,0.16486400365829468,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
890
+ multi_token_attention,torch,backward,speed,ms,L,sequence length,512,0.52019202709198,0.5191680192947388,0.52019202709198,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
891
+ multi_token_attention,torch,backward,speed,ms,L,sequence length,1024,1.9763200283050537,1.9752960205078125,1.9763200283050537,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
892
+ multi_token_attention,liger,full,memory,MB,L,sequence length,32,0.97412109375,0.97412109375,0.97412109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
893
+ multi_token_attention,liger,full,memory,MB,L,sequence length,64,1.53662109375,1.53662109375,1.53662109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
894
+ multi_token_attention,liger,full,memory,MB,L,sequence length,128,3.69287109375,3.69287109375,3.69287109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
895
+ multi_token_attention,liger,full,memory,MB,L,sequence length,256,13.068359375,13.068359375,13.068359375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
896
+ multi_token_attention,liger,full,memory,MB,L,sequence length,512,48.974609375,48.974609375,48.974609375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
897
+ multi_token_attention,liger,full,memory,MB,L,sequence length,1024,192.974609375,192.974609375,192.974609375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
898
+ multi_token_attention,torch,full,memory,MB,L,sequence length,32,0.9599609375,0.9599609375,0.9599609375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
899
+ multi_token_attention,torch,full,memory,MB,L,sequence length,64,1.4814453125,1.4814453125,1.4814453125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
900
+ multi_token_attention,torch,full,memory,MB,L,sequence length,128,3.4736328125,3.4736328125,3.4736328125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
901
+ multi_token_attention,torch,full,memory,MB,L,sequence length,256,12.19287109375,12.19287109375,12.19287109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
902
+ multi_token_attention,torch,full,memory,MB,L,sequence length,512,45.47412109375,45.47412109375,45.47412109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
903
+ multi_token_attention,torch,full,memory,MB,L,sequence length,1024,178.97412109375,178.97412109375,178.97412109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-28 04:46:15,0.1.1
904
+ softmax,liger,forward,speed,ms,N,hidden size,128,0.0071680000983178616,0.0071680000983178616,0.007942399941384792,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8
905
+ softmax,liger,forward,speed,ms,N,hidden size,256,0.008448000065982342,0.008191999979317188,0.009216000325977802,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8
906
+ softmax,liger,forward,speed,ms,N,hidden size,512,0.013311999849975109,0.01228800043463707,0.013311999849975109,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8
907
+ softmax,liger,forward,speed,ms,N,hidden size,1024,0.021503999829292297,0.021503999829292297,0.02252800017595291,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8
908
+ softmax,liger,forward,speed,ms,N,hidden size,2048,0.04095999896526337,0.04095999896526337,0.04198399931192398,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8
909
+ softmax,liger,forward,speed,ms,N,hidden size,4096,0.0798719972372055,0.0798719972372055,0.08089599758386612,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:04,0.5.8
910
+ softmax,torch,forward,speed,ms,N,hidden size,128,0.006144000217318535,0.006144000217318535,0.0071680000983178616,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8
911
+ softmax,torch,forward,speed,ms,N,hidden size,256,0.008191999979317188,0.008191999979317188,0.009216000325977802,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8
912
+ softmax,torch,forward,speed,ms,N,hidden size,512,0.01228800043463707,0.01228800043463707,0.013311999849975109,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8
913
+ softmax,torch,forward,speed,ms,N,hidden size,1024,0.02252800017595291,0.02252800017595291,0.023552000522613525,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8
914
+ softmax,torch,forward,speed,ms,N,hidden size,2048,0.057583998888731,0.05734400078654289,0.058368001133203506,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8
915
+ softmax,torch,forward,speed,ms,N,hidden size,4096,0.08323200047016144,0.08294399827718735,0.08396799862384796,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:07,0.5.8
916
+ softmax,liger,full,speed,ms,N,hidden size,128,0.053247999399900436,0.04505600035190582,0.06172160431742668,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8
917
+ softmax,liger,full,speed,ms,N,hidden size,256,0.05939200147986412,0.04198399931192398,0.11169920116662979,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8
918
+ softmax,liger,full,speed,ms,N,hidden size,512,0.11577600240707397,0.07720960676670074,0.16793599724769592,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8
919
+ softmax,liger,full,speed,ms,N,hidden size,1024,0.12492799758911133,0.10273279249668121,0.2982015907764435,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8
920
+ softmax,liger,full,speed,ms,N,hidden size,2048,0.1013759970664978,0.10035199671983719,0.12902399897575378,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8
921
+ softmax,liger,full,speed,ms,N,hidden size,4096,0.19660800695419312,0.19660800695419312,0.19763199985027313,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:10,0.5.8
922
+ softmax,torch,full,speed,ms,N,hidden size,128,0.013311999849975109,0.013311999849975109,0.013504000380635262,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8
923
+ softmax,torch,full,speed,ms,N,hidden size,256,0.019152000546455383,0.018432000651955605,0.01945599913597107,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8
924
+ softmax,torch,full,speed,ms,N,hidden size,512,0.03891199827194214,0.03788800165057182,0.03891199827194214,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8
925
+ softmax,torch,full,speed,ms,N,hidden size,1024,0.08396799862384796,0.08396799862384796,0.08499199897050858,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8
926
+ softmax,torch,full,speed,ms,N,hidden size,2048,0.18329599499702454,0.18329599499702454,0.18432000279426575,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8
927
+ softmax,torch,full,speed,ms,N,hidden size,4096,0.3307519853115082,0.32972800731658936,0.33169281482696533,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:13,0.5.8
928
+ softmax,liger,forward,speed,ms,N,hidden size,128,0.006335999816656113,0.006144000217318535,0.0071680000983178616,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8
929
+ softmax,liger,forward,speed,ms,N,hidden size,256,0.0071680000983178616,0.006144000217318535,0.0071680000983178616,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8
930
+ softmax,liger,forward,speed,ms,N,hidden size,512,0.008191999979317188,0.008191999979317188,0.009216000325977802,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8
931
+ softmax,liger,forward,speed,ms,N,hidden size,1024,0.013311999849975109,0.01228800043463707,0.013311999849975109,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8
932
+ softmax,liger,forward,speed,ms,N,hidden size,2048,0.02252800017595291,0.02252800017595291,0.023552000522613525,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8
933
+ softmax,liger,forward,speed,ms,N,hidden size,4096,0.04095999896526337,0.04095999896526337,0.04198399931192398,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:16,0.5.8
934
+ softmax,torch,forward,speed,ms,N,hidden size,128,0.006144000217318535,0.005119999870657921,0.006144000217318535,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8
935
+ softmax,torch,forward,speed,ms,N,hidden size,256,0.006207999773323536,0.006144000217318535,0.0071680000983178616,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8
936
+ softmax,torch,forward,speed,ms,N,hidden size,512,0.008383999578654766,0.008191999979317188,0.009216000325977802,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8
937
+ softmax,torch,forward,speed,ms,N,hidden size,1024,0.014336000196635723,0.014336000196635723,0.014336000196635723,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8
938
+ softmax,torch,forward,speed,ms,N,hidden size,2048,0.05939200147986412,0.058368001133203506,0.05939200147986412,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8
939
+ softmax,torch,forward,speed,ms,N,hidden size,4096,0.06758400052785873,0.06675200164318085,0.06758400052785873,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:19,0.5.8
940
+ softmax,liger,full,speed,ms,N,hidden size,128,0.11472000181674957,0.09744639694690704,0.20684799551963806,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8
941
+ softmax,liger,full,speed,ms,N,hidden size,256,0.15787199139595032,0.10769280046224594,0.20897281169891357,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8
942
+ softmax,liger,full,speed,ms,N,hidden size,512,0.14028799533843994,0.0832064226269722,0.2879999876022339,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8
943
+ softmax,liger,full,speed,ms,N,hidden size,1024,0.2088959962129593,0.11446399986743927,0.2972480058670044,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8
944
+ softmax,liger,full,speed,ms,N,hidden size,2048,0.1443839967250824,0.09318400174379349,0.28278398513793945,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8
945
+ softmax,liger,full,speed,ms,N,hidden size,4096,0.11673600226640701,0.10035199671983719,0.28074881434440613,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:22,0.5.8
946
+ softmax,torch,full,speed,ms,N,hidden size,128,0.011264000087976456,0.010239999741315842,0.011264000087976456,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
947
+ softmax,torch,full,speed,ms,N,hidden size,256,0.013311999849975109,0.013311999849975109,0.013632000423967838,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
948
+ softmax,torch,full,speed,ms,N,hidden size,512,0.01945599913597107,0.01945599913597107,0.01945599913597107,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
949
+ softmax,torch,full,speed,ms,N,hidden size,1024,0.04198399931192398,0.04198399931192398,0.04224000126123428,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
950
+ softmax,torch,full,speed,ms,N,hidden size,2048,0.12595200538635254,0.12595200538635254,0.12697599828243256,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
951
+ softmax,torch,full,speed,ms,N,hidden size,4096,0.19763199985027313,0.19660800695419312,0.19809921085834503,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
952
+ softmax,liger,full,memory,MB,N,hidden size,128,0.00244140625,0.00244140625,0.00244140625,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
953
+ softmax,liger,full,memory,MB,N,hidden size,256,0.0048828125,0.0048828125,0.0048828125,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
954
+ softmax,liger,full,memory,MB,N,hidden size,512,0.009765625,0.009765625,0.009765625,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
955
+ softmax,liger,full,memory,MB,N,hidden size,1024,0.01953125,0.01953125,0.01953125,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
956
+ softmax,liger,full,memory,MB,N,hidden size,2048,0.0390625,0.0390625,0.0390625,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
957
+ softmax,liger,full,memory,MB,N,hidden size,4096,0.078125,0.078125,0.078125,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
958
+ softmax,torch,full,memory,MB,N,hidden size,128,0.0029296875,0.0029296875,0.0029296875,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
959
+ softmax,torch,full,memory,MB,N,hidden size,256,0.005859375,0.005859375,0.005859375,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
960
+ softmax,torch,full,memory,MB,N,hidden size,512,0.01171875,0.01171875,0.01171875,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
961
+ softmax,torch,full,memory,MB,N,hidden size,1024,0.0234375,0.0234375,0.0234375,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
962
+ softmax,torch,full,memory,MB,N,hidden size,2048,0.046875,0.046875,0.046875,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
963
+ softmax,torch,full,memory,MB,N,hidden size,4096,0.09375,0.09375,0.09375,"{""M"": 2048, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
964
+ softmax,liger,full,memory,MB,N,hidden size,128,0.00244140625,0.00244140625,0.00244140625,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
965
+ softmax,liger,full,memory,MB,N,hidden size,256,0.00244140625,0.00244140625,0.00244140625,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
966
+ softmax,liger,full,memory,MB,N,hidden size,512,0.0048828125,0.0048828125,0.0048828125,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
967
+ softmax,liger,full,memory,MB,N,hidden size,1024,0.009765625,0.009765625,0.009765625,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
968
+ softmax,liger,full,memory,MB,N,hidden size,2048,0.01953125,0.01953125,0.01953125,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
969
+ softmax,liger,full,memory,MB,N,hidden size,4096,0.0390625,0.0390625,0.0390625,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:25,0.5.8
970
+ softmax,torch,full,memory,MB,N,hidden size,128,0.0029296875,0.0029296875,0.0029296875,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8
971
+ softmax,torch,full,memory,MB,N,hidden size,256,0.0029296875,0.0029296875,0.0029296875,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8
972
+ softmax,torch,full,memory,MB,N,hidden size,512,0.005859375,0.005859375,0.005859375,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8
973
+ softmax,torch,full,memory,MB,N,hidden size,1024,0.01171875,0.01171875,0.01171875,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8
974
+ softmax,torch,full,memory,MB,N,hidden size,2048,0.0234375,0.0234375,0.0234375,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8
975
+ softmax,torch,full,memory,MB,N,hidden size,4096,0.046875,0.046875,0.046875,"{""M"": 2048, ""dtype"": ""torch.bfloat16""}",NVIDIA GeForce RTX 3090,2025-04-30 16:11:26,0.5.8
976
+ sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,32,0.31436800956726074,0.30646398663520813,0.319487988948822,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8
977
+ sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,64,0.3779039978981018,0.3678207993507385,0.38410240411758423,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8
978
+ sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,128,0.35020801424980164,0.3428351879119873,0.35839998722076416,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8
979
+ sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,256,0.5294079780578613,0.5283839702606201,0.5304319858551025,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8
980
+ sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,512,1.7315839529037476,1.7304960489273071,1.815551996231079,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8
981
+ sparse_multi_token_attention,liger,forward,speed,ms,L,sequence length,1024,6.465375900268555,6.462463855743408,6.718054294586182,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:12,0.5.8
982
+ sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,32,0.5888000130653381,0.5826560258865356,0.5960000157356262,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8
983
+ sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,64,0.6010879874229431,0.5947520136833191,0.608128011226654,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8
984
+ sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,128,0.5816320180892944,0.5745791792869568,0.5908480286598206,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8
985
+ sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,256,0.8591359853744507,0.8529919981956482,0.8627520203590393,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8
986
+ sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,512,1.931391954421997,1.925772786140442,1.935705542564392,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8
987
+ sparse_multi_token_attention,torch,forward,speed,ms,L,sequence length,1024,6.76915168762207,6.761676788330078,7.009791851043701,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:13,0.5.8
988
+ sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,32,2.111056089401245,2.0716030597686768,2.137094497680664,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8
989
+ sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,64,2.174975872039795,2.1364736557006836,2.297856092453003,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8
990
+ sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,128,2.0894718170166016,2.073791980743408,2.1352319717407227,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8
991
+ sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,256,2.137216091156006,1.8400319814682007,2.194175958633423,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8
992
+ sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,512,2.2814719676971436,2.1872639656066895,2.2833151817321777,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8
993
+ sparse_multi_token_attention,liger,full,speed,ms,L,sequence length,1024,8.308735847473145,8.299519538879395,8.551424026489258,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:16,0.5.8
994
+ sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,32,1.5749119520187378,1.498412847518921,2.170527935028076,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8
995
+ sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,64,1.494047999382019,1.482604742050171,1.5207936763763428,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8
996
+ sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,128,1.4581760168075562,1.4419968128204346,2.1133759021759033,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8
997
+ sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,256,1.7448960542678833,1.7180671691894531,1.7537024021148682,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8
998
+ sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,512,2.796544075012207,2.7762560844421387,2.8190720081329346,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8
999
+ sparse_multi_token_attention,torch,full,speed,ms,L,sequence length,1024,9.511823654174805,9.501286506652832,9.787391662597656,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:17,0.5.8
1000
+ sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,32,0.3544960021972656,0.33546239137649536,0.8041215538978577,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1001
+ sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,64,0.32897597551345825,0.32051199674606323,0.3438591957092285,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1002
+ sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,128,0.30931198596954346,0.3002240061759949,0.3197120130062103,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1003
+ sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,256,0.31334400177001953,0.2956160008907318,0.3251904249191284,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1004
+ sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,512,0.447488009929657,0.44646400213241577,0.4485119879245758,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1005
+ sparse_multi_token_attention,liger,backward,speed,ms,L,sequence length,1024,1.8585599660873413,1.8574656248092651,1.861631989479065,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1006
+ sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,32,0.25804799795150757,0.24883200228214264,0.30926719307899475,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1007
+ sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,64,0.25804799795150757,0.2514623999595642,0.26668161153793335,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1008
+ sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,128,0.24075199663639069,0.2303999960422516,0.25194239616394043,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1009
+ sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,256,0.24686399102210999,0.23756800591945648,0.2550272047519684,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1010
+ sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,512,0.7045120000839233,0.704479992389679,0.7063615918159485,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1011
+ sparse_multi_token_attention,torch,backward,speed,ms,L,sequence length,1024,2.698431968688965,2.697216033935547,2.7013120651245117,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:18,0.5.8
1012
+ sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,32,0.3603515625,0.3603515625,0.3603515625,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1013
+ sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,64,1.4189453125,1.4189453125,1.4189453125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1014
+ sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,128,5.6455078125,5.6455078125,5.6455078125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1015
+ sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,256,22.53662109375,22.53662109375,22.53662109375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1016
+ sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,512,90.06884765625,90.06884765625,90.06884765625,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1017
+ sparse_multi_token_attention,liger,full,memory,MB,L,sequence length,1024,360.13330078125,360.13330078125,360.13330078125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1018
+ sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,32,0.45263671875,0.45263671875,0.45263671875,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1019
+ sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,64,1.7685546875,1.7685546875,1.7685546875,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1020
+ sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,128,7.04833984375,7.04833984375,7.04833984375,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1021
+ sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,256,28.15478515625,28.15478515625,28.15478515625,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1022
+ sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,512,112.55517578125,112.55517578125,112.55517578125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8
1023
+ sparse_multi_token_attention,torch,full,memory,MB,L,sequence length,1024,450.10595703125,450.10595703125,450.10595703125,"{""B"": 2, ""C_in"": 4, ""C_out"": 4, ""K"": 3, ""groups"": 1, ""bias"": true, ""dtype"": ""torch.float32""}",NVIDIA GeForce RTX 3090,2025-04-30 17:22:19,0.5.8