ejkernel 0.0.76__tar.gz → 0.0.78__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ejkernel-0.0.76 → ejkernel-0.0.78}/PKG-INFO +1 -1
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/__init__.py +1 -1
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/gated_delta_rule/_pallas_impl_fwd.py +41 -2
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/gated_delta_rule/_xla_impl_fwd.py +57 -19
- {ejkernel-0.0.76 → ejkernel-0.0.78}/pyproject.toml +1 -1
- {ejkernel-0.0.76 → ejkernel-0.0.78}/README.md +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/CMakeLists.txt +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_attention.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_attention_ffi.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_attention_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_attention_launch_template.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim128_vhdim128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim192_vhdim192_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim256_vhdim256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim32_vhdim32_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim64_vhdim64_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_fwd_hdim96_vhdim96_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/code_gen.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/code_gen.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/CMakeLists.txt +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/block.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/code_gen.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/copy_sm90_bulk_reduce.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/cuda_check.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/epilogue_bwd.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/epilogue_fwd.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_api.cpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_api_stable.cpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_bwd_kernel_sm80.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_bwd_kernel_sm90.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_bwd_launch_template.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_bwd_postprocess_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_bwd_preprocess_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_fwd_combine.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_fwd_combine_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_fwd_combine_launch_template.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_fwd_kernel_sm80.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_fwd_kernel_sm90.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_fwd_launch_template.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_prepare_scheduler.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/heuristics.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_bf16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim128_fp16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_bf16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim192_fp16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_bf16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim256_fp16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_bf16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim64_fp16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_bf16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_bwd_hdim96_fp16_softcapall_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_bf16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_e4m3_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim128_fp16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_e4m3_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_128_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_bf16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_e4m3_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim192_fp16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_bf16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_e4m3_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim256_fp16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_256_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_512_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_bf16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_e4m3_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim64_fp16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_bf16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_e4m3_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_paged_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_packgqa_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_packgqa_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_packgqa_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_packgqa_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdim96_fp16_split_softcapall_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_e4m3_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimall_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_bf16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_e4m3_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_paged_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_paged_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_paged_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_paged_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_softcap_packgqa_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_split_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/instantiations/flash_fwd_hdimdiff_fp16_split_softcap_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/mainloop_bwd_sm80.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/mainloop_bwd_sm90_tma_gmma_ws.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/mainloop_fwd_sm80.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/mainloop_fwd_sm90_tma_gmma_ws.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/mask.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/named_barrier.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/pack_gqa.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/paged_kv.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/rotary.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/seqlen.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/sm90_pipeline_no_cluster.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/softmax.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/static_switch.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/tile_scheduler.hpp +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/tile_size.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/utils.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/include/c10/cuda/CUDAException.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/include/ejkernel_flash_attention.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/include/ejkernel_flash_attention_cutlass.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/alibi.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/aten_shim.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/block_info.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/code_gen.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/dropout.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_attention_ffi.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim192_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim32_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim64_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_hdim96_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_launch_template.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_bwd_preprocess_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim192_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim32_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim64_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_hdim96_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_launch_template.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim192_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim32_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim64_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_causal_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_causal_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_causal_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_causal_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_causal_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/flash_fwd_split_hdim96_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/hardware_info.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/kernel_traits.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/mask.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/namespace_config.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/philox.cuh +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/philox_unpack.cuh +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/rotary.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/softmax.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/static_switch.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/src/utils.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/CMakeLists.txt +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/code_gen.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_cuda.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_cuda_impl.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits2_bf16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits2_f16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits2_f32.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits3_bf16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits3_f16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits3_f32.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits4_bf16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits4_f16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits4_f32.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits5_bf16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits5_f16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits5_f32.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits6_bf16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits6_f16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits6_f32.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits7_bf16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits7_f16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits7_f32.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits8_bf16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits8_f16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_affine_bits8_f32.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_dispatch.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_kernels.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_mxfp4.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_mxfp8.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_nf4_bf16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_nf4_f16.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_nf4_f32.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_nvfp4.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/quantized_matmul/src/qmm_dequant_nvfp8.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/CMakeLists.txt +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/code_gen.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_ffi.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp32_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp32_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp32_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp32_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim128_fp32_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp32_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp32_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp32_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp32_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim192_fp32_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp32_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp32_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp32_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp32_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim256_fp32_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp32_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp32_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp32_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp32_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim32_fp32_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp32_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp32_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp32_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp32_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim64_fp32_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp32_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp32_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp32_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp32_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_fwd_hdim96_fp32_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/ragged_page_attention_v3/src/rpa_v3_launch_template.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/CMakeLists.txt +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/code_gen.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_cuda.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim128_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim192_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim256_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim32_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim64_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_bf16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_bf16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_bf16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_bf16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_bf16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_fp16_sm100.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_fp16_sm110.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_fp16_sm120.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_fp16_sm80.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_fwd_hdim96_fp16_sm90.cu +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_kernel.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/unified_attention/src/ua_launch_template.h +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/benchmarks.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/build_cudalib.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/callib/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/callib/_cute_call.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/callib/_cute_ffi.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/callib/_ejit.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/callib/_pallas_call.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/callib/_triton_call.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/callib/_utils.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/errors.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/blocksparse_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/blocksparse_attention/_build.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/blocksparse_attention/_cuda_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/blocksparse_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/flash_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/flash_attention/_build.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/flash_attention/_cuda_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/flash_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/quantized_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/quantized_matmul/_build.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/quantized_matmul/_cuda_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/quantized_matmul/_cuda_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/quantized_matmul/_cuda_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/quantized_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/ragged_page_attention_v3/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/ragged_page_attention_v3/_build.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/ragged_page_attention_v3/_cuda_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/ragged_page_attention_v3/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/unified_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/unified_attention/_build.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/unified_attention/_cuda_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cuda/unified_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/chunked_prefill_paged_decode/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/chunked_prefill_paged_decode/_cute_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/chunked_prefill_paged_decode/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/flash_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/flash_attention/_cute_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/flash_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/quantized_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/quantized_matmul/_cute_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/quantized_matmul/_cute_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/quantized_matmul/_cute_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/quantized_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/ragged_page_attention_v3/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/ragged_page_attention_v3/_cute_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/ragged_page_attention_v3/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/unified_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/unified_attention/_cute_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_cute/unified_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/gpu/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/gpu/ragged_decode_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/gpu/ragged_decode_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/gpu/ragged_decode_attention/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/gpu/scaled_dot_product_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/gpu/scaled_dot_product_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/gpu/scaled_dot_product_attention/_pallas_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/gpu/scaled_dot_product_attention/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/all_gather_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/all_gather_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/all_gather_matmul/_pallas_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/blocksparse_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/blocksparse_attention/_info.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/blocksparse_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/blocksparse_attention/_kernel.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/blocksparse_attention/_masks.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/deepseek_attn/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/deepseek_attn/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/deepseek_attn/_pallas_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/deepseek_attn/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_attention/_pallas_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_attention/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_attention/_utils.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_mla/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_mla/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_mla/_pallas_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_mla/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/flash_mla/_utils.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/gated_delta_rule/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/gated_delta_rule/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/gated_delta_rule/_pallas_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmul/_pallas_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmul/_utils.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmulv2/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmulv2/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmulv2/_pallas_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmulv3/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmulv3/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/grouped_matmulv3/_pallas_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/multi_latent_ragged_page_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/multi_latent_ragged_page_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/multi_latent_ragged_page_attention/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/multi_latent_ragged_page_attention_v2/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/multi_latent_ragged_page_attention_v2/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/multi_latent_ragged_page_attention_v2/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/page_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/page_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/page_attention/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/prefill_page_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/prefill_page_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/prefill_page_attention/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/quantized_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/quantized_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/quantized_matmul/_pallas_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/quantized_matmul/_pallas_impl_core.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/quantized_matmul/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_decode_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_decode_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_decode_attention/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_gated_delta_rule/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_gated_delta_rule/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_gated_delta_rule/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v2/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v2/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v2/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v2/_utils.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v3/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v3/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v3/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v3/_pallas_impl_fwd_h64.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ragged_page_attention_v3/_utils.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/reduce_scatter_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/reduce_scatter_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/reduce_scatter_matmul/_pallas_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ring_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ring_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ring_attention/_pallas_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_pallas/tpu/ring_attention/_pallas_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_registry.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/blocksparse_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/blocksparse_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/blocksparse_attention/_mask.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/blocksparse_attention/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/blocksparse_attention/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/blocksparse_attention/_utilities.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/chunked_prefill_paged_decode/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/chunked_prefill_paged_decode/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/chunked_prefill_paged_decode/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/decode_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/decode_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/decode_attention/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_attention/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_attention/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_attention/_utilities.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_mla/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_mla/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_mla/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_mla/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/flash_mla/_utilities.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/gla/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/gla/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/gla/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/gla/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/lightning_attn/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/lightning_attn/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/lightning_attn/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/lightning_attn/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/mean_pooling/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/mean_pooling/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/mean_pooling/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/mean_pooling/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/native_sparse_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/native_sparse_attention/_compression.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/native_sparse_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/native_sparse_attention/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/native_sparse_attention/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/native_sparse_attention/_utilities.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/page_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/page_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/page_attention/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/quantized_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/quantized_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/quantized_matmul/_triton_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/quantized_matmul/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/quantized_matmul/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/quantized_matmul/_triton_impl_gemv.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_decode_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_decode_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_decode_attention/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_page_attention_v2/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_page_attention_v2/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_page_attention_v2/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_page_attention_v3/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_page_attention_v3/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ragged_page_attention_v3/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/recurrent/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/recurrent/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/recurrent/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/recurrent/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ring_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ring_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ring_attention/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/ring_attention/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv4/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv4/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv4/_triton_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv4/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv6/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv6/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv6/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv7/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv7/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/rwkv7/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/unified_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/unified_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_triton/unified_attention/_triton_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/all_gather_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/all_gather_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/all_gather_matmul/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/all_gather_matmul/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/attention/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/blocksparse_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/blocksparse_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/blocksparse_attention/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/blocksparse_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/chunked_prefill_paged_decode/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/chunked_prefill_paged_decode/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/chunked_prefill_paged_decode/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/decode_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/decode_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/decode_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/deepseek_attn/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/deepseek_attn/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/deepseek_attn/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/deepseek_attn/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/flash_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/flash_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/flash_attention/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/flash_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/flash_mla/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/flash_mla/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/flash_mla/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/flash_mla/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/gated_delta_rule/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/gated_delta_rule/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/gated_delta_rule/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/gla/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/gla/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/gla/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/gla/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/grouped_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/grouped_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/grouped_matmul/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/grouped_matmul/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/grouped_matmulv3/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/grouped_matmulv3/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/kernel_delta_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/kernel_delta_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/kernel_delta_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/lightning_attn/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/lightning_attn/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/lightning_attn/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/lightning_attn/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/mean_pooling/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/mean_pooling/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/mean_pooling/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/mean_pooling/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/multi_latent_ragged_page_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/multi_latent_ragged_page_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/multi_latent_ragged_page_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/multi_latent_ragged_page_attention_v2/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/multi_latent_ragged_page_attention_v2/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/multi_latent_ragged_page_attention_v2/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/native_sparse_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/native_sparse_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/native_sparse_attention/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/native_sparse_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/page_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/page_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/page_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/prefill_page_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/prefill_page_attention/_impl.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/prefill_page_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/quantized_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/quantized_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/quantized_matmul/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/quantized_matmul/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_decode_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_decode_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_decode_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_gated_delta_rule/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_gated_delta_rule/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_gated_delta_rule/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v2/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v2/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v2/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v2_turboquant/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v2_turboquant/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v2_turboquant/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v3/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v3/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v3/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v3/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v3_turboquant/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v3_turboquant/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ragged_page_attention_v3_turboquant/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/recurrent/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/recurrent/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/recurrent/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/recurrent/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/reduce_scatter_matmul/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/reduce_scatter_matmul/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/reduce_scatter_matmul/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/reduce_scatter_matmul/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ring_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ring_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ring_attention/_utils.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ring_attention/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/ring_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv4/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv4/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv4/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv4/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv6/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv6/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv6/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv6/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv7/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv7/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv7/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/rwkv7/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/scaled_dot_product_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/scaled_dot_product_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/scaled_dot_product_attention/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/scaled_dot_product_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/state_space_v1/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/state_space_v1/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/state_space_v1/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/state_space_v1/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/state_space_v2/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/state_space_v2/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/state_space_v2/_xla_impl_bwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/state_space_v2/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/unified_attention/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/unified_attention/_interface.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/kernels/_xla/unified_attention/_xla_impl_fwd.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/loggings.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/base.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/all_gather_matmul.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/blocksparse_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/chunked_prefill_paged_decode.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/configs.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/decode_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/deepseek_attn.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/flash_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/gated_delta_rule.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/gated_linear_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/grouped_matmul.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/kernel_delta_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/lightning_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/multi_head_latent_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/multi_latent_ragged_page_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/multi_latent_ragged_page_attention_v2.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/native_sparse_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/page_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/pooling.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/prefill_page_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/quantized_matmul.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/ragged_decode_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/ragged_gated_delta_rule.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/ragged_page_attention_v2.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/ragged_page_attention_v2_turboquant.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/ragged_page_attention_v3.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/ragged_page_attention_v3_turboquant.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/recurrent.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/reduce_scatter_matmul.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/ring_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/rwkv4.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/rwkv6.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/rwkv7.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/scaled_dot_product_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/state_space_v1.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/state_space_v2.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/modules/operations/unified_attention.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/config/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/config/cache.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/config/persistent.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/config/selection.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/core/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/core/kernel.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/core/types.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/execution/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/execution/batch.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/execution/executor.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/execution/offline.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/execution/profiler.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/execution/tuning.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/registry.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/utils/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/utils/datacarrier.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/utils/fingerprint.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/utils/meta.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/ops/utils/serialize.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/_quants/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/_quants/quantizations.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/_utils/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/_utils/bitpack.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/_utils/fp_tables.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/_utils/grouping.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/_utils/qparams.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/quantized_array.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/runtime.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/turboquant/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/turboquant/codebook.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/turboquant/matrices.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/turboquant/ops.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/quantization/turboquant/packing.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/types/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/types/mask.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/utils.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/xla_utils/__init__.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/xla_utils/cumsum.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/xla_utils/shardings.py +0 -0
- {ejkernel-0.0.76 → ejkernel-0.0.78}/ejkernel/xla_utils/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ejkernel
|
|
3
|
-
Version: 0.0.76
|
|
3
|
+
Version: 0.0.78
|
|
4
4
|
Summary: Accelerate, Optimize performance with streamlined training and serving options with JAX.
|
|
5
5
|
Keywords: Deep Learning,Machine Learning,JAX,CUDA,XLA,Triton,Pallas
|
|
6
6
|
Author: Erfan Zare Chavoshi
|
|
@@ -43,7 +43,7 @@ import os as _os
|
|
|
43
43
|
_os.environ.setdefault("TF_GPU_ALLOCATOR", "cuda_malloc_async")
|
|
44
44
|
_os.environ.setdefault("CUTE_DSL_ENABLE_TVM_FFI", "1")
|
|
45
45
|
|
|
46
|
-
__version__ = "0.0.76"
|
|
46
|
+
__version__ = "0.0.78"
|
|
47
47
|
|
|
48
48
|
from . import errors, kernels, modules, types, utils, xla_utils
|
|
49
49
|
from .errors import EjkernelRuntimeError
|
|
@@ -34,7 +34,7 @@ from jax.experimental import pallas as pl
|
|
|
34
34
|
from jax.experimental.pallas import tpu as pltpu
|
|
35
35
|
from jaxtyping import Array, Float
|
|
36
36
|
|
|
37
|
-
from ...._xla.gated_delta_rule._xla_impl_fwd import _l2norm_with_inv
|
|
37
|
+
from ...._xla.gated_delta_rule._xla_impl_fwd import _l2norm_with_inv, _recurrent_gdr_fwd
|
|
38
38
|
|
|
39
39
|
_P = lax.Precision.DEFAULT
|
|
40
40
|
_N_FUSE = 1
|
|
@@ -53,7 +53,15 @@ def _chunk_blockspec(shape: tuple[int, ...]) -> pl.BlockSpec:
|
|
|
53
53
|
def _neumann_inv(A, C, strict_lower=None, lower_mask=None):
|
|
54
54
|
"""Compute (I - A)^{-1} via repeated squaring. Input must be pre-sanitized."""
|
|
55
55
|
_hp = lax.Precision.HIGHEST
|
|
56
|
-
|
|
56
|
+
# ``A`` is strict-lower triangular, so the Neumann series terminates
|
|
57
|
+
# exactly after at most ``C - 1`` powers. Repeated squaring needs
|
|
58
|
+
# ``ceil(log2(C))`` stages to materialize all terms up to ``A^(C-1)``.
|
|
59
|
+
#
|
|
60
|
+
# Clipping this to 4 only reconstructs powers through ``A^15``. That
|
|
61
|
+
# makes partially-filled chunks numerically wrong once the valid prefix
|
|
62
|
+
# exceeds ~16 tokens, which shows up most clearly on padded-heavy SFT
|
|
63
|
+
# batches where the last chunk is only partially active.
|
|
64
|
+
num_iters = math.ceil(math.log2(C)) if C > 1 else 0
|
|
57
65
|
if strict_lower is None:
|
|
58
66
|
strict_lower = jnp.tril(jnp.ones((C, C), dtype=jnp.float32), k=-1)
|
|
59
67
|
if lower_mask is None:
|
|
@@ -511,6 +519,37 @@ def _chunk_gdr_bwd_rule(chunk_size, use_qk_l2norm, res, g):
|
|
|
511
519
|
_chunk_gdr_fwd.defvjp(_chunk_gdr_fwd_rule, _chunk_gdr_bwd_rule)
|
|
512
520
|
|
|
513
521
|
|
|
522
|
+
def _chunk_gdr_fwd(
|
|
523
|
+
query: Float[Array, "batch num_heads seq_len head_dim"],
|
|
524
|
+
key: Float[Array, "batch num_heads seq_len head_dim"],
|
|
525
|
+
value: Float[Array, "batch num_heads seq_len d_state"],
|
|
526
|
+
beta: Float[Array, "batch num_heads seq_len"],
|
|
527
|
+
decay: Float[Array, "batch num_heads seq_len"] | None,
|
|
528
|
+
chunk_size: int = 64,
|
|
529
|
+
initial_state: Float[Array, "batch num_heads head_dim d_state"] | None = None,
|
|
530
|
+
use_qk_l2norm: bool = True,
|
|
531
|
+
) -> tuple[
|
|
532
|
+
Float[Array, "batch num_heads seq_len d_state"],
|
|
533
|
+
Float[Array, "batch num_heads head_dim d_state"],
|
|
534
|
+
]:
|
|
535
|
+
"""Exact multi-token chunked GDR path for TPU.
|
|
536
|
+
|
|
537
|
+
Keep the optimized Pallas single-token decode kernel, but route the
|
|
538
|
+
unstable multi-token training/prefill path through the exact chunked
|
|
539
|
+
triangular-solve implementation.
|
|
540
|
+
"""
|
|
541
|
+
return _recurrent_gdr_fwd(
|
|
542
|
+
query=query,
|
|
543
|
+
key=key,
|
|
544
|
+
value=value,
|
|
545
|
+
beta=beta,
|
|
546
|
+
decay=decay,
|
|
547
|
+
initial_state=initial_state,
|
|
548
|
+
use_qk_l2norm=use_qk_l2norm,
|
|
549
|
+
chunk_size=chunk_size,
|
|
550
|
+
)
|
|
551
|
+
|
|
552
|
+
|
|
514
553
|
def _gdr_single_step_fwd_kernel(q_ref, k_ref, v_ref, beta_ref, decay_ref, state_ref, out_ref, final_state_ref):
|
|
515
554
|
q_t = q_ref[0, 0, 0].astype(jnp.float32)
|
|
516
555
|
k_t = k_ref[0, 0, 0].astype(jnp.float32)
|
|
@@ -174,9 +174,9 @@ def _recurrent_gdr_fwd(
|
|
|
174
174
|
initial_state = initial_state.astype(jnp.float32)
|
|
175
175
|
|
|
176
176
|
if decay is None:
|
|
177
|
-
decay = jnp.zeros((B, H, L), dtype=
|
|
177
|
+
decay = jnp.zeros((B, H, L), dtype=input_dtype)
|
|
178
178
|
else:
|
|
179
|
-
decay = decay.astype(
|
|
179
|
+
decay = decay.astype(input_dtype)
|
|
180
180
|
|
|
181
181
|
pad_size = (C - L % C) % C
|
|
182
182
|
if pad_size > 0:
|
|
@@ -205,7 +205,7 @@ def _recurrent_gdr_fwd(
|
|
|
205
205
|
strict_lower = jnp.tril(jnp.ones((C, C), dtype=jnp.bool_), k=-1)
|
|
206
206
|
lower_mask = jnp.tril(jnp.ones((C, C), dtype=jnp.bool_))
|
|
207
207
|
g_diff = jnp.where(strict_lower, g_diff, -1e30)
|
|
208
|
-
S = jnp.where(strict_lower, S * jnp.exp(g_diff), 0.0)
|
|
208
|
+
S = jnp.where(strict_lower, S * jnp.exp(jnp.clip(g_diff, -20.0, 20.0)), 0.0)
|
|
209
209
|
|
|
210
210
|
eye = jnp.eye(C, dtype=jnp.float32)
|
|
211
211
|
lhs = jnp.broadcast_to(eye, S.shape) + S
|
|
@@ -223,17 +223,17 @@ def _recurrent_gdr_fwd(
|
|
|
223
223
|
input_dtype
|
|
224
224
|
)
|
|
225
225
|
|
|
226
|
-
k_beta_g = k_beta.astype(jnp.float32) * jnp.exp(g_cumsum)[..., None]
|
|
226
|
+
k_beta_g = k_beta.astype(jnp.float32) * jnp.exp(jnp.clip(g_cumsum, -20.0, 20.0))[..., None]
|
|
227
227
|
w_chunks = jnp.einsum("bhcij,bhcjk->bhcik", A, k_beta_g, precision=_MATMUL_PRECISION).astype(input_dtype)
|
|
228
228
|
|
|
229
229
|
attn_qk = jnp.einsum("bhcik,bhcjk->bhcij", q_c, k_c, precision=_MATMUL_PRECISION).astype(jnp.float32)
|
|
230
230
|
g_diff_intra = g_cumsum[..., :, None] - g_cumsum[..., None, :]
|
|
231
231
|
g_diff_intra = jnp.where(lower_mask, g_diff_intra, -1e30)
|
|
232
|
-
attn_i = jnp.where(lower_mask, attn_qk * jnp.exp(g_diff_intra), 0.0).astype(input_dtype)
|
|
232
|
+
attn_i = jnp.where(lower_mask, attn_qk * jnp.exp(jnp.clip(g_diff_intra, -20.0, 20.0)), 0.0).astype(input_dtype)
|
|
233
233
|
|
|
234
|
-
q_g = (q_c.astype(jnp.float32) * jnp.exp(g_cumsum)[..., None]).astype(input_dtype)
|
|
235
|
-
g_end_exp = jnp.exp(g_cumsum[..., -1])[..., None, None]
|
|
236
|
-
g_diff_state = jnp.exp(g_cumsum[..., -1, None] - g_cumsum)[..., None]
|
|
234
|
+
q_g = (q_c.astype(jnp.float32) * jnp.exp(jnp.clip(g_cumsum, -20.0, 20.0))[..., None]).astype(input_dtype)
|
|
235
|
+
g_end_exp = jnp.exp(jnp.clip(g_cumsum[..., -1], -20.0, 20.0))[..., None, None]
|
|
236
|
+
g_diff_state = jnp.exp(jnp.clip(g_cumsum[..., -1, None] - g_cumsum, -20.0, 20.0))[..., None]
|
|
237
237
|
k_g_diff = (k_c.astype(jnp.float32) * g_diff_state).astype(input_dtype)
|
|
238
238
|
|
|
239
239
|
xs = (
|
|
@@ -339,9 +339,9 @@ def _chunk_gdr_fwd_core(
|
|
|
339
339
|
key, k_inv_norm = _l2norm_with_inv(key, axis=-1, eps=1e-6)
|
|
340
340
|
|
|
341
341
|
if decay is None:
|
|
342
|
-
decay = jnp.zeros((B, H, L), dtype=
|
|
342
|
+
decay = jnp.zeros((B, H, L), dtype=jnp.float32)
|
|
343
343
|
else:
|
|
344
|
-
decay = decay.astype(
|
|
344
|
+
decay = decay.astype(jnp.float32)
|
|
345
345
|
|
|
346
346
|
pad_size = (chunk_size - L % chunk_size) % chunk_size
|
|
347
347
|
if pad_size > 0:
|
|
@@ -374,7 +374,7 @@ def _chunk_gdr_fwd_core(
|
|
|
374
374
|
|
|
375
375
|
g_diff = g_cumsum[:, :, :, :, None] - g_cumsum[:, :, :, None, :]
|
|
376
376
|
g_diff = jnp.tril(g_diff)
|
|
377
|
-
decay_mask = jnp.exp(g_diff)
|
|
377
|
+
decay_mask = jnp.exp(jnp.clip(g_diff, -20.0, 20.0))
|
|
378
378
|
decay_mask = jnp.tril(decay_mask)
|
|
379
379
|
|
|
380
380
|
attn = jnp.einsum("bhcik,bhcjk->bhcij", k_beta, key, precision=_MATMUL_PRECISION)
|
|
@@ -387,10 +387,10 @@ def _chunk_gdr_fwd_core(
|
|
|
387
387
|
|
|
388
388
|
attn = jnp.nan_to_num(inv, nan=0.0, posinf=0.0, neginf=0.0).astype(input_dtype)
|
|
389
389
|
|
|
390
|
-
g_cumsum_exp = jnp.exp(g_cumsum).astype(input_dtype)
|
|
390
|
+
g_cumsum_exp = jnp.exp(jnp.clip(g_cumsum, -20.0, 20.0)).astype(input_dtype)
|
|
391
391
|
g_end = g_cumsum[:, :, :, -1]
|
|
392
|
-
g_end_exp = jnp.exp(g_end).astype(input_dtype)
|
|
393
|
-
g_diff_state_exp = jnp.exp(g_end[:, :, :, None] - g_cumsum).astype(input_dtype)
|
|
392
|
+
g_end_exp = jnp.exp(jnp.clip(g_end, -20.0, 20.0)).astype(input_dtype)
|
|
393
|
+
g_diff_state_exp = jnp.exp(jnp.clip(g_end[:, :, :, None] - g_cumsum, -20.0, 20.0)).astype(input_dtype)
|
|
394
394
|
|
|
395
395
|
value_local = jnp.einsum("bhcij,bhcjv->bhciv", attn, v_beta, precision=_MATMUL_PRECISION)
|
|
396
396
|
k_beta_scaled = k_beta * g_cumsum_exp[:, :, :, :, None]
|
|
@@ -424,15 +424,21 @@ def _chunk_gdr_fwd_core(
|
|
|
424
424
|
q_scaled = q_i * g_exp_i[:, :, :, None]
|
|
425
425
|
qk_fused = jnp.stack([k_cumdecay_i, q_scaled], axis=0)
|
|
426
426
|
both = jnp.einsum("nbhik,bhkv->nbhiv", qk_fused, state, precision=_MATMUL_PRECISION)
|
|
427
|
-
v_prime
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
427
|
+
v_prime = jnp.nan_to_num(both[0], nan=0.0, posinf=0.0, neginf=0.0)
|
|
428
|
+
attn_inter = jnp.nan_to_num(both[1], nan=0.0, posinf=0.0, neginf=0.0)
|
|
429
|
+
|
|
430
|
+
v_new = jnp.nan_to_num(v_i - v_prime, nan=0.0, posinf=0.0, neginf=0.0)
|
|
431
|
+
core_out = jnp.nan_to_num(
|
|
432
|
+
attn_inter + jnp.einsum("bhij,bhjv->bhiv", attn_qk, v_new, precision=_MATMUL_PRECISION),
|
|
433
|
+
nan=0.0,
|
|
434
|
+
posinf=0.0,
|
|
435
|
+
neginf=0.0,
|
|
436
|
+
)
|
|
431
437
|
|
|
432
438
|
state_decayed = state * g_end_exp_i[:, :, None, None]
|
|
433
439
|
k_scaled = k_i * g_diff_exp_i[:, :, :, None]
|
|
434
440
|
state_update = jnp.einsum("bhik,bhiv->bhkv", k_scaled, v_new, precision=_MATMUL_PRECISION)
|
|
435
|
-
new_state = (state_decayed + state_update).astype(state.dtype)
|
|
441
|
+
new_state = jnp.nan_to_num(state_decayed + state_update, nan=0.0, posinf=0.0, neginf=0.0).astype(state.dtype)
|
|
436
442
|
|
|
437
443
|
return new_state, core_out.astype(input_dtype)
|
|
438
444
|
|
|
@@ -537,6 +543,38 @@ def _chunk_gdr_bwd_rule(chunk_size, use_qk_l2norm, res, g):
|
|
|
537
543
|
_chunk_gdr_fwd.defvjp(_chunk_gdr_fwd_rule, _chunk_gdr_bwd_rule)
|
|
538
544
|
|
|
539
545
|
|
|
546
|
+
def _chunk_gdr_fwd(
|
|
547
|
+
query: Float[Array, "batch num_heads seq_len head_dim"],
|
|
548
|
+
key: Float[Array, "batch num_heads seq_len head_dim"],
|
|
549
|
+
value: Float[Array, "batch num_heads seq_len d_state"],
|
|
550
|
+
beta: Float[Array, "batch num_heads seq_len"],
|
|
551
|
+
decay: Float[Array, "batch num_heads seq_len"] | None,
|
|
552
|
+
chunk_size: int = 64,
|
|
553
|
+
initial_state: Float[Array, "batch num_heads head_dim d_state"] | None = None,
|
|
554
|
+
use_qk_l2norm: bool = True,
|
|
555
|
+
) -> tuple[
|
|
556
|
+
Float[Array, "batch num_heads seq_len d_state"],
|
|
557
|
+
Float[Array, "batch num_heads head_dim d_state"],
|
|
558
|
+
]:
|
|
559
|
+
"""Exact multi-token chunked GDR path.
|
|
560
|
+
|
|
561
|
+
The previous Neumann/custom-VJP implementation diverges catastrophically on
|
|
562
|
+
real padded SFT batches even though isolated tensor probes looked small.
|
|
563
|
+
Use the exact triangular-solve chunked formulation for the production
|
|
564
|
+
multi-token path and rely on standard autodiff through it.
|
|
565
|
+
"""
|
|
566
|
+
return _recurrent_gdr_fwd(
|
|
567
|
+
query=query,
|
|
568
|
+
key=key,
|
|
569
|
+
value=value,
|
|
570
|
+
beta=beta,
|
|
571
|
+
decay=decay,
|
|
572
|
+
initial_state=initial_state,
|
|
573
|
+
use_qk_l2norm=use_qk_l2norm,
|
|
574
|
+
chunk_size=chunk_size,
|
|
575
|
+
)
|
|
576
|
+
|
|
577
|
+
|
|
540
578
|
def _single_step_gdr_fwd(
|
|
541
579
|
query: Float[Array, "batch num_heads 1 head_dim"],
|
|
542
580
|
key: Float[Array, "batch num_heads 1 head_dim"],
|
|
@@ -4,7 +4,7 @@ build-backend = "uv_build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ejkernel"
|
|
7
|
-
version = "0.0.76"
|
|
7
|
+
version = "0.0.78"
|
|
8
8
|
authors = [{ name = "Erfan Zare Chavoshi", email = "Erfanzare810@gmail.com" }]
|
|
9
9
|
description = "Accelerate, Optimize performance with streamlined training and serving options with JAX."
|
|
10
10
|
readme = "README.md"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_attention_ffi.cu
RENAMED
|
File without changes
|
{ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/blocksparse_attention/src/blocksparse_attention_kernel.h
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_bwd_postprocess_kernel.h
RENAMED
|
File without changes
|
{ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_bwd_preprocess_kernel.h
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ejkernel-0.0.76 → ejkernel-0.0.78}/csrc/flash_attention/hopper/flash_fwd_combine_launch_template.h
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|