PyPI - causal-conv1d - Versions diffs - 1.2.2.post1__tar.gz → 1.3.0.post1__tar.gz - Mend

causal-conv1d 1.2.2.post1tar.gz → 1.3.0.post1tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{causal_conv1d-1.2.2.post1 → causal_conv1d-1.3.0.post1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: causal_conv1d
-Version: 1.2.2.post1
+Version: 1.3.0.post1
 Summary: Causal depthwise conv1d in CUDA, with a PyTorch interface
 Home-page: https://github.com/Dao-AILab/causal-conv1d
 Author: Tri Dao
@@ -43,3 +43,16 @@ import torch.nn.functional as F
 F.conv1d(x, weight.unsqueeze(1), bias, padding=width - 1, groups=dim)[..., :seqlen]
 ```
+## Additional Prerequisites for AMD cards
+### Patching ROCm
+If you are on ROCm 6.0, run the following steps to avoid errors during compilation. This is not required for ROCm 6.1 onwards.
+1. Locate your ROCm installation directory. This is typically found at `/opt/rocm/`, but may vary depending on your installation.
+2. Apply the Patch. Run with `sudo` in case you encounter permission issues.
+   ```bash
+    patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h < rocm_patch/rocm6_0.patch
+   ```

{causal_conv1d-1.2.2.post1 → causal_conv1d-1.3.0.post1}/README.md RENAMED Viewed

@@ -28,3 +28,16 @@ import torch.nn.functional as F
 F.conv1d(x, weight.unsqueeze(1), bias, padding=width - 1, groups=dim)[..., :seqlen]
 ```
+## Additional Prerequisites for AMD cards
+### Patching ROCm
+If you are on ROCm 6.0, run the following steps to avoid errors during compilation. This is not required for ROCm 6.1 onwards.
+1. Locate your ROCm installation directory. This is typically found at `/opt/rocm/`, but may vary depending on your installation.
+2. Apply the Patch. Run with `sudo` in case you encounter permission issues.
+   ```bash
+    patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h < rocm_patch/rocm6_0.patch
+   ```

{causal_conv1d-1.2.2.post1 → causal_conv1d-1.3.0.post1}/causal_conv1d/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
-__version__ = "1.2.2.post1"
+__version__ = "1.3.0.post1"
 from causal_conv1d.causal_conv1d_interface import causal_conv1d_fn, causal_conv1d_update

{causal_conv1d-1.2.2.post1 → causal_conv1d-1.3.0.post1}/causal_conv1d.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: causal-conv1d
-Version: 1.2.2.post1
+Version: 1.3.0.post1
 Summary: Causal depthwise conv1d in CUDA, with a PyTorch interface
 Home-page: https://github.com/Dao-AILab/causal-conv1d
 Author: Tri Dao
@@ -43,3 +43,16 @@ import torch.nn.functional as F
 F.conv1d(x, weight.unsqueeze(1), bias, padding=width - 1, groups=dim)[..., :seqlen]
 ```
+## Additional Prerequisites for AMD cards
+### Patching ROCm
+If you are on ROCm 6.0, run the following steps to avoid errors during compilation. This is not required for ROCm 6.1 onwards.
+1. Locate your ROCm installation directory. This is typically found at `/opt/rocm/`, but may vary depending on your installation.
+2. Apply the Patch. Run with `sudo` in case you encounter permission issues.
+   ```bash
+    patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h < rocm_patch/rocm6_0.patch
+   ```

{causal_conv1d-1.2.2.post1 → causal_conv1d-1.3.0.post1}/setup.py RENAMED Viewed

@@ -18,7 +18,7 @@ import urllib.error
 from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
 import torch
-from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME
+from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME, HIP_HOME
 with open("README.md", "r", encoding="utf-8") as fh:
@@ -66,6 +66,45 @@ def get_cuda_bare_metal_version(cuda_dir):
     return raw_output, bare_metal_version
+def get_hip_version(rocm_dir):
+    hipcc_bin = "hipcc" if rocm_dir is None else os.path.join(rocm_dir, "bin", "hipcc")
+    try:
+        raw_output = subprocess.check_output(
+            [hipcc_bin, "--version"], universal_newlines=True
+        )
+    except Exception as e:
+        print(
+            f"hip installation not found: {e} ROCM_PATH={os.environ.get('ROCM_PATH')}"
+        )
+        return None, None
+    for line in raw_output.split("\n"):
+        if "HIP version" in line:
+            rocm_version = parse(line.split()[-1].replace("-", "+")) # local version is not parsed correctly
+            return line, rocm_version
+    return None, None
+def get_torch_hip_version():
+    if torch.version.hip:
+        return parse(torch.version.hip.split()[-1].replace("-", "+"))
+    else:
+        return None
+def check_if_hip_home_none(global_option: str) -> None:
+    if HIP_HOME is not None:
+        return
+    # warn instead of error because user could be downloading prebuilt wheels, so hipcc won't be necessary
+    # in that case.
+    warnings.warn(
+        f"{global_option} was requested, but hipcc was not found.  Are you sure your environment has hipcc available?"
+    )
 def check_if_cuda_home_none(global_option: str) -> None:
     if CUDA_HOME is not None:
         return
@@ -85,37 +124,67 @@ def append_nvcc_threads(nvcc_extra_args):
 cmdclass = {}
 ext_modules = []
+HIP_BUILD = bool(torch.version.hip)
 if not SKIP_CUDA_BUILD:
     print("\n\ntorch.__version__  = {}\n\n".format(torch.__version__))
     TORCH_MAJOR = int(torch.__version__.split(".")[0])
     TORCH_MINOR = int(torch.__version__.split(".")[1])
-    check_if_cuda_home_none("causal_conv1d")
-    # Check, if CUDA11 is installed for compute capability 8.0
     cc_flag = []
-    if CUDA_HOME is not None:
-        _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
-        if bare_metal_version < Version("11.6"):
-            raise RuntimeError(
-                "causal_conv1d is only supported on CUDA 11.6 and above.  "
-                "Note: make sure nvcc has a supported version by running nvcc -V."
-            )
-    cc_flag.append("-gencode")
-    cc_flag.append("arch=compute_53,code=sm_53")
-    cc_flag.append("-gencode")
-    cc_flag.append("arch=compute_62,code=sm_62")
-    cc_flag.append("-gencode")
-    cc_flag.append("arch=compute_70,code=sm_70")
-    cc_flag.append("-gencode")
-    cc_flag.append("arch=compute_72,code=sm_72")
-    cc_flag.append("-gencode")
-    cc_flag.append("arch=compute_80,code=sm_80")
-    cc_flag.append("-gencode")
-    cc_flag.append("arch=compute_87,code=sm_87")
-    if bare_metal_version >= Version("11.8"):
+    if HIP_BUILD:
+        check_if_hip_home_none(PACKAGE_NAME)
+        rocm_home = os.getenv("ROCM_PATH")
+        _, hip_version = get_hip_version(rocm_home)
+        if HIP_HOME is not None:
+            if hip_version < Version("6.0"):
+                raise RuntimeError(
+                    f"{PACKAGE_NAME} is only supported on ROCm 6.0 and above.  "
+                    "Note: make sure HIP has a supported version by running hipcc --version."
+                )
+            if hip_version == Version("6.0"):
+                warnings.warn(
+                    f"{PACKAGE_NAME} requires a patch to be applied when running on ROCm 6.0. "
+                    "Refer to the README.md for detailed instructions.",
+                    UserWarning
+                )
+        cc_flag.append("-DBUILD_PYTHON_PACKAGE")
+    else:
+        check_if_cuda_home_none(PACKAGE_NAME)
+        # Check, if CUDA11 is installed for compute capability 8.0
+        if CUDA_HOME is not None:
+            _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME)
+            if bare_metal_version < Version("11.6"):
+                raise RuntimeError(
+                    f"{PACKAGE_NAME} is only supported on CUDA 11.6 and above.  "
+                    "Note: make sure nvcc has a supported version by running nvcc -V."
+                )
+        cc_flag.append("-gencode")
+        cc_flag.append("arch=compute_53,code=sm_53")
+        cc_flag.append("-gencode")
+        cc_flag.append("arch=compute_62,code=sm_62")
+        cc_flag.append("-gencode")
+        cc_flag.append("arch=compute_70,code=sm_70")
+        cc_flag.append("-gencode")
+        cc_flag.append("arch=compute_72,code=sm_72")
         cc_flag.append("-gencode")
-        cc_flag.append("arch=compute_90,code=sm_90")
+        cc_flag.append("arch=compute_80,code=sm_80")
+        cc_flag.append("-gencode")
+        cc_flag.append("arch=compute_87,code=sm_87")
+        if bare_metal_version >= Version("11.8"):
+            cc_flag.append("-gencode")
+            cc_flag.append("arch=compute_90,code=sm_90")
     # HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
     # torch._C._GLIBCXX_USE_CXX11_ABI
@@ -123,6 +192,43 @@ if not SKIP_CUDA_BUILD:
     if FORCE_CXX11_ABI:
         torch._C._GLIBCXX_USE_CXX11_ABI = True
+    if HIP_BUILD:
+        extra_compile_args = {
+            "cxx": ["-O3", "-std=c++17"],
+            "nvcc": [
+                "-O3",
+                "-std=c++17",
+                f"--offload-arch={os.getenv('HIP_ARCHITECTURES', 'native')}",
+                "-U__CUDA_NO_HALF_OPERATORS__",
+                "-U__CUDA_NO_HALF_CONVERSIONS__",
+                "-DCK_FMHA_FWD_FAST_EXP2=1",
+                "-fgpu-flush-denormals-to-zero",
+            ]
+            + cc_flag,
+        }
+    else:
+        extra_compile_args = {
+            "cxx": ["-O3"],
+            "nvcc": append_nvcc_threads(
+                [
+                    "-O3",
+                    "-U__CUDA_NO_HALF_OPERATORS__",
+                    "-U__CUDA_NO_HALF_CONVERSIONS__",
+                    "-U__CUDA_NO_BFLOAT16_OPERATORS__",
+                    "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
+                    "-U__CUDA_NO_BFLOAT162_OPERATORS__",
+                    "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
+                    "--expt-relaxed-constexpr",
+                    "--expt-extended-lambda",
+                    "--use_fast_math",
+                    "--ptxas-options=-v",
+                    "-lineinfo",
+                ]
+                + cc_flag
+            ),
+        }
     ext_modules.append(
         CUDAExtension(
             name="causal_conv1d_cuda",
@@ -132,26 +238,7 @@ if not SKIP_CUDA_BUILD:
                 "csrc/causal_conv1d_bwd.cu",
                 "csrc/causal_conv1d_update.cu",
             ],
-            extra_compile_args={
-                "cxx": ["-O3"],
-                "nvcc": append_nvcc_threads(
-                    [
-                        "-O3",
-                        "-U__CUDA_NO_HALF_OPERATORS__",
-                        "-U__CUDA_NO_HALF_CONVERSIONS__",
-                        "-U__CUDA_NO_BFLOAT16_OPERATORS__",
-                        "-U__CUDA_NO_BFLOAT16_CONVERSIONS__",
-                        "-U__CUDA_NO_BFLOAT162_OPERATORS__",
-                        "-U__CUDA_NO_BFLOAT162_CONVERSIONS__",
-                        "--expt-relaxed-constexpr",
-                        "--expt-extended-lambda",
-                        "--use_fast_math",
-                        "--ptxas-options=-v",
-                        "-lineinfo",
-                    ]
-                    + cc_flag
-                ),
-            },
+            extra_compile_args=extra_compile_args,
             include_dirs=[Path(this_dir) / "csrc" / "causal_conv1d"],
         )
     )
@@ -169,24 +256,36 @@ def get_package_version():
 def get_wheel_url():
     # Determine the version numbers that will be used to determine the correct wheel
-    # We're using the CUDA version used to build torch, not the one currently installed
-    # _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
-    torch_cuda_version = parse(torch.version.cuda)
     torch_version_raw = parse(torch.__version__)
-    # For CUDA 11, we only compile for CUDA 11.8, and for CUDA 12 we only compile for CUDA 12.2
-    # to save CI time. Minor versions should be compatible.
-    torch_cuda_version = parse("11.8") if torch_cuda_version.major == 11 else parse("12.2")
+    if HIP_BUILD:
+        # We're using the HIP version used to build torch, not the one currently installed
+        torch_hip_version = get_torch_hip_version()
+        hip_version = f"{torch_hip_version.major}{torch_hip_version.minor}"
+    else:
+        # We're using the CUDA version used to build torch, not the one currently installed
+        # _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME)
+        torch_cuda_version = parse(torch.version.cuda)
+        # For CUDA 11, we only compile for CUDA 11.8, and for CUDA 12 we only compile for CUDA 12.2
+        # to save CI time. Minor versions should be compatible.
+        torch_cuda_version = parse("11.8") if torch_cuda_version.major == 11 else parse("12.2")
+        cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
+    gpu_compute_version = hip_version if HIP_BUILD else cuda_version
+    cuda_or_hip = "hip" if HIP_BUILD else "cu"
     python_version = f"cp{sys.version_info.major}{sys.version_info.minor}"
     platform_name = get_platform()
     causal_conv1d_version = get_package_version()
-    # cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}"
-    cuda_version = f"{torch_cuda_version.major}{torch_cuda_version.minor}"
     torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}"
     cxx11_abi = str(torch._C._GLIBCXX_USE_CXX11_ABI).upper()
     # Determine wheel URL based on CUDA version, torch version, python version and OS
-    wheel_filename = f"{PACKAGE_NAME}-{causal_conv1d_version}+cu{cuda_version}torch{torch_version}cxx11abi{cxx11_abi}-{python_version}-{python_version}-{platform_name}.whl"
+    wheel_filename = f"{PACKAGE_NAME}-{causal_conv1d_version}+{cuda_or_hip}{gpu_compute_version}torch{torch_version}cxx11abi{cxx11_abi}-{python_version}-{python_version}-{platform_name}.whl"
     wheel_url = BASE_WHEEL_URL.format(
         tag_name=f"v{causal_conv1d_version}", wheel_name=wheel_filename
     )