PyPI - morphottention - Versions diffs - 0.2.0__tar.gz → 0.2.1__tar.gz - Mend

morphottention 0.2.0.tar.gz → 0.2.1.tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (29) hide show

{morphottention-0.2.0 → morphottention-0.2.1}/CMakeLists.txt RENAMED Viewed

@@ -41,21 +41,14 @@ list(APPEND CMAKE_PREFIX_PATH "${TORCH_CMAKE_PREFIX}")
 find_package(Torch REQUIRED CONFIG)
-if(WIN32)
-    set(_TORCH_PYTHON_LIB_NAME "torch_python.lib")
-else()
-    set(_TORCH_PYTHON_LIB_NAME "libtorch_python.so")
-endif()
 execute_process(
-        COMMAND ${Python_EXECUTABLE} -c "import pathlib, torch; print(pathlib.Path(torch.__file__).resolve().parent / 'lib' / '${_TORCH_PYTHON_LIB_NAME}')"
-        OUTPUT_VARIABLE TORCH_PYTHON_LIBRARY
+        COMMAND ${Python_EXECUTABLE} -c "import pathlib, torch; print(pathlib.Path(torch.__file__).resolve().parent / 'lib')"
+        OUTPUT_VARIABLE TORCH_LIB_DIR
         OUTPUT_STRIP_TRAILING_WHITESPACE
 )
-if(NOT EXISTS "${TORCH_PYTHON_LIBRARY}")
-    message(FATAL_ERROR "Could not locate ${_TORCH_PYTHON_LIB_NAME} at: ${TORCH_PYTHON_LIBRARY}")
+if(NOT EXISTS "${TORCH_LIB_DIR}")
+    message(FATAL_ERROR "Could not locate torch lib dir at: ${TORCH_LIB_DIR}")
 endif()
-get_filename_component(TORCH_LIB_DIR "${TORCH_PYTHON_LIBRARY}" DIRECTORY)
 find_package(CUDAToolkit REQUIRED)
@@ -85,25 +78,19 @@ endif()
 target_include_directories(_C PRIVATE
         ${CMAKE_CURRENT_SOURCE_DIR}/csrc
+        ${CMAKE_CURRENT_SOURCE_DIR}/csrc/compat
         ${TORCH_INCLUDE_DIRS}
         ${Python_INCLUDE_DIRS}
         ${CUDAToolkit_INCLUDE_DIRS}
 )
-target_compile_definitions(_C PRIVATE
-        TORCH_EXTENSION_NAME=_C
-)
 target_link_libraries(_C PRIVATE
         ${TORCH_LIBRARIES}
-        ${TORCH_PYTHON_LIBRARY}
-        Python::Module
         CUDA::cudart
         CUDA::cublas
 )
 if(NOT WIN32)
-    target_link_options(_C PRIVATE "-Wl,--no-as-needed")
     set_target_properties(_C PROPERTIES
             BUILD_RPATH   "${TORCH_LIB_DIR}"
             INSTALL_RPATH "$ORIGIN/../torch/lib"

{morphottention-0.2.0 → morphottention-0.2.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: morphottention
-Version: 0.2.0
+Version: 0.2.1
 Summary: Mathematical Morphology-based self-attention module for PyTorch (CUDA) using Flash-style kernel fusion.
 Keywords: attention,cuda,pytorch,transformer,morphology,flash-attention,ViT
 Author-Email: Vedran Hrabar <vedran.hrabar@outlook.com>

morphottention-0.2.1/build.toml ADDED Viewed

@@ -0,0 +1,37 @@
+[general]
+name = "morphottention"
+backends = ["cuda"]
+license = "MIT"
+version = 0
+[general.hub]
+repo-id = "vhrabar/morphottention"
+[torch]
+include = ["csrc", "torch-ext"]
+src = [
+  "csrc/cuda/binder.cpp",
+  "csrc/cuda/dispatch.cpp",
+  "csrc/cuda/dispatch.h",
+]
+[kernel.morpho]
+backend = "cuda"
+cuda-capabilities = ["8.9", "9.0", "10.0", "12.0", "12.0a"]
+depends = ["torch"]
+include = ["csrc"]
+src = [
+  "csrc/cuda/attention/attention.cpp",
+  "csrc/cuda/attention/attention.cu",
+  "csrc/cuda/attention/attention.cuh",
+  "csrc/cuda/dispatch.h",
+  "csrc/cuda/morfology/cube.cuh",
+  "csrc/cuda/morfology/soft_morph.cuh",
+  "csrc/cuda/sm120/matmul.cuh",
+  "csrc/cuda/sm120/project.cuh",
+  "csrc/cuda/sm120/smem.cuh",
+  "csrc/cuda/utils/declarations.cuh",
+  "csrc/cuda/utils/reductions.cuh",
+  "csrc/cuda/utils/smem.cuh",
+  "csrc/cuda/utils/utils.cuh",
+]

morphottention-0.2.1/csrc/compat/registration.h ADDED Viewed

@@ -0,0 +1,14 @@
+#ifndef MORPHOTTENTION_COMPAT_REGISTRATION_H
+#define MORPHOTTENTION_COMPAT_REGISTRATION_H
+// Compatibility shim for the local CMake / PyPI build.
+#include <torch/library.h>
+#define TORCH_LIBRARY_EXPAND(NAME, MODULE) TORCH_LIBRARY(NAME, MODULE)
+#define TORCH_EXTENSION_NAME morphottention
+#define REGISTER_EXTENSION(NAME)
+#endif // MORPHOTTENTION_COMPAT_REGISTRATION_H

{morphottention-0.2.0 → morphottention-0.2.1}/csrc/cuda/attention/attention.cpp RENAMED Viewed

@@ -4,7 +4,7 @@
 #include <ATen/cuda/CUDAContext.h>
 #include <c10/cuda/CUDAGuard.h>
-#include <torch/extension.h>
+#include <torch/torch.h>
 auto check = [](const torch::Tensor& t, const char* name) {
     TORCH_CHECK(t.is_cuda(), name, " must be a CUDA tensor");

morphottention-0.2.1/csrc/cuda/binder.cpp ADDED Viewed

@@ -0,0 +1,17 @@
+#include "dispatch.h"
+#include "registration.h"
+#include <torch/library.h>
+TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, m) {
+    m.def("forward(Tensor X, Tensor W_phi, Tensor gate_q, Tensor gate_k, Tensor W_V, "
+          "int H, int cube_m, float scale, bool causal) -> Tensor[]");
+    m.def("backward(Tensor grad_out, Tensor X, Tensor W_phi, Tensor gate_q, Tensor gate_k, "
+          "Tensor W_V, Tensor out, Tensor lse, int H, int cube_m, float scale, bool causal) -> Tensor[]");
+    m.impl("forward", torch::kCUDA, &forward);
+    m.impl("backward", torch::kCUDA, &backward);
+}
+REGISTER_EXTENSION(TORCH_EXTENSION_NAME)

{morphottention-0.2.0 → morphottention-0.2.1}/csrc/cuda/dispatch.h RENAMED Viewed

@@ -1,7 +1,7 @@
 #ifndef MORPHOTTENTION_DISPATCH_H
 #define MORPHOTTENTION_DISPATCH_H
-#include <torch/extension.h>
+#include <torch/torch.h>
 // py-facing dispatchers
 std::vector<torch::Tensor> forward(const torch::Tensor& X, const torch::Tensor& W_phi, const torch::Tensor& gate_q,

morphottention-0.2.1/flake.lock ADDED Viewed

@@ -0,0 +1,117 @@
+{
+  "nodes": {
+    "flake-compat": {
+      "locked": {
+        "lastModified": 1767039857,
+        "narHash": "sha256-vNpUSpF5Nuw8xvDLj2KCwwksIbjua2LZCqhV1LNRDns=",
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "rev": "5edf11c44bc78a0d334f6334cdaf7d60d732daab",
+        "type": "github"
+      },
+      "original": {
+        "owner": "edolstra",
+        "repo": "flake-compat",
+        "type": "github"
+      }
+    },
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "kernel-builder": {
+      "inputs": {
+        "flake-compat": "flake-compat",
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs",
+        "rust-overlay": "rust-overlay"
+      },
+      "locked": {
+        "lastModified": 1783078249,
+        "narHash": "sha256-WjIYeFTWncfrj6w2t9PxYrCpqCQa2MIyQIaQncH7XvA=",
+        "owner": "huggingface",
+        "repo": "kernels",
+        "rev": "b9710edf6436d3949e50085938a3c49e626ee885",
+        "type": "github"
+      },
+      "original": {
+        "owner": "huggingface",
+        "repo": "kernels",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1776927958,
+        "narHash": "sha256-XOzEtft7E0P6TgQViLUOQeGHlEYiQ0+FY24BPEksj6s=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "fec2c46cca5bf9767486a290abae51200b656d69",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable-small",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "kernel-builder": "kernel-builder"
+      }
+    },
+    "rust-overlay": {
+      "inputs": {
+        "nixpkgs": [
+          "kernel-builder",
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1776914043,
+        "narHash": "sha256-qug5r56yW1qOsjSI99l3Jm15JNT9CvS2otkXNRNtrPI=",
+        "owner": "oxalica",
+        "repo": "rust-overlay",
+        "rev": "2d35c4358d7de3a0e606a6e8b27925d981c01cc3",
+        "type": "github"
+      },
+      "original": {
+        "owner": "oxalica",
+        "repo": "rust-overlay",
+        "type": "github"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}

morphottention-0.2.1/flake.nix ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  description = "Flake for the Morphottention CUDA kernel";
+  inputs = {
+    kernel-builder.url = "github:huggingface/kernels";
+  };
+  outputs =
+    {
+      self,
+      kernel-builder,
+    }:
+    kernel-builder.lib.genKernelFlakeOutputs {
+      inherit self;
+      path = ./.;
+    };
+}

{morphottention-0.2.0 → morphottention-0.2.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "morphottention"
-version = "0.2.0"
+version = "0.2.1"
 description = "Mathematical Morphology-based self-attention module for PyTorch (CUDA) using Flash-style kernel fusion."
 readme = "README.md"
 requires-python = ">=3.12"
@@ -78,5 +78,5 @@ minimum-version = "build-system.requires"
 cmake.version = ">=3.24"
 build-dir = "build/{wheel_tag}"
-wheel.packages = ["src/morphottention"]
+wheel.packages = ["torch-ext/morphottention"]
 editable.rebuild = true

morphottention-0.2.1/torch-ext/morphottention/_cmake_ops.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""
+Op-namespace loader for the local CMake / PyPI build.
+"""
+from __future__ import annotations
+import glob
+import os
+import torch
+_NAMESPACE = "morphottention"
+def _load_c_extension() -> None:
+    pkg_dir = os.path.dirname(__file__)
+    for pattern in ("_C*.so", "_C*.pyd", "_C*.dll"):
+        matches = glob.glob(os.path.join(pkg_dir, pattern))
+        if matches:
+            torch.ops.load_library(matches[0])  # type: ignore[no-untyped-call]
+            return
+    raise ImportError(
+        f"Could not find the compiled morphottention '_C' extension in {pkg_dir}. "
+        "Reinstall the package so the CUDA kernels are built."
+    )
+_load_c_extension()
+ops = getattr(torch.ops, _NAMESPACE)
+def add_op_namespace_prefix(op_name: str) -> str:
+    return f"{_NAMESPACE}::{op_name}"

{morphottention-0.2.0/src → morphottention-0.2.1/torch-ext}/morphottention/autograd.py RENAMED Viewed

@@ -7,7 +7,28 @@ from __future__ import annotations
 import torch
 from torch import nn
-from . import _C
+try:
+    from ._ops import add_op_namespace_prefix, ops  # type: ignore[import-not-found]
+except ImportError:
+    from ._cmake_ops import add_op_namespace_prefix, ops
+@torch.library.register_fake(add_op_namespace_prefix("forward"))  # type: ignore[untyped-decorator]
+def _morpho_forward_fake(
+    X: torch.Tensor,
+    W_phi: torch.Tensor,
+    gate_q: torch.Tensor,
+    gate_k: torch.Tensor,
+    W_V: torch.Tensor,
+    H: int,
+    cube_m: int,
+    scale: float,
+    causal: bool,
+) -> list[torch.Tensor]:
+    B, N, _D = X.shape
+    out = torch.empty_like(X)
+    lse = X.new_empty((B * H, N), dtype=torch.float32)
+    return [out, lse]
 class MorphoAttentionFunction(torch.autograd.Function):
@@ -32,14 +53,14 @@ class MorphoAttentionFunction(torch.autograd.Function):
             raise ValueError("MorphoAttention expects a CUDA tensor")
         x = x.contiguous()
-        out, lse = _C.forward(x, W_phi, gate_q, gate_k, W_V, H, cube_m, scale, causal)
+        out, lse = ops.forward(x, W_phi, gate_q, gate_k, W_V, H, cube_m, scale, causal)
         ctx.save_for_backward(x, W_phi, gate_q, gate_k, W_V, out, lse)
         ctx.H = H  # type: ignore[attr-defined]
         ctx.cube_m = cube_m  # type: ignore[attr-defined]
         ctx.scale = scale  # type: ignore[attr-defined]
         ctx.causal = causal  # type: ignore[attr-defined]
-        return out
+        return out  # type: ignore[no-any-return]
     @staticmethod
     def backward(
@@ -49,7 +70,7 @@ class MorphoAttentionFunction(torch.autograd.Function):
         x, W_phi, gate_q, gate_k, W_V, out, lse = ctx.saved_tensors  # type: ignore[attr-defined]
         grad_out = grad_out.contiguous()
-        dX, dW_phi, d_gate_q, d_gate_k, dW_V = _C.backward(
+        dX, dW_phi, d_gate_q, d_gate_k, dW_V = ops.backward(
             grad_out,
             x,
             W_phi,

morphottention-0.2.0/csrc/cuda/binder.cpp DELETED Viewed

@@ -1,14 +0,0 @@
-#include "dispatch.h"
-#include <torch/extension.h>
-PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
-    m.doc() = "Morphottention CUDA attention kernels";
-    m.def("forward", &forward, "Attention forward dispatcher", py::arg("X"), py::arg("W_phi"), py::arg("gate_q"),
-          py::arg("gate_k"), py::arg("W_V"), py::arg("H"), py::arg("cube_m"), py::arg("scale"), py::arg("causal"));
-    m.def("backward", &backward, "Attention backward dispatcher", py::arg("grad_out"), py::arg("X"), py::arg("W_phi"),
-          py::arg("gate_q"), py::arg("gate_k"), py::arg("W_V"), py::arg("out"), py::arg("lse"), py::arg("H"),
-          py::arg("cube_m"), py::arg("scale"), py::arg("causal"));
-}

morphottention-0.2.0/src/morphottention/_C.pyi DELETED Viewed

@@ -1,27 +0,0 @@
-import torch
-def forward(
-    X: torch.Tensor,
-    W_phi: torch.Tensor,
-    gate_q: torch.Tensor,
-    gate_k: torch.Tensor,
-    W_V: torch.Tensor,
-    H: int,
-    cube_m: int,
-    scale: float,
-    causal: bool,
-) -> list[torch.Tensor]: ...
-def backward(
-    grad_out: torch.Tensor,
-    X: torch.Tensor,
-    W_phi: torch.Tensor,
-    gate_q: torch.Tensor,
-    gate_k: torch.Tensor,
-    W_V: torch.Tensor,
-    out: torch.Tensor,
-    lse: torch.Tensor,
-    H: int,
-    cube_m: int,
-    scale: float,
-    causal: bool,
-) -> list[torch.Tensor]: ...