morphottention 0.2.0__cp313-cp313-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
morphottention/_C.pyi ADDED
@@ -0,0 +1,27 @@
1
+ import torch
2
+
3
+ def forward(
4
+ X: torch.Tensor,
5
+ W_phi: torch.Tensor,
6
+ gate_q: torch.Tensor,
7
+ gate_k: torch.Tensor,
8
+ W_V: torch.Tensor,
9
+ H: int,
10
+ cube_m: int,
11
+ scale: float,
12
+ causal: bool,
13
+ ) -> list[torch.Tensor]: ...
14
+ def backward(
15
+ grad_out: torch.Tensor,
16
+ X: torch.Tensor,
17
+ W_phi: torch.Tensor,
18
+ gate_q: torch.Tensor,
19
+ gate_k: torch.Tensor,
20
+ W_V: torch.Tensor,
21
+ out: torch.Tensor,
22
+ lse: torch.Tensor,
23
+ H: int,
24
+ cube_m: int,
25
+ scale: float,
26
+ causal: bool,
27
+ ) -> list[torch.Tensor]: ...
@@ -0,0 +1,3 @@
1
+ from .autograd import MorphoAttention, MorphoAttentionFunction, morpho_attention
2
+
3
+ __all__ = ["MorphoAttention", "MorphoAttentionFunction", "morpho_attention"]
@@ -0,0 +1,141 @@
1
+ """
2
+ Autograd wrapper and nn.Module around the compiled Morphottention CUDA kernels.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import torch
8
+ from torch import nn
9
+
10
+ from . import _C
11
+
12
+
13
+ class MorphoAttentionFunction(torch.autograd.Function):
14
+ """
15
+ Bridges the CUDA attention kernels into autograd.
16
+ """
17
+
18
+ @staticmethod
19
+ def forward(
20
+ ctx: torch.autograd.function.FunctionCtx,
21
+ x: torch.Tensor,
22
+ W_phi: torch.Tensor,
23
+ gate_q: torch.Tensor,
24
+ gate_k: torch.Tensor,
25
+ W_V: torch.Tensor,
26
+ H: int,
27
+ cube_m: int,
28
+ scale: float,
29
+ causal: bool,
30
+ ) -> torch.Tensor:
31
+ if not x.is_cuda:
32
+ raise ValueError("MorphoAttention expects a CUDA tensor")
33
+
34
+ x = x.contiguous()
35
+ out, lse = _C.forward(x, W_phi, gate_q, gate_k, W_V, H, cube_m, scale, causal)
36
+
37
+ ctx.save_for_backward(x, W_phi, gate_q, gate_k, W_V, out, lse)
38
+ ctx.H = H # type: ignore[attr-defined]
39
+ ctx.cube_m = cube_m # type: ignore[attr-defined]
40
+ ctx.scale = scale # type: ignore[attr-defined]
41
+ ctx.causal = causal # type: ignore[attr-defined]
42
+ return out
43
+
44
+ @staticmethod
45
+ def backward(
46
+ ctx: torch.autograd.function.FunctionCtx,
47
+ grad_out: torch.Tensor,
48
+ ) -> tuple[torch.Tensor | None, ...]:
49
+ x, W_phi, gate_q, gate_k, W_V, out, lse = ctx.saved_tensors # type: ignore[attr-defined]
50
+
51
+ grad_out = grad_out.contiguous()
52
+ dX, dW_phi, d_gate_q, d_gate_k, dW_V = _C.backward(
53
+ grad_out,
54
+ x,
55
+ W_phi,
56
+ gate_q,
57
+ gate_k,
58
+ W_V,
59
+ out,
60
+ lse,
61
+ ctx.H, # type: ignore[attr-defined]
62
+ ctx.cube_m, # type: ignore[attr-defined]
63
+ ctx.scale, # type: ignore[attr-defined]
64
+ ctx.causal, # type: ignore[attr-defined]
65
+ )
66
+
67
+ return dX, dW_phi, d_gate_q, d_gate_k, dW_V, None, None, None, None
68
+
69
+
70
+ class MorphoAttention(nn.Module):
71
+ """
72
+ Morphological hypercube attention
73
+ :param dim: model dimension ``D`` (must be divisible by ``num_heads``).
74
+ :param num_heads: number of attention heads ``H``.
75
+ :param cube_m: hypercube width per head ``m`` (the score-GEMM contraction dim).
76
+ :param scale: softmax temperature.
77
+ :param causal: whether to apply causal masking.
78
+ """
79
+
80
+ def __init__(
81
+ self,
82
+ dim: int,
83
+ num_heads: int,
84
+ cube_m: int,
85
+ *,
86
+ scale: float = 1.0,
87
+ causal: bool = False,
88
+ dtype: torch.dtype = torch.float16,
89
+ device: torch.device | str | None = None,
90
+ ) -> None:
91
+ super().__init__()
92
+ if dim % num_heads != 0:
93
+ raise ValueError(f"dim ({dim}) must be divisible by num_heads ({num_heads})")
94
+
95
+ self.dim = dim
96
+ self.num_heads = num_heads
97
+ self.cube_m = cube_m
98
+ self.scale = scale
99
+ self.causal = causal
100
+
101
+ head_dim_v = dim // num_heads
102
+ self.W_phi = nn.Parameter(torch.empty(dim, num_heads * cube_m, dtype=dtype, device=device))
103
+ self.gate_q = nn.Parameter(torch.empty(num_heads, cube_m, dtype=dtype, device=device))
104
+ self.gate_k = nn.Parameter(torch.empty(num_heads, cube_m, dtype=dtype, device=device))
105
+ self.W_V = nn.Parameter(torch.empty(dim, num_heads * head_dim_v, dtype=dtype, device=device))
106
+ self.reset_parameters()
107
+
108
+ def reset_parameters(self) -> None:
109
+ std = self.dim**-0.5
110
+ with torch.no_grad():
111
+ for w in (self.W_phi, self.W_V):
112
+ w.copy_(torch.empty(w.shape, dtype=torch.float32, device=w.device).normal_(0.0, std))
113
+ self.gate_q.fill_(1.0)
114
+ self.gate_k.fill_(1.0)
115
+
116
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
117
+ return MorphoAttentionFunction.apply( # type: ignore[no-any-return, no-untyped-call]
118
+ x, self.W_phi, self.gate_q, self.gate_k, self.W_V, self.num_heads, self.cube_m, self.scale, self.causal
119
+ )
120
+
121
+
122
+ def morpho_attention(
123
+ x: torch.Tensor,
124
+ W_phi: torch.Tensor,
125
+ gate_q: torch.Tensor,
126
+ gate_k: torch.Tensor,
127
+ W_V: torch.Tensor,
128
+ num_heads: int,
129
+ cube_m: int,
130
+ *,
131
+ scale: float = 1.0,
132
+ causal: bool = False,
133
+ ) -> torch.Tensor:
134
+ """
135
+ Apply the Morphottention attention mechanism with autograd support.
136
+ :param x: input activations of shape (B, N, D) on a CUDA device.
137
+ :returns: the attention output, differentiable w.r.t. ``x``, ``W_phi``, ``gate_q``, ``gate_k`` and ``W_V``.
138
+ """
139
+ return MorphoAttentionFunction.apply( # type: ignore[no-any-return, no-untyped-call]
140
+ x, W_phi, gate_q, gate_k, W_V, num_heads, cube_m, scale, causal
141
+ )
File without changes
@@ -0,0 +1,130 @@
1
+ Metadata-Version: 2.2
2
+ Name: morphottention
3
+ Version: 0.2.0
4
+ Summary: Mathematical Morphology-based self-attention module for PyTorch (CUDA) using Flash-style kernel fusion.
5
+ Keywords: attention,cuda,pytorch,transformer,morphology,flash-attention,ViT
6
+ Author-Email: Vedran Hrabar <vedran.hrabar@outlook.com>
7
+ License: MIT
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Environment :: GPU
10
+ Classifier: Environment :: GPU :: NVIDIA CUDA
11
+ Classifier: Environment :: GPU :: NVIDIA CUDA :: 13
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: POSIX :: Linux
16
+ Classifier: Operating System :: Microsoft :: Windows
17
+ Classifier: Programming Language :: C++
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3 :: Only
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: Python :: 3.13
22
+ Classifier: Programming Language :: Python :: 3.14
23
+ Classifier: Programming Language :: Python :: Implementation :: CPython
24
+ Classifier: Topic :: Scientific/Engineering
25
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
26
+ Classifier: Topic :: Scientific/Engineering :: Image Recognition
27
+ Classifier: Topic :: Scientific/Engineering :: Mathematics
28
+ Classifier: Topic :: Software Development :: Libraries
29
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
30
+ Classifier: Typing :: Typed
31
+ Project-URL: repository, https://github.com/vhrabar/morphottention
32
+ Project-URL: documentation, https://github.com/vhrabar/morphottention/wiki
33
+ Project-URL: Bug Tracker, https://github.com/vhrabar/morphottention/issues
34
+ Requires-Python: >=3.12
35
+ Requires-Dist: torch>=2.12
36
+ Description-Content-Type: text/markdown
37
+
38
+ # Morphottention
39
+ Mathematical Morphology-based self-attention module for PyTorch using Flash-style kernel fusion.
40
+
41
+ ## Install
42
+
43
+ Prebuilt wheels are published for CPython 3.13 and 3.14 on Linux (x86_64, aarch64) and
44
+ Windows (x86_64). A working CUDA-enabled PyTorch (`torch >= 2.12`) must already
45
+ be installed in the environment.
46
+
47
+ ```bash
48
+ pip install morphottention
49
+ ```
50
+
51
+ ## Usage
52
+
53
+ The package exposes an `nn.Module` (`MorphoAttention`), a functional entry point
54
+ (`morpho_attention`), and the raw autograd bridge (`MorphoAttentionFunction`).
55
+ All inputs must be CUDA tensors; the module defaults to `float16`.
56
+
57
+ ### As an `nn.Module`
58
+
59
+ ```python
60
+ import torch
61
+ from morphottention import MorphoAttention
62
+
63
+ attn = MorphoAttention(
64
+ dim=256, # model dimension D
65
+ num_heads=8, # number of attention heads H
66
+ cube_m=16, # hypercube width per head
67
+ scale=1.0, # softmax temperature
68
+ causal=False, # causal masking flag
69
+ device="cuda",
70
+ )
71
+
72
+ x = torch.randn(2, 128, 256, dtype=torch.float16, device="cuda") # (B, N, D)
73
+ out = attn(x) # (B, N, D)
74
+ out.sum().backward()
75
+ ```
76
+
77
+ ### Functional form
78
+
79
+ ```python
80
+ from morphottention import morpho_attention
81
+
82
+ out = morpho_attention(
83
+ x,
84
+ W_phi,
85
+ gate_q,
86
+ gate_k,
87
+ W_V,
88
+ num_heads=8, cube_m=16, scale=1.0,
89
+ causal=False,
90
+ )
91
+ ```
92
+
93
+ ### Raw autograd bridge
94
+
95
+ ```python
96
+ import torch
97
+ from morphottention import MorphoAttentionFunction
98
+
99
+ B, N, D, H, cube_m = 2, 128, 256, 8, 16
100
+
101
+ x = torch.randn(B, N, D, dtype=torch.float16, device="cuda", requires_grad=True)
102
+ W_phi = torch.randn(D, H * cube_m, dtype=torch.float16, device="cuda", requires_grad=True)
103
+ gate_q = torch.ones(H, cube_m, dtype=torch.float16, device="cuda", requires_grad=True)
104
+ gate_k = torch.ones(H, cube_m, dtype=torch.float16, device="cuda", requires_grad=True)
105
+ W_V = torch.randn(D, D, dtype=torch.float16, device="cuda", requires_grad=True)
106
+
107
+ out = MorphoAttentionFunction.apply(
108
+ x, W_phi, gate_q, gate_k, W_V,
109
+ H, cube_m, 1.0, False, # num_heads, cube_m, scale, causal
110
+ ) # (B, N, D)
111
+ out.sum().backward()
112
+ ```
113
+
114
+ `W_phi` has shape `(D, H * cube_m)`, `W_V` has shape `(D, D)`, and `gate_q` /
115
+ `gate_k` each have shape `(H, cube_m)`.
116
+
117
+ ## Building from source
118
+
119
+ Requires the CUDA 13.X toolkit (`nvcc`) and a matching `torch` build:
120
+
121
+ ```bash
122
+ uv sync --package morphottention --no-dev --group build
123
+ uv build --package morphottention --wheel --no-build-isolation
124
+ ```
125
+
126
+ ## License
127
+
128
+ MIT
129
+
130
+ Copyright © 2026 Vedran Hrabar.
@@ -0,0 +1,8 @@
1
+ morphottention/__init__.py,sha256=geq-PCIo1BkEd77w7_3-vTUKGHBFFHs-CTzT3tlMwGQ,159
2
+ morphottention/_C.cp313-win_amd64.pyd,sha256=UZvNR1ewZ5Zsjd15H6Ry9TCmo-wCTw5dsOKpjspDXZM,786944
3
+ morphottention/_C.pyi,sha256=jFKwfXOxsS7Y9FjwKBGaVDpi2YqwN_x0cP1mJJCJO_I,572
4
+ morphottention/autograd.py,sha256=w07iFr_y8D7qMBlCbbdjZDtObCP5DbKXRceVe-jwyDY,4633
5
+ morphottention/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ morphottention-0.2.0.dist-info/METADATA,sha256=tCP9s_kBbh8fzl1kLw1a5zUVozbvBxw598XksuFgn-w,4256
7
+ morphottention-0.2.0.dist-info/WHEEL,sha256=UZrbbE4r80xj7Ncfa6JoeTVe-77bdXLkKUA63V8pKWQ,106
8
+ morphottention-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: scikit-build-core 0.12.2
3
+ Root-Is-Purelib: false
4
+ Tag: cp313-cp313-win_amd64
5
+