blksprs 1.4__tar.gz → 1.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {blksprs-1.4 → blksprs-1.4.2}/PKG-INFO +4 -6
- {blksprs-1.4 → blksprs-1.4.2}/README.md +1 -1
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/__init__.py +1 -1
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/layouting/distribution_layout.py +1 -1
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/layouting/sparsity_layout.py +2 -2
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/misc/broadcast_ops.py +4 -1
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/misc/repeat_interleave.py +2 -0
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/misc/row_wise.py +5 -0
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/conversion.py +8 -2
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/distribution.py +6 -0
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/exp.py +2 -0
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/matmul.py +8 -3
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/softmax.py +3 -1
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/transpose.py +6 -2
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/utils/tools.py +8 -3
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/utils/validation.py +21 -13
- {blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/PKG-INFO +4 -6
- {blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/requires.txt +1 -3
- {blksprs-1.4 → blksprs-1.4.2}/pyproject.toml +3 -5
- {blksprs-1.4 → blksprs-1.4.2}/blksprs/utils/benchmarking.py +0 -0
- {blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/SOURCES.txt +0 -0
- {blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/dependency_links.txt +0 -0
- {blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/top_level.txt +0 -0
- {blksprs-1.4 → blksprs-1.4.2}/setup.cfg +0 -0

{blksprs-1.4 → blksprs-1.4.2}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: blksprs
-Version: 1.4
+Version: 1.4.2
 Summary: A lightweight library for operations on blocksparse matrices in PyTorch.
 Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
 Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
@@ -14,10 +14,8 @@ Requires-Dist: pytest-xdist; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
 Requires-Dist: coverage; extra == "test"
 Requires-Dist: matplotlib; extra == "test"
-Provides-Extra: deploy
-Requires-Dist: build; extra == "deploy"
-Requires-Dist: twine; extra == "deploy"
-Requires-Dist: pdoc3; extra == "deploy"
+Provides-Extra: build
+Requires-Dist: build; extra == "build"
 
 # blksprs
 
@@ -146,7 +144,7 @@ def test_readme():
     # Assert that the output has the correct sparsity layout
     actual_sparsity_layout_o = bs.layout.build_sparsity_layout(o_dense, sparsity_block_size,
                                                                triton_block_size=triton_block_size)
-    assert torch.allclose(actual_sparsity_layout_o, sparsity_layout_o)
+    assert torch.allclose(actual_sparsity_layout_o.to(torch.int), sparsity_layout_o)
 
     # Convert output tensor back to original shape
     o = bs.util.undo_shape_blocksparse(o_dense, x_shape_original)

{blksprs-1.4 → blksprs-1.4.2}/README.md
@@ -125,7 +125,7 @@ def test_readme():
     # Assert that the output has the correct sparsity layout
     actual_sparsity_layout_o = bs.layout.build_sparsity_layout(o_dense, sparsity_block_size,
                                                                triton_block_size=triton_block_size)
-    assert torch.allclose(actual_sparsity_layout_o, sparsity_layout_o)
+    assert torch.allclose(actual_sparsity_layout_o.to(torch.int), sparsity_layout_o)
 
     # Convert output tensor back to original shape
     o = bs.util.undo_shape_blocksparse(o_dense, x_shape_original)

{blksprs-1.4 → blksprs-1.4.2}/blksprs/__init__.py
@@ -15,4 +15,4 @@ class misc:
     from blksprs.misc.row_wise import row_wise_sum, row_wise_max, row_wise_add, row_wise_sub
 
 class util:
-    from blksprs.utils.tools import do_shape_blocksparse, undo_shape_blocksparse
+    from blksprs.utils.tools import do_shape_blocksparse, undo_shape_blocksparse, disable_validation

{blksprs-1.4 → blksprs-1.4.2}/blksprs/layouting/distribution_layout.py
@@ -31,7 +31,7 @@ def build_distribution_layout(indices: Tensor, sparsity_layout_indices: Tensor,
     sparsity_lut_i = torch.nonzero(sparsity_layout_indices).contiguous()
 
     output = torch.zeros(size_target[0], size_target[1] // sparsity_block_size, size_target[2] // sparsity_block_size,
-                         device=indices.device)
+                         dtype=torch.bool, device=indices.device)
 
     i_b, i_r, i_c = indices.size()
     i_b_s, i_r_s, i_c_s = indices.stride()

{blksprs-1.4 → blksprs-1.4.2}/blksprs/layouting/sparsity_layout.py
@@ -27,7 +27,7 @@ def build_sparsity_layout(x: Tensor, sparsity_block_size: int, triton_block_size
     validate_device(x)
 
     output = torch.zeros(x.size(0), x.size(1) // sparsity_block_size, x.size(2) // sparsity_block_size,
-                         device=x.device)
+                         dtype=torch.bool, device=x.device)
 
     x_b, x_r, x_c = x.size()
     x_b_s, x_r_s, x_c_s = x.stride()
@@ -117,7 +117,7 @@ def build_sparsity_layout_adaption(x: Tensor, sparsity_layout_from: Tensor,
     o_r = math.ceil(sparsity_layout_from.size(1) * sparsity_block_size_from // sparsity_block_size_to)
     o_c = math.ceil(sparsity_layout_from.size(2) * sparsity_block_size_from // sparsity_block_size_to)
 
-    output = torch.zeros(o_b, o_r, o_c, device=x.device)
+    output = torch.zeros(o_b, o_r, o_c, dtype=torch.bool, device=x.device)
 
     x_b, x_r, x_c = x.size()
     x_b_s, x_r_s, x_c_s = x.stride()
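
As the layouting hunks above show, generated sparsity layouts are now allocated as `torch.bool`, which is also why the README assertion earlier casts with `.to(torch.int)` before comparing. A minimal sketch of that interaction, assuming blksprs 1.4.2 on a CUDA device (the tensors, shapes and call defaults here are illustrative, not taken from the package):

```python
import torch
import blksprs as bs

# Hypothetical dense input: 2 batches of 64x64 where every block is populated
x_dense = torch.randn(2, 64, 64, device="cuda")
sparsity_block_size = 32

# build_sparsity_layout now returns a boolean layout (dtype=torch.bool above)
layout = bs.layout.build_sparsity_layout(x_dense, sparsity_block_size)
assert layout.dtype == torch.bool

# Comparing against an integer reference layout therefore needs an explicit cast,
# mirroring the updated README assertion
reference = torch.ones(2, 2, 2, dtype=torch.int, device="cuda")
assert torch.allclose(layout.to(torch.int), reference)
```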

{blksprs-1.4 → blksprs-1.4.2}/blksprs/misc/broadcast_ops.py
@@ -25,6 +25,9 @@ def broadcast_add(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
         output tensor corresponds to x(i) + y(j).
 
     """
+    x = x.contiguous()
+    y = y.contiguous()
+
     validate_device(x, y)
     validate_contiguous(x, y)
     if x.size(-1) != y.size(-1):
@@ -38,7 +41,7 @@ def broadcast_add(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
 
     validate_contiguous(sparsity_layout_output, sparsity_lut_o)
 
-    output = torch.zeros(n_sparse_blocks, sparsity_block_size, sparsity_block_size, device=x.device)
+    output = torch.zeros(n_sparse_blocks, sparsity_block_size, sparsity_block_size, dtype=x.dtype, device=x.device)
 
     x_b, x_c = x.size()
     x_b_s, x_c_s = x.stride()
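
The `dtype=x.dtype` added here, and in the row_wise, conversion, matmul and transpose hunks below, keeps intermediate buffers in the input's dtype instead of the global default. A small standalone illustration of the difference (plain PyTorch, no blksprs involved):

```python
import torch

x = torch.randn(4, 32, 32, dtype=torch.float16)

# Former pattern: the buffer falls back to the default dtype (float32),
# silently up-casting half-precision block-sparse data
out_old = torch.zeros(4, 32, 32, device=x.device)
assert out_old.dtype == torch.float32

# 1.4.2 pattern: the buffer inherits the input dtype
out_new = torch.zeros(4, 32, 32, dtype=x.dtype, device=x.device)
assert out_new.dtype == torch.float16
```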

{blksprs-1.4 → blksprs-1.4.2}/blksprs/misc/row_wise.py
@@ -31,6 +31,8 @@ def row_wise_sum(x: Tensor, sparsity_layout: Tensor, sparsity_block_size: int,
         of the input and the sparsity layout of the output tensor.
 
     """
+    x = x.contiguous()
+
     validate_dimensions(x)
     validate_contiguous(x)
     validate_device(x)
@@ -54,6 +56,7 @@ def row_wise_sum(x: Tensor, sparsity_layout: Tensor, sparsity_block_size: int,
     output = torch.zeros(size=(n_sparse_blocks_output,
                                sparsity_block_size,
                                1 if flag_slice_only else sparsity_block_size),
+                         dtype=x.dtype,
                          device=x.device)
 
     x_b, x_r, x_c = x.size()
@@ -151,6 +154,8 @@ def row_wise_max(x: Tensor, sparsity_layout: Tensor, sparsity_block_size: int,
         of the input and the sparsity layout of the output tensor.
 
     """
+    x = x.contiguous()
+
     validate_dimensions(x)
     validate_contiguous(x)
     validate_device(x)

{blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/conversion.py
@@ -28,6 +28,8 @@ def to_dense(x: Tensor, sparsity_layout: Tensor, sparsity_block_size: int, fill_
         Tensor: The block-sparse tensor converted to regular form.
 
     """
+    x = x.contiguous()
+
     validate_dimensions(x)
     validate_contiguous(x, sparsity_layout)
     validate_device(x)
@@ -156,6 +158,8 @@ def to_sparse(x: Tensor, sparsity_layout: Tensor, sparsity_block_size: int, trit
         Tensor: The block-sparse tensor converted to compressed form.
 
     """
+    x = x.contiguous()
+
     validate_dimensions(x)
     validate_contiguous(x)
     validate_device(x)
@@ -182,8 +186,8 @@ class _BlocksparseToSparse(torch.autograd.Function):
     def forward(ctx, x: Tensor,
                 sparsity_layout: Tensor, sparsity_lut: Tensor,
                 sparsity_block_size: int, n_sparse_blocks: int, triton_block_size: int) -> Tensor:
-        output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
-                             device=x.device)
+        output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
+                             dtype=x.dtype, device=x.device)
 
         x_b, x_r, x_c = x.size()
         x_b_s, x_r_s, x_c_s = x.stride()
@@ -282,6 +286,8 @@ def adapt_layout(x: Tensor, sparsity_layout_from: Tensor, sparsity_block_size_fr
         Tensor: The block-sparse tensor in compressed form with the adapted sparsity layout and sparsity block size.
 
     """
+    x = x.contiguous()
+
     validate_dimensions(x)
     validate_contiguous(x, sparsity_layout_from)
     validate_device(x)

{blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/distribution.py
@@ -24,6 +24,9 @@ def gather(src: Tensor, sparsity_layout_src: Tensor, idx: Tensor, sparsity_layou
         Tensor: The result of the gather operation as a block-sparse tensor in compressed form.
 
     """
+    src = src.contiguous()
+    idx = idx.contiguous()
+
     validate_dimensions(src, idx)
     validate_contiguous(src, idx)
     validate_dtype_int(idx)
@@ -200,6 +203,9 @@ def scatter_reduce(src: Tensor, sparsity_layout_src: Tensor,
         Tensor: The result of the scatter operation as a block-sparse tensor in compressed form.
 
     """
+    src = src.contiguous()
+    idx = idx.contiguous()
+
     validate_dimensions(src, idx)
     validate_contiguous(src, idx)
     validate_dtype_int(idx)

{blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/matmul.py
@@ -6,7 +6,7 @@ from triton import language as tl
 from blksprs.ops.transpose import transpose
 from blksprs.utils.tools import get_triton_block_size
 from blksprs.utils.validation import validate_contiguous, validate_dimensions, validate_device, \
-    validate_sparsity, validate_sparsity_block_size, validate_triton_block_size
+    validate_sparsity, validate_sparsity_block_size, validate_triton_block_size, validate_dtype_float
 
 
 def matmul(x: Tensor, sparsity_layout_x: Tensor,
@@ -30,8 +30,12 @@ def matmul(x: Tensor, sparsity_layout_x: Tensor,
         Tensor: The result of the matrix multiplication as a block-sparse tensor in compressed form.
 
     """
+    x = x.contiguous()
+    y = y.contiguous()
+
     validate_dimensions(x, y)
     validate_contiguous(x, y)
+    validate_dtype_float(x, y)
     validate_device(x, y)
     validate_sparsity(sparsity_block_size, (x, sparsity_layout_x), (y, sparsity_layout_y))
     if sparsity_layout_x.size(-1) != sparsity_layout_y.size(-2):
@@ -74,7 +78,8 @@ class _BlocksparseMatmulSSS(torch.autograd.Function):
                 sparsity_layout_y: Tensor, sparsity_reverse_lut_y: Tensor,
                 sparsity_layout_o: Tensor, sparsity_lut_o: Tensor,
                 sparsity_block_size: int, n_sparse_blocks: int, triton_block_size: int) -> Tensor:
-        output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size), device=x.device)
+        output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
+                             dtype=x.dtype, device=x.device)
 
         x_b, x_r, x_c = x.size()
         x_b_s, x_r_s, x_c_s = x.stride()
@@ -211,7 +216,7 @@ class _BlocksparseMatmulSSS(torch.autograd.Function):
             blk_y = tl.load(y + blk_y_idx, mask=blk_y_msk)
 
             # Perform matrix multiplication
-            buf += tl.dot(blk_x, blk_y)
+            buf += tl.dot(blk_x, blk_y, input_precision="tf32")
 
         # Store output
         blk_o_idx = ((pid_blk * o_b_s) +
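
For context on the new `input_precision="tf32"` argument: in recent Triton releases `tl.dot` accepts an `input_precision` keyword that chooses between TF32 tensor-core math and full IEEE float32 for float32 inputs. The following standalone kernel is only a sketch of that call pattern (it is not blksprs code; names, shapes and the tolerance are made up) and assumes a CUDA device with a recent Triton install:

```python
import torch
import triton
import triton.language as tl


@triton.jit
def square_matmul_kernel(x_ptr, y_ptr, o_ptr, BLOCK: tl.constexpr):
    # Single-tile matrix multiply: load one BLOCK x BLOCK tile of x and y
    offs = tl.arange(0, BLOCK)
    idx = offs[:, None] * BLOCK + offs[None, :]
    blk_x = tl.load(x_ptr + idx)
    blk_y = tl.load(y_ptr + idx)
    # "tf32" trades a little float32 accuracy for tensor-core throughput;
    # "ieee" would request full float32 precision instead
    blk_o = tl.dot(blk_x, blk_y, input_precision="tf32")
    tl.store(o_ptr + idx, blk_o)


x = torch.randn(64, 64, device="cuda")
y = torch.randn(64, 64, device="cuda")
o = torch.empty(64, 64, device="cuda")
square_matmul_kernel[(1,)](x, y, o, BLOCK=64)
assert torch.allclose(o, x @ y, atol=1e-1, rtol=1e-2)
```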

{blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/softmax.py
@@ -26,6 +26,8 @@ def softmax(x: Tensor, sparsity_layout: Tensor, sparsity_block_size: int, triton
         Tensor: The result of the softmax operation as a block-sparse tensor in compressed form.
 
     """
+    x = x.contiguous()
+
     validate_dimensions(x)
     validate_contiguous(x)
     validate_device(x)
@@ -125,7 +127,7 @@ class _BlocksparseSoftmax(torch.autograd.Function):
         s_l_s_b, s_l_s_r, s_l_s_c = sparsity_layout_s.size()
         s_l_s_b_s, s_l_s_r_s, s_l_s_c_s = sparsity_layout_s.stride()
 
-        grad_x = torch.empty_like(o)
+        grad_x = torch.empty_like(o, dtype=torch.float)
 
         triton_grid = lambda meta: [o_b,
                                     triton.cdiv(o_r, meta["TRITON_BLOCK_SIZE"]),

{blksprs-1.4 → blksprs-1.4.2}/blksprs/ops/transpose.py
@@ -26,6 +26,8 @@ def transpose(x: Tensor, sparsity_layout: Tensor, sparsity_block_size: int, trit
         Tensor: The sparsity layout of the transposed tensor.
 
     """
+    x = x.contiguous()
+
     validate_dimensions(x)
     validate_contiguous(x)
     validate_device(x)
@@ -57,7 +59,8 @@ class _BlocksparseTranspose(torch.autograd.Function):
     def forward(ctx, x: Tensor,
                 sparsity_layout: Tensor, sparsity_lut: Tensor, sparsity_reverse_lut: Tensor, sparsity_block_size: int,
                 n_sparse_blocks: int, triton_block_size: int) -> (Tensor, Tensor):
-        output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size), device=x.device)
+        output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
+                             dtype=x.dtype, device=x.device)
 
         x_b, x_r, x_c = x.size()
         x_b_s, x_r_s, x_c_s = x.stride()
@@ -99,7 +102,8 @@ class _BlocksparseTranspose(torch.autograd.Function):
         sparsity_block_size = ctx.sparsity_block_size
         triton_block_size = ctx.triton_block_size
 
-        return transpose(grad_output, sparsity_layout, sparsity_block_size, triton_block_size)[0], None, None, None, None, None, None
+        return transpose(grad_output, sparsity_layout, sparsity_block_size, triton_block_size)[
+            0], None, None, None, None, None, None
 
     @staticmethod
     @triton.jit

{blksprs-1.4 → blksprs-1.4.2}/blksprs/utils/tools.py
@@ -1,12 +1,13 @@
-import torch
 from torch import Tensor, Size
 
+from blksprs.utils.validation import _set_skip_validation
+
 
 def do_shape_blocksparse(x: Tensor):
     if x.dim() == 3:
-        return x, x.size()
+        return x.contiguous(), x.size()
 
-    return x.reshape(-1, x.size(-2), x.size(-1)), x.size()
+    return x.reshape(-1, x.size(-2), x.size(-1)).contiguous(), x.size()
 
 
 def undo_shape_blocksparse(x: Tensor, shape: Size):
@@ -18,3 +19,7 @@ def undo_shape_blocksparse(x: Tensor, shape: Size):
 
 def get_triton_block_size(sparsity_block_size: int, limit: int = 128):
     return min(sparsity_block_size, limit)
+
+
+def disable_validation():
+    _set_skip_validation(True)
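
Combined with the `__init__.py` change above, the new helper is reachable as `bs.util.disable_validation()`. A brief usage sketch (purely illustrative):

```python
import blksprs as bs

# Validation (dimension, contiguity, dtype and device checks) is on by default.
# Once inputs are known to be well-formed, it can be switched off globally to
# skip the validate_* checks shown in validation.py below.
bs.util.disable_validation()
```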

{blksprs-1.4 → blksprs-1.4.2}/blksprs/utils/validation.py
@@ -1,18 +1,19 @@
 import torch
 from torch import Tensor
 
+VALIDATION = True
 
-def validate_dimensions(*tensors: Tensor) -> None:
-    if
+def validate_dimensions(*tensors: Tensor, dims=3) -> None:
+    if _check_skip_validation():
         return
 
     for tensor in tensors:
-        if tensor.dim() != 3:
-            raise ValueError("Tensor must have 3 dimensions")
+        if tensor.dim() != dims:
+            raise ValueError(f"Tensor must have {dims} dimensions")
 
 
 def validate_contiguous(*tensors: Tensor) -> None:
-    if
+    if _check_skip_validation():
         return
 
     for tensor in tensors:
@@ -21,7 +22,7 @@ def validate_contiguous(*tensors: Tensor) -> None:
 
 
 def validate_dtype_float(*tensors: Tensor) -> None:
-    if
+    if _check_skip_validation():
         return
 
     for tensor in tensors:
@@ -30,7 +31,7 @@ def validate_dtype_float(*tensors: Tensor) -> None:
 
 
 def validate_dtype_int(*tensors: Tensor) -> None:
-    if
+    if _check_skip_validation():
         return
 
     for tensor in tensors:
@@ -39,7 +40,7 @@ def validate_dtype_int(*tensors: Tensor) -> None:
 
 
 def validate_device(*tensors: Tensor) -> None:
-    if
+    if _check_skip_validation():
         return
 
     device = None
@@ -56,7 +57,7 @@ def validate_device(*tensors: Tensor) -> None:
 
 
 def validate_sparsity(sparsity_block_size: int, *tensor_sparsity_layout_tuples: tuple[Tensor, Tensor]) -> None:
-    if
+    if _check_skip_validation():
         return
 
     for (tensor, sparsity_layout) in tensor_sparsity_layout_tuples:
@@ -73,7 +74,7 @@ def _validate_sparsity_layout_values(sparsity_layout: Tensor):
         raise ValueError("Sparsity layout values must be either 0 or 1")
 
 def validate_sparsity_block_size(sparsity_block_size: int, *tensors):
-    if
+    if _check_skip_validation():
         return
 
     if not (sparsity_block_size & (sparsity_block_size - 1)) == 0:
@@ -84,14 +85,21 @@ def validate_sparsity_block_size(sparsity_block_size: int, *tensors):
             raise ValueError("Tensor sizes must be divisible by sparsity block size")
 
 def validate_triton_block_size(triton_block_size: int, sparsity_block_size: int):
-    if
+    if _check_skip_validation():
         return
 
     if triton_block_size is None:
         return
 
+    if not (triton_block_size & (triton_block_size - 1)) == 0:
+        raise ValueError("Triton block size must be a power of 2")
+
     if triton_block_size > sparsity_block_size:
         raise ValueError("Triton block size cannot be larger than sparsity block size")
 
-def
-    return
+def _check_skip_validation():
+    return not VALIDATION
+
+def _set_skip_validation(skip_validation: bool):
+    global VALIDATION
+    VALIDATION = not skip_validation
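
The new guard in `validate_triton_block_size` reuses the power-of-two bit trick already applied to `sparsity_block_size`: for a positive integer n, `n & (n - 1)` clears the lowest set bit, so the result is zero exactly when a single bit is set. A quick standalone check of that property:

```python
def is_power_of_two(n: int) -> bool:
    # Same test as the added check: n & (n - 1) == 0, restricted to n > 0
    return n > 0 and (n & (n - 1)) == 0


assert all(is_power_of_two(n) for n in (1, 2, 16, 64, 128))
assert not any(is_power_of_two(n) for n in (0, 3, 24, 100))
```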

{blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: blksprs
-Version: 1.4
+Version: 1.4.2
 Summary: A lightweight library for operations on blocksparse matrices in PyTorch.
 Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
 Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
@@ -14,10 +14,8 @@ Requires-Dist: pytest-xdist; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
 Requires-Dist: coverage; extra == "test"
 Requires-Dist: matplotlib; extra == "test"
-Provides-Extra: deploy
-Requires-Dist: build; extra == "deploy"
-Requires-Dist: twine; extra == "deploy"
-Requires-Dist: pdoc3; extra == "deploy"
+Provides-Extra: build
+Requires-Dist: build; extra == "build"
 
 # blksprs
 
@@ -146,7 +144,7 @@ def test_readme():
     # Assert that the output has the correct sparsity layout
     actual_sparsity_layout_o = bs.layout.build_sparsity_layout(o_dense, sparsity_block_size,
                                                                triton_block_size=triton_block_size)
-    assert torch.allclose(actual_sparsity_layout_o, sparsity_layout_o)
+    assert torch.allclose(actual_sparsity_layout_o.to(torch.int), sparsity_layout_o)
 
     # Convert output tensor back to original shape
     o = bs.util.undo_shape_blocksparse(o_dense, x_shape_original)

{blksprs-1.4 → blksprs-1.4.2}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "blksprs"
-version = "1.4"
+version = "1.4.2"
 authors = [{ name = "Felix Schön", email = "schoen@kr.tuwien.ac.at" }]
 description = "A lightweight library for operations on blocksparse matrices in PyTorch."
 readme = "README.md"
@@ -22,10 +22,8 @@ test = [
     "coverage",
     "matplotlib"
 ]
-deploy = [
-    "build",
-    "twine",
-    "pdoc3"
+build = [
+    "build"
 ]
 
 [build-system]

{blksprs-1.4 → blksprs-1.4.2}/blksprs/utils/benchmarking.py (file without changes)
{blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/SOURCES.txt (file without changes)
{blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/dependency_links.txt (file without changes)
{blksprs-1.4 → blksprs-1.4.2}/blksprs.egg-info/top_level.txt (file without changes)
{blksprs-1.4 → blksprs-1.4.2}/setup.cfg (file without changes)