blksprs 2.1.2-py3-none-any.whl → 2.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
blksprs/__init__.py CHANGED
@@ -1,6 +1,6 @@
  from blksprs.utils.blksprs_tensor import BlksprsTensor

- __version__ = "2.1.2"
+ __version__ = "2.1.4"


  class ops:
blksprs/ops/conversion.py CHANGED
@@ -56,7 +56,7 @@ def to_sparse(x: Tensor, sparsity_layout: Tensor,
  def to_sparse_forward(x: Tensor, _: Tensor,
  sparsity_lut: Tensor, sparsity_block_size: int, n_sparse_blocks: int) -> Tensor:
  with torch.no_grad():
- output = torch.zeros(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
+ output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
  dtype=x.dtype, device=x.device)

  x_b, x_r, x_c = x.size()
blksprs/ops/flow.py CHANGED
@@ -14,7 +14,7 @@ def flow_pull_forward(x: Tensor, sparsity_layout_o: Tensor,
  sparsity_lut: Tensor, sparsity_reverse_lut: Tensor,
  sparsity_block_size: int, n_sparse_blocks: int) -> Tensor:
  with torch.no_grad():
- output = torch.zeros(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
+ output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
  dtype=x.dtype, device=x.device)

  x_b, x_r, x_c = x.size()
blksprs/ops/matmul.py CHANGED
@@ -62,7 +62,7 @@ def matmul_forward(x: Tensor, y: Tensor,
  _: Tensor, sparsity_lut_o: Tensor,
  sparsity_block_size: int, n_sparse_blocks: int) -> Tensor:
  with torch.no_grad():
- output = torch.zeros(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
+ output = torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
  dtype=x.dtype, device=x.device)

  x_b, x_r, x_c = x.size()
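The conversion, flow, and matmul hunks above all make the same swap from `torch.zeros` to `torch.empty` when allocating the per-block output buffer. A minimal standalone sketch of that allocation pattern (the helper name is hypothetical, not part of blksprs), assuming the Triton kernel launched afterwards writes every element of the buffer so the skipped zero-fill is never observed:

```python
import torch

def allocate_block_buffer(n_sparse_blocks: int, sparsity_block_size: int,
                          dtype: torch.dtype, device: torch.device) -> torch.Tensor:
    # torch.empty skips the zero-initialisation done by torch.zeros; this is only
    # safe when every element is overwritten before it is read.
    return torch.empty(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
                       dtype=dtype, device=device)
```

If any block could be left untouched by the downstream kernel, a zero-initialised buffer would still be required.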
blksprs/ops/softmax.py CHANGED
@@ -66,7 +66,7 @@ def softmax_forward(x: Tensor, sparsity_layout: Tensor,
  sparsity_lut: Tensor,
  sparsity_reverse_lut_rws: Tensor,
  sparsity_block_size: int) -> Tensor:
- output = torch.zeros_like(x)
+ output = torch.empty_like(x)

  x_row_wise_max, sparsity_layout_rwm = row_wise_max(x, sparsity_layout, sparsity_block_size,
  flag_slice_only=True)
@@ -114,7 +114,7 @@ def softmax_backward_wrapper(ctx, grad_output):
  def softmax_backward(grad_output: Tensor, o: Tensor, sparsity_lut: Tensor, sparsity_layout: Tensor,
  sparsity_block_size: int) -> Tensor:
  with torch.no_grad():
- grad_x = torch.zeros_like(o, dtype=torch.float)
+ grad_x = torch.empty_like(o, dtype=torch.float)

  s, sparsity_layout_s = row_wise_sum(grad_output * o, sparsity_layout, sparsity_block_size, flag_slice_only=True)

@@ -359,7 +359,7 @@ def softmax_fused_forward(x: Tensor, sparsity_layout: Tensor,
  sparsity_reverse_lut_sorted: Tensor,
  max_blocks_line: int,
  sparsity_block_size: int) -> Tensor:
- output = torch.zeros_like(x)
+ output = torch.empty_like(x)

  x_b, x_r, x_c = x.size()
  x_b_s, x_r_s, x_c_s = stride(x)
@@ -374,7 +374,7 @@ def softmax_fused_forward(x: Tensor, sparsity_layout: Tensor,
  (x,
  x_b, x_b_s, x_r_s, x_c_s,
  output,
- s_l_b, s_l_b_s, s_l_r_s, s_l_c_s,
+ s_l_b, s_l_b_s, s_l_r_s, s_l_c, s_l_c_s,
  sparsity_reverse_lut_sorted,
  max_blocks_line,
  sparsity_block_size))
@@ -399,7 +399,7 @@ def softmax_fused_backward(grad_output: Tensor,
  max_blocks_line: int,
  sparsity_block_size: int) -> Tensor:
  with torch.no_grad():
- grad_x = torch.zeros_like(o)
+ grad_x = torch.empty_like(o)

  g_b, g_r, g_c = grad_output.size()
  g_b_s, g_r_s, g_c_s = stride(grad_output)
@@ -417,7 +417,7 @@ def softmax_fused_backward(grad_output: Tensor,
  g_b, g_b_s, g_r_s, g_c_s,
  o,
  o_b, o_b_s, o_r_s, o_c_s,
- s_l_b, s_l_b_s, s_l_r_s, s_l_c_s,
+ s_l_b, s_l_b_s, s_l_r_s, s_l_c, s_l_c_s,
  sparsity_reverse_lut_sorted,
  grad_x,
  max_blocks_line,
@@ -437,7 +437,7 @@ def softmax_fused_backward(grad_output: Tensor,
  def softmax_fused_kernel(x,
  x_b, x_b_s, x_r_s, x_c_s,
  o,
- s_l_b, s_l_b_s, s_l_r_s, s_l_c_s,
+ s_l_b, s_l_b_s, s_l_r_s, s_l_c, s_l_c_s,
  r_lut_s,
  mbs: tl.constexpr,
  sparsity_block_size: tl.constexpr,
@@ -451,8 +451,9 @@ def softmax_fused_kernel(x,
  blk_rev_idx = (pid_bat * s_l_b_s +
  pid_row * s_l_r_s +
  (tl.arange(0, mbs) * s_l_c_s))
- blk_rev_msk = (blk_rev_idx >= 0 and blk_rev_idx < s_l_b * s_l_b_s)
- blk_rev = tl.load(r_lut_s + blk_rev_idx, mask=blk_rev_msk).to(tl.int32)
+ blk_rev_msk = ((blk_rev_idx >= 0 and blk_rev_idx < s_l_b * s_l_b_s) and
+ (tl.arange(0, mbs) < s_l_c))
+ blk_rev = tl.load(r_lut_s + blk_rev_idx, mask=blk_rev_msk, other=-1).to(tl.int32)

  if (not (tl.min(blk_rev) == -1 and
  tl.max(blk_rev) == -1)):
@@ -488,7 +489,7 @@ def softmax_fused_kernel_grad(g,
  g_b, g_b_s, g_r_s, g_c_s,
  x,
  x_b, x_b_s, x_r_s, x_c_s,
- s_l_b, s_l_b_s, s_l_r_s, s_l_c_s,
+ s_l_b, s_l_b_s, s_l_r_s, s_l_c, s_l_c_s,
  r_lut_s,
  o,
  mbs: tl.constexpr,
@@ -503,8 +504,9 @@ def softmax_fused_kernel_grad(g,
  blk_rev_idx = (pid_bat * s_l_b_s +
  pid_row * s_l_r_s +
  (tl.arange(0, mbs) * s_l_c_s))
- blk_rev_msk = (blk_rev_idx >= 0 and blk_rev_idx < s_l_b * s_l_b_s)
- blk_rev = tl.load(r_lut_s + blk_rev_idx, mask=blk_rev_msk).to(tl.int32)
+ blk_rev_msk = ((blk_rev_idx >= 0 and blk_rev_idx < s_l_b * s_l_b_s) and
+ (tl.arange(0, mbs) < s_l_c))
+ blk_rev = tl.load(r_lut_s + blk_rev_idx, mask=blk_rev_msk, other=-1).to(tl.int32)

  if (not (tl.min(blk_rev) == -1 and
  tl.max(blk_rev) == -1)):
@@ -557,7 +559,7 @@ def softmax_fused_build_lut(lut: dict, sparsity_layout: Tensor):
  .sum(dim=-1)
  .max()
  .item())
- lut["max_blocks_line"] = min(ceil_pow2(max(max_blocks_line, 2)), sparsity_layout.size(-1))
+ lut["max_blocks_line"] = ceil_pow2(max(max_blocks_line, 2))

  validate_contiguous(sparsity_layout, lut["sparsity_reverse_lut_sorted"])

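The two kernel hunks above tighten the reverse-LUT load: the mask now also bounds the lanes with `tl.arange(0, mbs) < s_l_c`, and `other=-1` makes the masked lanes read the same sentinel value the kernels already test for. A small self-contained sketch of that masked-load pattern (kernel and variable names are illustrative only, not the package's kernel, and a CUDA device is assumed):

```python
import torch
import triton
import triton.language as tl

@triton.jit
def masked_lut_load_kernel(lut_ptr, out_ptr, n_valid, MBS: tl.constexpr):
    offs = tl.arange(0, MBS)
    msk = offs < n_valid                                 # bound lanes to valid LUT entries
    vals = tl.load(lut_ptr + offs, mask=msk, other=-1)   # masked lanes read the sentinel -1
    tl.store(out_ptr + offs, vals)

def demo():
    lut = torch.arange(5, device="cuda", dtype=torch.int32)
    out = torch.empty(8, device="cuda", dtype=torch.int32)
    masked_lut_load_kernel[(1,)](lut, out, 5, MBS=8)
    # out is now [0, 1, 2, 3, 4, -1, -1, -1]
    return out
```

Without the extra bound, lanes past the last valid column could address the LUT out of range; the sentinel keeps them on the "skip this block" path.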
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: blksprs
- Version: 2.1.2
+ Version: 2.1.4
  Summary: A lightweight library for operations on block-sparse matrices in PyTorch.
  Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
  Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
@@ -20,7 +20,8 @@ Requires-Dist: matplotlib; extra == "test"
  # blksprs

  [![GitHub Release](https://img.shields.io/github/v/release/FelixSchoen/blksprs?include_prereleases&label=Latest%20Release)](https://github.com/FelixSchoen/blksprs/releases)
- [![Python Version](https://img.shields.io/badge/Python%20Version-3.11-blue)](https://www.python.org/downloads/release/python-3119/)
+ [![Python 3.11](https://img.shields.io/badge/Python%20Version-3.11-blue)](https://www.python.org/downloads/release/python-3119/)
+ [![Python 3.12](https://img.shields.io/badge/Python%20Version-3.12-blue)](https://www.python.org/downloads/release/python-31210/)

  ## Overview

@@ -75,9 +76,7 @@ _* see the [Roadmap](#roadmap) section for more information_

  ## Installation

- Note that due to the dependency on [Triton](https://github.com/triton-lang/triton) this library is **only compatible
- with
- the Linux platform**.
+ Note that due to the dependency on [Triton](https://github.com/triton-lang/triton) this library is **only compatible with the Linux platform**.
  Keep track of this [issue](https://github.com/triton-lang/triton/issues/1640) for updates.

  We recommend installing blksprs from [PyPI](https://pypi.org/project/blksprs/) using pip:
@@ -86,8 +85,8 @@ We recommend installing blksprs from [PyPI](https://pypi.org/project/blksprs/) u

  ### Dependencies

- - [PyTorch](https://pytorch.org/) (built with v2.6)
- - _[NumPy](https://numpy.org/) (to get rid of warnings, built with v2.2.4)_
+ - [PyTorch](https://pytorch.org/) (built with v2.7.1)
+ - _[NumPy](https://numpy.org/) (to get rid of warnings, built with v2.3.1)_
  - _[Triton](https://github.com/triton-lang/triton) (included with PyTorch)_

  ## Changelog
@@ -103,7 +102,7 @@ We will continue to maintain the library and fix any issues that arise.
  Should you find any bugs please open an [issue](https://github.com/FelixSchoen/blksprs/issues).
  We also encourage [pull requests](https://github.com/FelixSchoen/blksprs/pulls).

- It might be that this changes with future projects, but as of March 2025, we are content with the current state of the
+ It might be that this changes with future projects, but as of June 2025, we are content with the current state of the
  library.

  ## Known Limitations and Issues
@@ -112,9 +111,6 @@ library.
  In order to work around this bug a manual conversion of some values is needed, (slightly) negatively impacting
  performance.
  Watch the [issue](https://github.com/triton-lang/triton/issues/6376) on Triton's issue tracker for more information.
- - PyTorch's `wrap_triton()` currently does not support config pruning. It thus cannot be used for some of the kernels,
- which could impact graph compilation.
- - There seem to be some issues with autocasting, forcing some operations to manually cast.
  - There will be some slight numerical differences between vanilla and blksprs operations.
  These instabilities are due to Triton and thus cannot be fixed by this library alone.
  However, for all intents and purposes, these very minor differences should not matter and can safely be ignored.
@@ -196,8 +192,8 @@ def test_readme():

  # Other available functions
  bs.ops.transpose(o_sparse, sparsity_layout_o, sparsity_block_size)
- bs.ops.softmax(o_sparse, sparsity_layout_o, sparsity_block_size)
- bs.ops.softmax_fused(o_sparse, sparsity_layout_o, sparsity_block_size) # Significantly faster version that requires that rows of matrix fit into memory
+ bs.ops.softmax(o_sparse, sparsity_layout_o, sparsity_block_size, flag_fused=False)
+ bs.ops.softmax_fused(o_sparse, sparsity_layout_o, sparsity_block_size) # Significantly faster version that requires that rows of matrix fit into memory (default if flag is not set)
  bs.ops.misc.row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size)
  bs.ops.misc.row_wise_max(o_sparse, sparsity_layout_o, sparsity_block_size)

@@ -1,13 +1,13 @@
- blksprs/__init__.py,sha256=NRxydw4i9jg7WeDuojfEePdtdbughV9AZsEcT9yywK4,1615
+ blksprs/__init__.py,sha256=XERzTtkiElDeBppOO8rNrF6OktUQf_yozDiA4DUXqTY,1615
  blksprs/layouting/distribution_layout.py,sha256=ur1ty_2U-Hfj78hMWsLZvu7ZuGhzW3qGLKMc72DfTZM,5861
  blksprs/layouting/sparsity_layout.py,sha256=eXHmu2h7K5Q-YUpfOxocJoeP_5ZoQFZf_eHLxRZQbYU,11207
- blksprs/ops/conversion.py,sha256=RgVSyiULLwv8KWQqSyXpKwTr4Qp-lpDK9i-zKlN841I,21914
+ blksprs/ops/conversion.py,sha256=nv5gXiyZkUtk1kCIlPr0Vpaj4G8G6dJdW7StlbV3nDw,21914
  blksprs/ops/distribution.py,sha256=0tPldv0ARzmCV1CU2jvfqpHBgOuHPrDFiCtqsLs7CZc,20789
- blksprs/ops/flow.py,sha256=qdWBCLDSkKaa8CAfkO1NgH-J5N7yMsILyR7qEpyrIUU,8246
- blksprs/ops/matmul.py,sha256=5tVBKU_lglUjaLDi6J_dscdqlmzRz38OGxqAxZxZXDs,11879
+ blksprs/ops/flow.py,sha256=oUn_xDT74220-EmnBnB8bRNtbS1mjbxWpm76PFsK22o,8246
+ blksprs/ops/matmul.py,sha256=ES9bpiCIRBxaynNIL5ftDP0c9LSArbj8YJqkPEzBaIU,11879
  blksprs/ops/partitioning.py,sha256=cfQmY9BZqGTvvJorIhtb-EyuGRJGPraWR-wTKdb47aI,9954
  blksprs/ops/repeat.py,sha256=TLYNxwPuT9y5K9xyM41WK5gnggAJF3lI61Q2K7zWjns,9035
- blksprs/ops/softmax.py,sha256=1UIovPrdE_zgAIPqjmOTFn8CMbd_2Z8tPP-vMBxU07I,23526
+ blksprs/ops/softmax.py,sha256=tfC_jaAKrA956rxGeb57klMuYRKTiyMCd5Zg5DIH3fc,23649
  blksprs/ops/transpose.py,sha256=U-VAyLRT6_NDv9qYSFzBqfVlDeIpTqAMEXkqto0VF6w,4072
  blksprs/ops/misc/broadcast_ops.py,sha256=-PrHiSJikZh8nXUmXxSCtFEP27TTxFr4wcrNxBjnimk,5987
  blksprs/ops/misc/row_wise.py,sha256=n5FJjAuOd8BHBJQx4bsQwr-HmXkR9PYVAqfk77wjOFU,19653
@@ -17,7 +17,7 @@ blksprs/utils/blksprs_tensor.py,sha256=pfoz59aJixj_fIoFx76ySiygwRQUemmgjMKepZ2c4
  blksprs/utils/processing.py,sha256=RNkEDc0g-sNHRuMPkRzNWU13d3_lIkXMJdoqES4yQTM,3738
  blksprs/utils/tools.py,sha256=TKygEKge4wJtJnXXDg8BTL8vzBpqIJsQ_A3_5FmLpcE,859
  blksprs/utils/validation.py,sha256=G8eQlvJVMKfEX3k2AwBD0A6Ck-gFoRLpLNY6HXsB3fA,4348
- blksprs-2.1.2.dist-info/METADATA,sha256=U20ZL7XLhrgiMd_0QGFik0Ci43SDoCT8q876-1yCeNo,9665
- blksprs-2.1.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- blksprs-2.1.2.dist-info/top_level.txt,sha256=qyp0IHeY3H2GQA97i4hk_To5rRBS2YcE1HRPSLy04fk,8
- blksprs-2.1.2.dist-info/RECORD,,
+ blksprs-2.1.4.dist-info/METADATA,sha256=qGLQunHEIoHlmRvFnM0TVDjOSApwGzBglpZezmfhHLU,9590
+ blksprs-2.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ blksprs-2.1.4.dist-info/top_level.txt,sha256=qyp0IHeY3H2GQA97i4hk_To5rRBS2YcE1HRPSLy04fk,8
+ blksprs-2.1.4.dist-info/RECORD,,