blksprs 1.3.tar.gz → 1.4.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {blksprs-1.3 → blksprs-1.4.1}/PKG-INFO +31 -30
  2. {blksprs-1.3 → blksprs-1.4.1}/README.md +28 -25
  3. blksprs-1.4.1/blksprs/__init__.py +18 -0
  4. {blksprs-1.3 → blksprs-1.4.1}/blksprs/layouting/distribution_layout.py +1 -1
  5. {blksprs-1.3 → blksprs-1.4.1}/blksprs/layouting/sparsity_layout.py +2 -2
  6. blksprs-1.3/blksprs/misc/broadcast_addition.py → blksprs-1.4.1/blksprs/misc/broadcast_ops.py +9 -6
  7. {blksprs-1.3 → blksprs-1.4.1}/blksprs/misc/repeat_interleave.py +2 -0
  8. blksprs-1.4.1/blksprs/misc/row_wise.py +390 -0
  9. {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/conversion.py +6 -0
  10. {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/distribution.py +6 -0
  11. {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/exp.py +2 -0
  12. {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/matmul.py +6 -2
  13. {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/softmax.py +13 -13
  14. {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/transpose.py +2 -0
  15. {blksprs-1.3 → blksprs-1.4.1}/blksprs/utils/tools.py +7 -1
  16. {blksprs-1.3 → blksprs-1.4.1}/blksprs/utils/validation.py +15 -10
  17. {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/PKG-INFO +31 -30
  18. {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/SOURCES.txt +3 -2
  19. {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/requires.txt +1 -3
  20. {blksprs-1.3 → blksprs-1.4.1}/pyproject.toml +3 -5
  21. blksprs-1.3/blksprs/ops/row_wise_sum.py +0 -231
  22. {blksprs-1.3 → blksprs-1.4.1}/blksprs/utils/benchmarking.py +0 -0
  23. {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/dependency_links.txt +0 -0
  24. {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/top_level.txt +0 -0
  25. {blksprs-1.3 → blksprs-1.4.1}/setup.cfg +0 -0
{blksprs-1.3 → blksprs-1.4.1}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: blksprs
- Version: 1.3
+ Version: 1.4.1
  Summary: A lightweight library for operations on blocksparse matrices in PyTorch.
  Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
  Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
@@ -14,10 +14,8 @@ Requires-Dist: pytest-xdist; extra == "test"
  Requires-Dist: pytest-cov; extra == "test"
  Requires-Dist: coverage; extra == "test"
  Requires-Dist: matplotlib; extra == "test"
- Provides-Extra: deploy
- Requires-Dist: build; extra == "deploy"
- Requires-Dist: twine; extra == "deploy"
- Requires-Dist: pdoc3; extra == "deploy"
+ Provides-Extra: build
+ Requires-Dist: build; extra == "build"

  # blksprs

@@ -83,14 +81,7 @@ the [test cases](https://github.com/FelixSchoen/blksprs/blob/main/test/cases/tes

  ```python
  import torch
-
- from blksprs.layouting.sparsity_layout import build_sparsity_layout
- from blksprs.ops.conversion import to_sparse, to_dense
- from blksprs.ops.matmul import matmul
- from blksprs.ops.row_wise_sum import row_wise_sum
- from blksprs.ops.softmax import softmax
- from blksprs.ops.transpose import transpose
- from blksprs.utils.tools import do_shape_blocksparse, undo_shape_blocksparse
+ import blksprs as bs


  def test_readme():
@@ -112,47 +103,57 @@ def test_readme():
      y = torch.randn(size=(b, h, n, k), device="cuda").transpose(-1, -2).contiguous()

      # Convert tensors to three-dimensional (dense) tensors since Triton can only handle tensors of exactly three dimensions
-     x_dense, x_shape_original = do_shape_blocksparse(x)
-     y_dense, y_shape_original = do_shape_blocksparse(y)
+     x_dense, x_shape_original = bs.util.do_shape_blocksparse(x)
+     y_dense, y_shape_original = bs.util.do_shape_blocksparse(y)

      # Create sparsity layouts from existing tensors
-     sparsity_layout_x = build_sparsity_layout(x_dense, sparsity_block_size, triton_block_size=triton_block_size)
-     sparsity_layout_y = build_sparsity_layout(y_dense, sparsity_block_size, triton_block_size=triton_block_size)
+     sparsity_layout_x = bs.layout.build_sparsity_layout(x_dense, sparsity_block_size,
+                                                         triton_block_size=triton_block_size)
+     sparsity_layout_y = bs.layout.build_sparsity_layout(y_dense, sparsity_block_size,
+                                                         triton_block_size=triton_block_size)

      # Create random sparsity layout for output tensor
      sparsity_layout_o = _get_random_sparsity_layout(b * h, m, n, sparsity_block_size, sparsity_percentage)

      # Convert tensors to sparse tensors for matrix multiplication
-     x_sparse = to_sparse(x_dense, sparsity_layout_x, sparsity_block_size, triton_block_size=triton_block_size)
-     y_sparse = to_sparse(y_dense, sparsity_layout_y, sparsity_block_size, triton_block_size=triton_block_size)
+     x_sparse = bs.to_sparse(x_dense, sparsity_layout_x, sparsity_block_size, triton_block_size=triton_block_size)
+     y_sparse = bs.to_sparse(y_dense, sparsity_layout_y, sparsity_block_size, triton_block_size=triton_block_size)

      # Perform matrix multiplication
-     o_sparse = matmul(x_sparse, sparsity_layout_x, y_sparse, sparsity_layout_y, sparsity_layout_o, sparsity_block_size,
-                       triton_block_size=triton_block_size)
-     o_dense = to_dense(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     o_sparse = bs.matmul(x_sparse, sparsity_layout_x, y_sparse, sparsity_layout_y, sparsity_layout_o,
+                          sparsity_block_size,
+                          triton_block_size=triton_block_size)
+
+     # Apply element-wise operation
+     o_sparse = torch.add(o_sparse, 1)
+
+     o_dense = bs.to_dense(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)

      # Sanity check
      o_torch = torch.matmul(x_dense, y_dense)
+     o_torch = torch.add(o_torch, 1)

      # Perform round trip to set sparse blocks to 0
-     o_torch_round_trip = to_dense(
-         to_sparse(o_torch, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size),
+     o_torch_round_trip = bs.to_dense(
+         bs.to_sparse(o_torch, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size),
          sparsity_layout_o, sparsity_block_size, fill_value=0, triton_block_size=triton_block_size)

      # Assert that the output is correct
      assert torch.allclose(o_dense, o_torch_round_trip, atol=2e-2)  # Note that small numerical differences are expected

      # Assert that the output has the correct sparsity layout
-     actual_sparsity_layout_o = build_sparsity_layout(o_dense, sparsity_block_size, triton_block_size=triton_block_size)
-     assert torch.allclose(actual_sparsity_layout_o, sparsity_layout_o)
+     actual_sparsity_layout_o = bs.layout.build_sparsity_layout(o_dense, sparsity_block_size,
+                                                                triton_block_size=triton_block_size)
+     assert torch.allclose(actual_sparsity_layout_o.to(torch.int), sparsity_layout_o)

      # Convert output tensor back to original shape
-     o = undo_shape_blocksparse(o_dense, x_shape_original)
+     o = bs.util.undo_shape_blocksparse(o_dense, x_shape_original)

      # Other available functions
-     transpose(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
-     softmax(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
-     row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     bs.transpose(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     bs.softmax(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     bs.misc.row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     bs.misc.row_wise_max(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)


  def _get_random_sparsity_layout(b, m, n, sparsity_block_size, sparsity_percentage):
{blksprs-1.3 → blksprs-1.4.1}/README.md

@@ -62,14 +62,7 @@ the [test cases](https://github.com/FelixSchoen/blksprs/blob/main/test/cases/tes

  ```python
  import torch
-
- from blksprs.layouting.sparsity_layout import build_sparsity_layout
- from blksprs.ops.conversion import to_sparse, to_dense
- from blksprs.ops.matmul import matmul
- from blksprs.ops.row_wise_sum import row_wise_sum
- from blksprs.ops.softmax import softmax
- from blksprs.ops.transpose import transpose
- from blksprs.utils.tools import do_shape_blocksparse, undo_shape_blocksparse
+ import blksprs as bs


  def test_readme():
@@ -91,47 +84,57 @@ def test_readme():
      y = torch.randn(size=(b, h, n, k), device="cuda").transpose(-1, -2).contiguous()

      # Convert tensors to three-dimensional (dense) tensors since Triton can only handle tensors of exactly three dimensions
-     x_dense, x_shape_original = do_shape_blocksparse(x)
-     y_dense, y_shape_original = do_shape_blocksparse(y)
+     x_dense, x_shape_original = bs.util.do_shape_blocksparse(x)
+     y_dense, y_shape_original = bs.util.do_shape_blocksparse(y)

      # Create sparsity layouts from existing tensors
-     sparsity_layout_x = build_sparsity_layout(x_dense, sparsity_block_size, triton_block_size=triton_block_size)
-     sparsity_layout_y = build_sparsity_layout(y_dense, sparsity_block_size, triton_block_size=triton_block_size)
+     sparsity_layout_x = bs.layout.build_sparsity_layout(x_dense, sparsity_block_size,
+                                                         triton_block_size=triton_block_size)
+     sparsity_layout_y = bs.layout.build_sparsity_layout(y_dense, sparsity_block_size,
+                                                         triton_block_size=triton_block_size)

      # Create random sparsity layout for output tensor
      sparsity_layout_o = _get_random_sparsity_layout(b * h, m, n, sparsity_block_size, sparsity_percentage)

      # Convert tensors to sparse tensors for matrix multiplication
-     x_sparse = to_sparse(x_dense, sparsity_layout_x, sparsity_block_size, triton_block_size=triton_block_size)
-     y_sparse = to_sparse(y_dense, sparsity_layout_y, sparsity_block_size, triton_block_size=triton_block_size)
+     x_sparse = bs.to_sparse(x_dense, sparsity_layout_x, sparsity_block_size, triton_block_size=triton_block_size)
+     y_sparse = bs.to_sparse(y_dense, sparsity_layout_y, sparsity_block_size, triton_block_size=triton_block_size)

      # Perform matrix multiplication
-     o_sparse = matmul(x_sparse, sparsity_layout_x, y_sparse, sparsity_layout_y, sparsity_layout_o, sparsity_block_size,
-                       triton_block_size=triton_block_size)
-     o_dense = to_dense(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     o_sparse = bs.matmul(x_sparse, sparsity_layout_x, y_sparse, sparsity_layout_y, sparsity_layout_o,
+                          sparsity_block_size,
+                          triton_block_size=triton_block_size)
+
+     # Apply element-wise operation
+     o_sparse = torch.add(o_sparse, 1)
+
+     o_dense = bs.to_dense(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)

      # Sanity check
      o_torch = torch.matmul(x_dense, y_dense)
+     o_torch = torch.add(o_torch, 1)

      # Perform round trip to set sparse blocks to 0
-     o_torch_round_trip = to_dense(
-         to_sparse(o_torch, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size),
+     o_torch_round_trip = bs.to_dense(
+         bs.to_sparse(o_torch, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size),
          sparsity_layout_o, sparsity_block_size, fill_value=0, triton_block_size=triton_block_size)

      # Assert that the output is correct
      assert torch.allclose(o_dense, o_torch_round_trip, atol=2e-2)  # Note that small numerical differences are expected

      # Assert that the output has the correct sparsity layout
-     actual_sparsity_layout_o = build_sparsity_layout(o_dense, sparsity_block_size, triton_block_size=triton_block_size)
-     assert torch.allclose(actual_sparsity_layout_o, sparsity_layout_o)
+     actual_sparsity_layout_o = bs.layout.build_sparsity_layout(o_dense, sparsity_block_size,
+                                                                triton_block_size=triton_block_size)
+     assert torch.allclose(actual_sparsity_layout_o.to(torch.int), sparsity_layout_o)

      # Convert output tensor back to original shape
-     o = undo_shape_blocksparse(o_dense, x_shape_original)
+     o = bs.util.undo_shape_blocksparse(o_dense, x_shape_original)

      # Other available functions
-     transpose(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
-     softmax(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
-     row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     bs.transpose(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     bs.softmax(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     bs.misc.row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+     bs.misc.row_wise_max(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)


  def _get_random_sparsity_layout(b, m, n, sparsity_block_size, sparsity_percentage):
blksprs-1.4.1/blksprs/__init__.py

@@ -0,0 +1,18 @@
+ from blksprs.ops.conversion import to_dense, to_sparse
+ from blksprs.ops.distribution import gather, scatter, scatter_reduce
+ from blksprs.ops.exp import exp
+ from blksprs.ops.matmul import matmul
+ from blksprs.ops.softmax import softmax
+ from blksprs.ops.transpose import transpose
+
+ class layout:
+     from blksprs.layouting.distribution_layout import build_distribution_layout
+     from blksprs.layouting.sparsity_layout import build_sparsity_layout, build_sparsity_layout_adaption
+
+ class misc:
+     from blksprs.misc.broadcast_ops import broadcast_add, broadcast_sub
+     from blksprs.misc.repeat_interleave import repeat_interleave
+     from blksprs.misc.row_wise import row_wise_sum, row_wise_max, row_wise_add, row_wise_sub
+
+ class util:
+     from blksprs.utils.tools import do_shape_blocksparse, undo_shape_blocksparse, disable_validation
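The new package `__init__.py` flattens the core operations onto `blksprs` itself and uses plain classes (`layout`, `misc`, `util`) purely as namespaces for the layouting, miscellaneous, and utility helpers, so a single `import blksprs as bs` replaces the per-module imports of 1.3. The sketch below only exercises names exported above; the tensor shape, device, and block size are illustrative assumptions, not values from the package:

```python
import torch
import blksprs as bs

# Illustrative inputs (assumed): a dense 3D CUDA tensor and a block size that divides its dimensions.
x = torch.randn(2, 128, 128, device="cuda")
sparsity_block_size = 64

# Core ops (to_sparse, to_dense, matmul, softmax, transpose, exp, gather, scatter, ...) sit on the package root.
layout_x = bs.layout.build_sparsity_layout(x, sparsity_block_size)
x_sparse = bs.to_sparse(x, layout_x, sparsity_block_size)

# Grouped helpers hang off the namespace classes.
bs.misc.row_wise_sum(x_sparse, layout_x, sparsity_block_size)
x_3d, original_shape = bs.util.do_shape_blocksparse(x)
```

Using class-level imports as namespaces keeps `bs.layout.*`, `bs.misc.*`, and `bs.util.*` available without separate submodule imports.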
{blksprs-1.3 → blksprs-1.4.1}/blksprs/layouting/distribution_layout.py

@@ -31,7 +31,7 @@ def build_distribution_layout(indices: Tensor, sparsity_layout_indices: Tensor,
      sparsity_lut_i = torch.nonzero(sparsity_layout_indices).contiguous()

      output = torch.zeros(size_target[0], size_target[1] // sparsity_block_size, size_target[2] // sparsity_block_size,
-                          device=indices.device, dtype=torch.int32)
+                          dtype=torch.bool, device=indices.device)

      i_b, i_r, i_c = indices.size()
      i_b_s, i_r_s, i_c_s = indices.stride()
{blksprs-1.3 → blksprs-1.4.1}/blksprs/layouting/sparsity_layout.py

@@ -27,7 +27,7 @@ def build_sparsity_layout(x: Tensor, sparsity_block_size: int, triton_block_size
      validate_device(x)

      output = torch.zeros(x.size(0), x.size(1) // sparsity_block_size, x.size(2) // sparsity_block_size,
-                          device=x.device, dtype=torch.int32)
+                          dtype=torch.bool, device=x.device)

      x_b, x_r, x_c = x.size()
      x_b_s, x_r_s, x_c_s = x.stride()
@@ -117,7 +117,7 @@ def build_sparsity_layout_adaption(x: Tensor, sparsity_layout_from: Tensor,
      o_r = math.ceil(sparsity_layout_from.size(1) * sparsity_block_size_from // sparsity_block_size_to)
      o_c = math.ceil(sparsity_layout_from.size(2) * sparsity_block_size_from // sparsity_block_size_to)

-     output = torch.zeros(o_b, o_r, o_c, device=x.device, dtype=torch.int32)
+     output = torch.zeros(o_b, o_r, o_c, dtype=torch.bool, device=x.device)

      x_b, x_r, x_c = x.size()
      x_b_s, x_r_s, x_c_s = x.stride()
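The layout builders above (and `build_distribution_layout`) now allocate boolean layouts instead of `int32`, which is why the updated README example casts with `.to(torch.int)` before comparing against an integer layout. A small stand-alone sketch of that comparison; both layouts here are made-up stand-ins, not library output:

```python
import torch

# Stand-ins: a boolean layout as built in 1.4.1 and an integer layout assembled by the caller.
actual_layout = torch.tensor([[True, False], [False, True]])
expected_layout = torch.tensor([[1, 0], [0, 1]], dtype=torch.int)

# Cast the boolean layout so both operands share a dtype before comparing.
assert torch.allclose(actual_layout.to(torch.int), expected_layout)
```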
blksprs-1.3/blksprs/misc/broadcast_addition.py → blksprs-1.4.1/blksprs/misc/broadcast_ops.py

@@ -8,8 +8,8 @@ from blksprs.utils.validation import validate_contiguous, validate_device, \
      validate_sparsity_block_size, validate_triton_block_size


- def broadcast_addition(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
-                        sparsity_block_size: int, triton_block_size: int = None) -> Tensor:
+ def broadcast_add(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
+                   sparsity_block_size: int, triton_block_size: int = None) -> Tensor:
      """Performs a broadcast and subsequent addition of two dense tensors x and y. Returns a block-sparse tensor in
      compressed form.

@@ -25,6 +25,9 @@ def broadcast_addition(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
          output tensor corresponds to x(i) + y(j).

      """
+     x = x.contiguous()
+     y = y.contiguous()
+
      validate_device(x, y)
      validate_contiguous(x, y)
      if x.size(-1) != y.size(-1):
@@ -70,12 +73,12 @@ def broadcast_addition(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
      return output


- def broadcast_subtraction(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
-                           sparsity_block_size: int, triton_block_size: int = None) -> Tensor:
-     """Wrapper for ``broadcast_addition`` with negated y.
+ def broadcast_sub(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
+                   sparsity_block_size: int, triton_block_size: int = None) -> Tensor:
+     """Wrapper for ``broadcast_add`` with negated y.

      """
-     return broadcast_addition(x, torch.neg(y), sparsity_layout_output, sparsity_block_size, triton_block_size)
+     return broadcast_add(x, torch.neg(y), sparsity_layout_output, sparsity_block_size, triton_block_size)


  @triton.jit
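The broadcast helpers are renamed (`broadcast_addition` → `broadcast_add`, `broadcast_subtraction` → `broadcast_sub`), and subtraction remains the addition kernel applied to a negated `y`. The sketch below mirrors that relationship with a dense stand-in rather than the blocksparse kernel, so it makes no assumptions about the library's call signature:

```python
import torch

def dense_broadcast_add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Dense stand-in for the documented semantics: output[..., i, j] = x[..., i] + y[..., j].
    return x.unsqueeze(-1) + y.unsqueeze(-2)

def dense_broadcast_sub(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Same wrapper relationship as broadcast_sub -> broadcast_add: negate y, then add.
    return dense_broadcast_add(x, torch.neg(y))

x = torch.randn(2, 64)
y = torch.randn(2, 64)
assert torch.allclose(dense_broadcast_sub(x, y), x.unsqueeze(-1) - y.unsqueeze(-2))
```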
{blksprs-1.3 → blksprs-1.4.1}/blksprs/misc/repeat_interleave.py

@@ -27,6 +27,8 @@ def repeat_interleave(x: Tensor, sparsity_layout: Tensor, repeats: int,
          Tensor: The sparsity layout of the resulting output tensor.

      """
+     x = x.contiguous()
+
      validate_dimensions(x)
      validate_contiguous(x)
      validate_device(x)
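As in the broadcast ops above, `repeat_interleave` now calls `.contiguous()` on its input before validation, so callers no longer need to copy non-contiguous views themselves. A tiny sketch of the pattern (not the library's code):

```python
import torch

def op_sketch(x: torch.Tensor) -> torch.Tensor:
    # 1.4.1 pattern: normalize the memory layout up front so the subsequent
    # contiguity validation (and the Triton kernel) always sees a contiguous tensor.
    x = x.contiguous()
    assert x.is_contiguous()  # stands in for validate_contiguous(x)
    return x

op_sketch(torch.randn(4, 8).t())  # a transposed (non-contiguous) view now works without a manual copy
```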