blksprs 1.3.tar.gz → 1.4.1.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {blksprs-1.3 → blksprs-1.4.1}/PKG-INFO +31 -30
- {blksprs-1.3 → blksprs-1.4.1}/README.md +28 -25
- blksprs-1.4.1/blksprs/__init__.py +18 -0
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/layouting/distribution_layout.py +1 -1
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/layouting/sparsity_layout.py +2 -2
- blksprs-1.3/blksprs/misc/broadcast_addition.py → blksprs-1.4.1/blksprs/misc/broadcast_ops.py +9 -6
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/misc/repeat_interleave.py +2 -0
- blksprs-1.4.1/blksprs/misc/row_wise.py +390 -0
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/conversion.py +6 -0
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/distribution.py +6 -0
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/exp.py +2 -0
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/matmul.py +6 -2
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/softmax.py +13 -13
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/ops/transpose.py +2 -0
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/utils/tools.py +7 -1
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/utils/validation.py +15 -10
- {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/PKG-INFO +31 -30
- {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/SOURCES.txt +3 -2
- {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/requires.txt +1 -3
- {blksprs-1.3 → blksprs-1.4.1}/pyproject.toml +3 -5
- blksprs-1.3/blksprs/ops/row_wise_sum.py +0 -231
- {blksprs-1.3 → blksprs-1.4.1}/blksprs/utils/benchmarking.py +0 -0
- {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/dependency_links.txt +0 -0
- {blksprs-1.3 → blksprs-1.4.1}/blksprs.egg-info/top_level.txt +0 -0
- {blksprs-1.3 → blksprs-1.4.1}/setup.cfg +0 -0
{blksprs-1.3 → blksprs-1.4.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: blksprs
-Version: 1.3
+Version: 1.4.1
 Summary: A lightweight library for operations on blocksparse matrices in PyTorch.
 Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
 Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
@@ -14,10 +14,8 @@ Requires-Dist: pytest-xdist; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
 Requires-Dist: coverage; extra == "test"
 Requires-Dist: matplotlib; extra == "test"
-Provides-Extra: deploy
-Requires-Dist: build; extra == "deploy"
-Requires-Dist: twine; extra == "deploy"
-Requires-Dist: pdoc3; extra == "deploy"
+Provides-Extra: build
+Requires-Dist: build; extra == "build"
 
 # blksprs
 
@@ -83,14 +81,7 @@ the [test cases](https://github.com/FelixSchoen/blksprs/blob/main/test/cases/tes
 
 ```python
 import torch
-
-from blksprs.layouting.sparsity_layout import build_sparsity_layout
-from blksprs.ops.conversion import to_sparse, to_dense
-from blksprs.ops.matmul import matmul
-from blksprs.ops.row_wise_sum import row_wise_sum
-from blksprs.ops.softmax import softmax
-from blksprs.ops.transpose import transpose
-from blksprs.utils.tools import do_shape_blocksparse, undo_shape_blocksparse
+import blksprs as bs
 
 
 def test_readme():
@@ -112,47 +103,57 @@ def test_readme():
     y = torch.randn(size=(b, h, n, k), device="cuda").transpose(-1, -2).contiguous()
 
     # Convert tensors to three-dimensional (dense) tensors since Triton can only handle tensors of exactly three dimensions
-    x_dense, x_shape_original = do_shape_blocksparse(x)
-    y_dense, y_shape_original = do_shape_blocksparse(y)
+    x_dense, x_shape_original = bs.util.do_shape_blocksparse(x)
+    y_dense, y_shape_original = bs.util.do_shape_blocksparse(y)
 
     # Create sparsity layouts from existing tensors
-    sparsity_layout_x = build_sparsity_layout(x_dense, sparsity_block_size,
-
+    sparsity_layout_x = bs.layout.build_sparsity_layout(x_dense, sparsity_block_size,
+                                                        triton_block_size=triton_block_size)
+    sparsity_layout_y = bs.layout.build_sparsity_layout(y_dense, sparsity_block_size,
+                                                        triton_block_size=triton_block_size)
 
     # Create random sparsity layout for output tensor
     sparsity_layout_o = _get_random_sparsity_layout(b * h, m, n, sparsity_block_size, sparsity_percentage)
 
     # Convert tensors to sparse tensors for matrix multiplication
-    x_sparse = to_sparse(x_dense, sparsity_layout_x, sparsity_block_size, triton_block_size=triton_block_size)
-    y_sparse = to_sparse(y_dense, sparsity_layout_y, sparsity_block_size, triton_block_size=triton_block_size)
+    x_sparse = bs.to_sparse(x_dense, sparsity_layout_x, sparsity_block_size, triton_block_size=triton_block_size)
+    y_sparse = bs.to_sparse(y_dense, sparsity_layout_y, sparsity_block_size, triton_block_size=triton_block_size)
 
     # Perform matrix multiplication
-    o_sparse = matmul(x_sparse, sparsity_layout_x, y_sparse, sparsity_layout_y, sparsity_layout_o,
-
-
+    o_sparse = bs.matmul(x_sparse, sparsity_layout_x, y_sparse, sparsity_layout_y, sparsity_layout_o,
+                         sparsity_block_size,
+                         triton_block_size=triton_block_size)
+
+    # Apply element-wise operation
+    o_sparse = torch.add(o_sparse, 1)
+
+    o_dense = bs.to_dense(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
 
     # Sanity check
     o_torch = torch.matmul(x_dense, y_dense)
+    o_torch = torch.add(o_torch, 1)
 
     # Perform round trip to set sparse blocks to 0
-    o_torch_round_trip = to_dense(
-        to_sparse(o_torch, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size),
+    o_torch_round_trip = bs.to_dense(
+        bs.to_sparse(o_torch, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size),
         sparsity_layout_o, sparsity_block_size, fill_value=0, triton_block_size=triton_block_size)
 
     # Assert that the output is correct
     assert torch.allclose(o_dense, o_torch_round_trip, atol=2e-2) # Note that small numerical differences are expected
 
     # Assert that the output has the correct sparsity layout
-    actual_sparsity_layout_o = build_sparsity_layout(o_dense, sparsity_block_size,
-
+    actual_sparsity_layout_o = bs.layout.build_sparsity_layout(o_dense, sparsity_block_size,
+                                                               triton_block_size=triton_block_size)
+    assert torch.allclose(actual_sparsity_layout_o.to(torch.int), sparsity_layout_o)
 
     # Convert output tensor back to original shape
-    o = undo_shape_blocksparse(o_dense, x_shape_original)
+    o = bs.util.undo_shape_blocksparse(o_dense, x_shape_original)
 
     # Other available functions
-    transpose(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
-    softmax(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
-    row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+    bs.transpose(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+    bs.softmax(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+    bs.misc.row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+    bs.misc.row_wise_max(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
 
 
 def _get_random_sparsity_layout(b, m, n, sparsity_block_size, sparsity_percentage):
{blksprs-1.3 → blksprs-1.4.1}/README.md
@@ -62,14 +62,7 @@ the [test cases](https://github.com/FelixSchoen/blksprs/blob/main/test/cases/tes
 
 ```python
 import torch
-
-from blksprs.layouting.sparsity_layout import build_sparsity_layout
-from blksprs.ops.conversion import to_sparse, to_dense
-from blksprs.ops.matmul import matmul
-from blksprs.ops.row_wise_sum import row_wise_sum
-from blksprs.ops.softmax import softmax
-from blksprs.ops.transpose import transpose
-from blksprs.utils.tools import do_shape_blocksparse, undo_shape_blocksparse
+import blksprs as bs
 
 
 def test_readme():
@@ -91,47 +84,57 @@ def test_readme():
     y = torch.randn(size=(b, h, n, k), device="cuda").transpose(-1, -2).contiguous()
 
     # Convert tensors to three-dimensional (dense) tensors since Triton can only handle tensors of exactly three dimensions
-    x_dense, x_shape_original = do_shape_blocksparse(x)
-    y_dense, y_shape_original = do_shape_blocksparse(y)
+    x_dense, x_shape_original = bs.util.do_shape_blocksparse(x)
+    y_dense, y_shape_original = bs.util.do_shape_blocksparse(y)
 
     # Create sparsity layouts from existing tensors
-    sparsity_layout_x = build_sparsity_layout(x_dense, sparsity_block_size,
-
+    sparsity_layout_x = bs.layout.build_sparsity_layout(x_dense, sparsity_block_size,
+                                                        triton_block_size=triton_block_size)
+    sparsity_layout_y = bs.layout.build_sparsity_layout(y_dense, sparsity_block_size,
+                                                        triton_block_size=triton_block_size)
 
     # Create random sparsity layout for output tensor
     sparsity_layout_o = _get_random_sparsity_layout(b * h, m, n, sparsity_block_size, sparsity_percentage)
 
     # Convert tensors to sparse tensors for matrix multiplication
-    x_sparse = to_sparse(x_dense, sparsity_layout_x, sparsity_block_size, triton_block_size=triton_block_size)
-    y_sparse = to_sparse(y_dense, sparsity_layout_y, sparsity_block_size, triton_block_size=triton_block_size)
+    x_sparse = bs.to_sparse(x_dense, sparsity_layout_x, sparsity_block_size, triton_block_size=triton_block_size)
+    y_sparse = bs.to_sparse(y_dense, sparsity_layout_y, sparsity_block_size, triton_block_size=triton_block_size)
 
     # Perform matrix multiplication
-    o_sparse = matmul(x_sparse, sparsity_layout_x, y_sparse, sparsity_layout_y, sparsity_layout_o,
-
-
+    o_sparse = bs.matmul(x_sparse, sparsity_layout_x, y_sparse, sparsity_layout_y, sparsity_layout_o,
+                         sparsity_block_size,
+                         triton_block_size=triton_block_size)
+
+    # Apply element-wise operation
+    o_sparse = torch.add(o_sparse, 1)
+
+    o_dense = bs.to_dense(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
 
     # Sanity check
     o_torch = torch.matmul(x_dense, y_dense)
+    o_torch = torch.add(o_torch, 1)
 
     # Perform round trip to set sparse blocks to 0
-    o_torch_round_trip = to_dense(
-        to_sparse(o_torch, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size),
+    o_torch_round_trip = bs.to_dense(
+        bs.to_sparse(o_torch, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size),
         sparsity_layout_o, sparsity_block_size, fill_value=0, triton_block_size=triton_block_size)
 
     # Assert that the output is correct
     assert torch.allclose(o_dense, o_torch_round_trip, atol=2e-2) # Note that small numerical differences are expected
 
     # Assert that the output has the correct sparsity layout
-    actual_sparsity_layout_o = build_sparsity_layout(o_dense, sparsity_block_size,
-
+    actual_sparsity_layout_o = bs.layout.build_sparsity_layout(o_dense, sparsity_block_size,
+                                                               triton_block_size=triton_block_size)
+    assert torch.allclose(actual_sparsity_layout_o.to(torch.int), sparsity_layout_o)
 
     # Convert output tensor back to original shape
-    o = undo_shape_blocksparse(o_dense, x_shape_original)
+    o = bs.util.undo_shape_blocksparse(o_dense, x_shape_original)
 
     # Other available functions
-    transpose(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
-    softmax(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
-    row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+    bs.transpose(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+    bs.softmax(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+    bs.misc.row_wise_sum(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
+    bs.misc.row_wise_max(o_sparse, sparsity_layout_o, sparsity_block_size, triton_block_size=triton_block_size)
 
 
 def _get_random_sparsity_layout(b, m, n, sparsity_block_size, sparsity_percentage):
blksprs-1.4.1/blksprs/__init__.py
@@ -0,0 +1,18 @@
+from blksprs.ops.conversion import to_dense, to_sparse
+from blksprs.ops.distribution import gather, scatter, scatter_reduce
+from blksprs.ops.exp import exp
+from blksprs.ops.matmul import matmul
+from blksprs.ops.softmax import softmax
+from blksprs.ops.transpose import transpose
+
+class layout:
+    from blksprs.layouting.distribution_layout import build_distribution_layout
+    from blksprs.layouting.sparsity_layout import build_sparsity_layout, build_sparsity_layout_adaption
+
+class misc:
+    from blksprs.misc.broadcast_ops import broadcast_add, broadcast_sub
+    from blksprs.misc.repeat_interleave import repeat_interleave
+    from blksprs.misc.row_wise import row_wise_sum, row_wise_max, row_wise_add, row_wise_sub
+
+class util:
+    from blksprs.utils.tools import do_shape_blocksparse, undo_shape_blocksparse, disable_validation
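The new `__init__.py` is the main usability change in 1.4.1: every public operation becomes reachable from a single `import blksprs as bs`. A minimal before/after sketch (assuming blksprs is installed; the groupings simply mirror the names exposed in the hunk above):

```python
# Before (blksprs 1.3): each operation was imported from its own module.
from blksprs.ops.matmul import matmul
from blksprs.utils.tools import do_shape_blocksparse

# After (blksprs 1.4.1): one import exposes the grouped namespaces defined in __init__.py.
import blksprs as bs

bs.matmul                         # top-level ops: to_dense, to_sparse, gather, scatter, scatter_reduce, exp, matmul, softmax, transpose
bs.layout.build_sparsity_layout   # layouting helpers
bs.misc.row_wise_sum              # misc ops, including the new row_wise module and the renamed broadcast_add / broadcast_sub
bs.util.do_shape_blocksparse      # utilities, including the new disable_validation
```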
{blksprs-1.3 → blksprs-1.4.1}/blksprs/layouting/distribution_layout.py
@@ -31,7 +31,7 @@ def build_distribution_layout(indices: Tensor, sparsity_layout_indices: Tensor,
     sparsity_lut_i = torch.nonzero(sparsity_layout_indices).contiguous()
 
     output = torch.zeros(size_target[0], size_target[1] // sparsity_block_size, size_target[2] // sparsity_block_size,
-
+                         dtype=torch.bool, device=indices.device)
 
     i_b, i_r, i_c = indices.size()
     i_b_s, i_r_s, i_c_s = indices.stride()
{blksprs-1.3 → blksprs-1.4.1}/blksprs/layouting/sparsity_layout.py
@@ -27,7 +27,7 @@ def build_sparsity_layout(x: Tensor, sparsity_block_size: int, triton_block_size
     validate_device(x)
 
     output = torch.zeros(x.size(0), x.size(1) // sparsity_block_size, x.size(2) // sparsity_block_size,
-
+                         dtype=torch.bool, device=x.device)
 
     x_b, x_r, x_c = x.size()
     x_b_s, x_r_s, x_c_s = x.stride()
@@ -117,7 +117,7 @@ def build_sparsity_layout_adaption(x: Tensor, sparsity_layout_from: Tensor,
     o_r = math.ceil(sparsity_layout_from.size(1) * sparsity_block_size_from // sparsity_block_size_to)
     o_c = math.ceil(sparsity_layout_from.size(2) * sparsity_block_size_from // sparsity_block_size_to)
 
-    output = torch.zeros(o_b, o_r, o_c,
+    output = torch.zeros(o_b, o_r, o_c, dtype=torch.bool, device=x.device)
 
     x_b, x_r, x_c = x.size()
     x_b_s, x_r_s, x_c_s = x.stride()
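For context on the two `dtype=torch.bool` fixes above: a sparsity layout is a boolean block map of shape `(batch, rows // sparsity_block_size, cols // sparsity_block_size)`. A rough pure-PyTorch sketch of what `build_sparsity_layout` appears to compute for a contiguous 3D tensor whose dimensions are divisible by the block size (illustrative only; the library uses a Triton kernel, and `reference_sparsity_layout` is not part of its API):

```python
import torch

def reference_sparsity_layout(x: torch.Tensor, sparsity_block_size: int) -> torch.Tensor:
    # Split x into (sparsity_block_size x sparsity_block_size) blocks and mark a block
    # as active if it contains at least one non-zero entry.
    b, r, c = x.size()
    blocks = x.reshape(b, r // sparsity_block_size, sparsity_block_size,
                       c // sparsity_block_size, sparsity_block_size)
    return blocks.ne(0).sum(dim=(2, 4)) > 0  # boolean (b, r // bs, c // bs) layout
```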
blksprs-1.3/blksprs/misc/broadcast_addition.py → blksprs-1.4.1/blksprs/misc/broadcast_ops.py
RENAMED
@@ -8,8 +8,8 @@ from blksprs.utils.validation import validate_contiguous, validate_device, \
     validate_sparsity_block_size, validate_triton_block_size
 
 
-def broadcast_addition(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
-                       sparsity_block_size: int, triton_block_size: int = None) -> Tensor:
+def broadcast_add(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
+                  sparsity_block_size: int, triton_block_size: int = None) -> Tensor:
     """Performs a broadcast and subsequent addition of two dense tensors x and y. Returns a block-sparse tensor in
     compressed form.
 
@@ -25,6 +25,9 @@ def broadcast_addition(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
     output tensor corresponds to x(i) + y(j).
 
     """
+    x = x.contiguous()
+    y = y.contiguous()
+
     validate_device(x, y)
     validate_contiguous(x, y)
     if x.size(-1) != y.size(-1):
@@ -70,12 +73,12 @@ def broadcast_addition(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
     return output
 
 
-def
-
-    """Wrapper for ``
+def broadcast_sub(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
+                  sparsity_block_size: int, triton_block_size: int = None) -> Tensor:
+    """Wrapper for ``broadcast_add`` with negated y.
 
     """
-    return
+    return broadcast_add(x, torch.neg(y), sparsity_layout_output, sparsity_block_size, triton_block_size)
 
 
 @triton.jit
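To make the rename easier to follow: per the docstring, `broadcast_add` takes two dense tensors with matching last dimensions and produces a block-sparse result whose entry (i, j) is x(i) + y(j), restricted to `sparsity_layout_output`. A dense sketch of that value, ignoring the sparsification step (the helper names below are illustrative, not part of the library):

```python
import torch

def dense_broadcast_add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # out[..., i, j] = x[..., i] + y[..., j]; blksprs additionally validates that
    # x and y share their last dimension size and compresses the result.
    return x.unsqueeze(-1) + y.unsqueeze(-2)

def dense_broadcast_sub(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Mirrors broadcast_sub, which wraps broadcast_add with a negated y.
    return dense_broadcast_add(x, torch.neg(y))
```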