blksprs 2.1.5.tar.gz → 2.1.7.tar.gz
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {blksprs-2.1.5 → blksprs-2.1.7}/PKG-INFO +1 -1
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/__init__.py +1 -1
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/conversion.py +4 -4
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/distribution.py +3 -5
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/matmul.py +2 -3
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/misc/broadcast_ops.py +2 -3
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/misc/row_wise.py +5 -3
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/partitioning.py +3 -3
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/repeat.py +3 -3
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/softmax.py +3 -3
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/transpose.py +2 -2
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/utils/validation.py +7 -5
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs.egg-info/PKG-INFO +1 -1
- {blksprs-2.1.5 → blksprs-2.1.7}/pyproject.toml +1 -1
- {blksprs-2.1.5 → blksprs-2.1.7}/README.md +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/layouting/distribution_layout.py +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/layouting/sparsity_layout.py +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/flow.py +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/utils/autotuning.py +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/utils/benchmarking.py +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/utils/blksprs_tensor.py +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/utils/processing.py +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs/utils/tools.py +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs.egg-info/SOURCES.txt +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs.egg-info/dependency_links.txt +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs.egg-info/requires.txt +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/blksprs.egg-info/top_level.txt +0 -0
- {blksprs-2.1.5 → blksprs-2.1.7}/setup.cfg +0 -0
{blksprs-2.1.5 → blksprs-2.1.7}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: blksprs
-Version: 2.1.5
+Version: 2.1.7
 Summary: A lightweight library for operations on block-sparse matrices in PyTorch.
 Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
 Project-URL: Homepage, https://github.com/FelixSchoen/blksprs

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/conversion.py

@@ -9,7 +9,7 @@ from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.tools import stride
 from blksprs.utils.autotuning import get_autotune_configs, prune_autotune_configs, prune_autotune_configs_conversion
 from blksprs.utils.validation import validate_contiguous, validate_dimensions, validate_device, \
-    validate_sparsity, validate_sparsity_block_size, validate_sparsity_dense
+    validate_sparsity, validate_sparsity_block_size, validate_sparsity_dense, ensure_contiguous
 
 
 def to_blksprs(x: Tensor, sparsity_layout: Tensor, sparsity_block_size: int) -> BlksprsTensor:

@@ -35,7 +35,7 @@ def to_sparse(x: Tensor, sparsity_layout: Tensor,
         BlksprsTensor: The block-sparse tensor converted to compressed form.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

@@ -187,7 +187,7 @@ def to_dense(x: BlksprsTensor, sparsity_layout: Tensor,
         Tensor: The block-sparse tensor converted to regular form.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x, sparsity_layout)

@@ -335,7 +335,7 @@ def adapt_layout(x: BlksprsTensor, sparsity_layout_from: Tensor, sparsity_block_
         Tensor: The sparsity layout of the resulting output tensor.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x, sparsity_layout_from)

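The same one-line prologue change recurs in every ops module below: the former no-op assignment (x = x) or unconditional .contiguous() call is replaced by the new ensure_contiguous helper from blksprs/utils/validation.py. For background, a short hedged PyTorch illustration of the kind of input the helper normalizes; the tensor shapes here are arbitrary examples, not taken from the package:

import torch

# A transposed view shares storage with its base tensor and is not
# contiguous, so stride-based kernels (such as the Triton kernels these
# ops dispatch to) cannot index it as if it were row-major.
x = torch.arange(12.0).reshape(3, 4).t()
print(x.is_contiguous())               # False
print(x.contiguous().is_contiguous())  # True: copies into row-major storage
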
{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/distribution.py

@@ -9,7 +9,7 @@ from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.tools import stride
 from blksprs.utils.autotuning import get_autotune_configs, prune_autotune_configs
 from blksprs.utils.validation import validate_contiguous, validate_dimensions, validate_device, \
-    validate_sparsity, validate_dtype_int, validate_sparsity_block_size
+    validate_sparsity, validate_dtype_int, validate_sparsity_block_size, ensure_contiguous
 
 
 @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float16)

@@ -32,8 +32,7 @@ def gather(src: BlksprsTensor, sparsity_layout_src: Tensor,
         BlksprsTensor: The result of the gather operation as a block-sparse tensor in compressed form.
 
     """
-    src = src
-    idx = idx.contiguous()
+    src, idx = ensure_contiguous(src, idx)
 
     validate_dimensions(src, idx)
     validate_contiguous(src, idx)

@@ -261,8 +260,7 @@ def scatter_reduce(src: BlksprsTensor, sparsity_layout_src: Tensor,
         BlksprsTensor: The result of the scatter operation as a block-sparse tensor in compressed form.
 
     """
-    src = src
-    idx = idx.contiguous()
+    src, idx = ensure_contiguous(src, idx)
 
     validate_dimensions(src, idx)
     validate_contiguous(src, idx)

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/matmul.py

@@ -9,7 +9,7 @@ from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.tools import stride
 from blksprs.utils.autotuning import get_autotune_configs, prune_autotune_configs
 from blksprs.utils.validation import validate_contiguous, validate_dimensions, validate_device, \
-    validate_sparsity, validate_sparsity_block_size, validate_dtype_float
+    validate_sparsity, validate_sparsity_block_size, validate_dtype_float, ensure_contiguous
 
 
 @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float16)

@@ -34,8 +34,7 @@ def matmul(x: BlksprsTensor, sparsity_layout_x: Tensor,
         BlksprsTensor: The result of the matrix multiplication as a block-sparse tensor in compressed form.
 
     """
-    x = x
-    y = y.contiguous()
+    x, y = ensure_contiguous(x, y)
 
     validate_dimensions(x, y)
     validate_contiguous(x, y)

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/misc/broadcast_ops.py

@@ -9,7 +9,7 @@ from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.tools import stride
 from blksprs.utils.autotuning import get_autotune_configs, prune_autotune_configs
 from blksprs.utils.validation import validate_contiguous, validate_device, \
-    validate_sparsity_block_size
+    validate_sparsity_block_size, ensure_contiguous
 
 
 @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float16)

@@ -29,8 +29,7 @@ def broadcast_add(x: Tensor, y: Tensor, sparsity_layout_output: Tensor,
         output tensor corresponds to x(i) + y(j).
 
     """
-    x = x
-    y = y.contiguous()
+    x, y = ensure_contiguous(x, y)
 
     validate_device(x, y)
     validate_contiguous(x, y)

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/misc/row_wise.py

@@ -8,7 +8,7 @@ from blksprs.utils.autotuning import get_autotune_configs, prune_autotune_config
 from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.tools import stride
 from blksprs.utils.validation import validate_dimensions, validate_contiguous, validate_device, validate_sparsity, \
-    validate_sparsity_block_size
+    validate_sparsity_block_size, ensure_contiguous
 
 
 @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float32)

@@ -34,7 +34,7 @@ def row_wise_sum(x: BlksprsTensor, sparsity_layout: Tensor, sparsity_block_size:
         of the input and the sparsity layout of the output tensor.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

@@ -176,7 +176,7 @@ def row_wise_max(x: BlksprsTensor, sparsity_layout: Tensor, sparsity_block_size:
     """
     # TODO Fix for triton bug, see https://github.com/triton-lang/triton/issues/6376, should be fixed with the upcoming 3.4.0 release
     x = torch.where(x == -0.0, torch.tensor(0.0), x)
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

@@ -311,6 +311,8 @@ def row_wise_add(x: BlksprsTensor, sparsity_layout_x: Tensor, y: Tensor,
         compressed form.
 
     """
+    x = ensure_contiguous(x)
+
     validate_dimensions(x)
     validate_contiguous(x)
     validate_device(x)

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/partitioning.py

@@ -5,7 +5,7 @@ from torch._library import triton_op
 from blksprs.ops.flow import flow_pull_forward
 from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.validation import validate_dimensions, validate_contiguous, validate_device, \
-    validate_sparsity, validate_sparsity_block_size
+    validate_sparsity, validate_sparsity_block_size, ensure_contiguous
 
 
 @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float16)

@@ -27,7 +27,7 @@ def split(x: BlksprsTensor, sparsity_layout: Tensor, partitions: int,
         Tensor: The sparsity layout of the output tensor.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

@@ -132,7 +132,7 @@ def merge(x: BlksprsTensor, sparsity_layout: Tensor, partitions: int,
         Tensor: The sparsity layout of the output tensor.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/repeat.py

@@ -5,7 +5,7 @@ from torch._library import triton_op
 from blksprs.ops.flow import flow_pull_forward, flow_push_forward
 from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.validation import validate_dimensions, validate_contiguous, validate_device, \
-    validate_sparsity, validate_sparsity_block_size
+    validate_sparsity, validate_sparsity_block_size, ensure_contiguous
 
 
 @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float16)

@@ -36,7 +36,7 @@ def repeat(x: BlksprsTensor, sparsity_layout_x: Tensor, repeats: tuple[int, int,
         Tensor: The sparsity layout of the resulting output tensor.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

@@ -77,7 +77,7 @@ def repeat_interleave(x: BlksprsTensor, sparsity_layout_x: Tensor, repeats: int,
         Tensor: The sparsity layout of the resulting output tensor.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/softmax.py

@@ -12,7 +12,7 @@ from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.tools import stride, ceil_pow2
 from blksprs.utils.autotuning import get_autotune_configs, prune_autotune_configs
 from blksprs.utils.validation import validate_contiguous, validate_dimensions, validate_device, \
-    validate_sparsity, validate_sparsity_block_size, validate_dtype_float_32
+    validate_sparsity, validate_sparsity_block_size, validate_dtype_float_32, ensure_contiguous
 
 
 def softmax(x: BlksprsTensor, sparsity_layout: Tensor, sparsity_block_size: int, flag_fused: bool = True,

@@ -44,7 +44,7 @@ def softmax_regular(x: BlksprsTensor, sparsity_layout: Tensor, sparsity_block_si
         BlksprsTensor: The result of the softmax operation as a block-sparse tensor in compressed form.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

@@ -335,7 +335,7 @@ def softmax_fused(x: BlksprsTensor, sparsity_layout: Tensor, sparsity_block_size
         BlksprsTensor: The result of the softmax operation as a block-sparse tensor in compressed form.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/ops/transpose.py

@@ -5,7 +5,7 @@ from torch._library import triton_op
 from blksprs.ops.flow import flow_pull_forward
 from blksprs.utils.blksprs_tensor import BlksprsTensor
 from blksprs.utils.validation import validate_dimensions, validate_contiguous, validate_device, \
-    validate_sparsity, validate_sparsity_block_size
+    validate_sparsity, validate_sparsity_block_size, ensure_contiguous
 
 
 @torch.amp.custom_fwd(device_type="cuda", cast_inputs=torch.float16)

@@ -27,7 +27,7 @@ def transpose(x: BlksprsTensor, sparsity_layout: Tensor,
         Tensor: The sparsity layout of the transposed tensor.
 
     """
-    x = x
+    x = ensure_contiguous(x)
 
     validate_dimensions(x)
     validate_contiguous(x)

{blksprs-2.1.5 → blksprs-2.1.7}/blksprs/utils/validation.py

@@ -6,10 +6,12 @@ VALIDATION = True
 
 
 def ensure_contiguous(*tensors: Tensor) -> tuple[Tensor, ...]:
-
-    return tensors
+    transformed = tensors
 
-
+    if _check_contiguous():
+        transformed = tuple(tensor.contiguous() for tensor in tensors)
+
+    return transformed[0] if len(transformed) == 1 else transformed
 
 
 def validate_dimensions(*tensors: Tensor, dims=3) -> None:

@@ -132,8 +134,8 @@ def validate_sparsity_block_size(sparsity_block_size: int, *tensors):
         raise ValueError("Tensor sizes must be divisible by sparsity block size")
 
 
-def
-    return
+def _check_contiguous():
+    return CONTIGUOUS
 
 
 def _set_skip_contiguous(skip_contiguous: bool):

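Reading the two validation.py hunks together: the 2.1.7 helper forces contiguity only when the module-level flag queried by _check_contiguous() is enabled, and it unwraps single-tensor calls, so both x = ensure_contiguous(x) and src, idx = ensure_contiguous(src, idx) work at the call sites above even though the annotated return type stays tuple[Tensor, ...]. A self-contained sketch, assuming CONTIGUOUS is a module-level boolean defined outside the hunks shown:

import torch
from torch import Tensor

CONTIGUOUS = True  # assumption: module-level flag, not visible in the hunks above


def _check_contiguous():
    return CONTIGUOUS


def ensure_contiguous(*tensors: Tensor):
    # Mirrors the reconstructed 2.1.7 body: optionally make every tensor
    # contiguous, then unwrap when called with a single tensor.
    transformed = tensors
    if _check_contiguous():
        transformed = tuple(tensor.contiguous() for tensor in tensors)
    return transformed[0] if len(transformed) == 1 else transformed


# Usage mirroring the call sites in the ops modules:
x = torch.zeros(4, 3).t()        # non-contiguous view
x = ensure_contiguous(x)         # single tensor in, single tensor out
src, idx = ensure_contiguous(x, torch.arange(12).reshape(3, 4))
assert x.is_contiguous() and src.is_contiguous() and idx.is_contiguous()
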
{blksprs-2.1.5 → blksprs-2.1.7}/blksprs.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: blksprs
-Version: 2.1.5
+Version: 2.1.7
 Summary: A lightweight library for operations on block-sparse matrices in PyTorch.
 Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
 Project-URL: Homepage, https://github.com/FelixSchoen/blksprs