blksprs-2.0rc3.tar.gz → blksprs-2.0rc4.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {blksprs-2.0rc3 → blksprs-2.0rc4}/PKG-INFO +1 -1
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/conversion.py +3 -1
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/distribution.py +2 -1
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/flow.py +2 -1
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/matmul.py +2 -1
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/misc/row_wise.py +3 -2
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/softmax.py +6 -4
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs.egg-info/PKG-INFO +1 -1
- {blksprs-2.0rc3 → blksprs-2.0rc4}/pyproject.toml +1 -1
- {blksprs-2.0rc3 → blksprs-2.0rc4}/README.md +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/__init__.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/layouting/distribution_layout.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/layouting/sparsity_layout.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/misc/broadcast_ops.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/partitioning.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/repeat.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/ops/transpose.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/utils/benchmarking.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/utils/blksprs_tensor.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/utils/processing.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/utils/tools.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs/utils/validation.py +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs.egg-info/SOURCES.txt +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs.egg-info/dependency_links.txt +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs.egg-info/requires.txt +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/blksprs.egg-info/top_level.txt +0 -0
- {blksprs-2.0rc3 → blksprs-2.0rc4}/setup.cfg +0 -0
PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: blksprs
-Version: 2.0rc3
+Version: 2.0rc4
 Summary: A lightweight library for operations on blocksparse matrices in PyTorch.
 Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
 Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
```
blksprs/ops/conversion.py

```diff
@@ -54,7 +54,7 @@ def to_sparse(x: Tensor, sparsity_layout: Tensor,
 @triton_op("blksprs::to_sparse", mutates_args={})
 def to_sparse_forward(x: Tensor, _: Tensor,
                       sparsity_lut: Tensor, sparsity_block_size: int, n_sparse_blocks: int) -> Tensor:
-    output = torch.
+    output = torch.zeros(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
                          dtype=x.dtype, device=x.device)
 
     x_b, x_r, x_c = x.size()
```
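Across the hunks that follow, each forward pass now allocates its output as an explicitly zeroed tensor; the rc3 side of every changed line is truncated in this diff ("output = torch."), so the previous allocation call is not visible. As a hedged illustration of why zero-initialization matters here, the sketch below (block count, block size, and written-block indices all invented) shows what happens when a kernel writes only the blocks present in a sparsity layout:

```python
# Invented demo: a kernel that touches only some blocks of its output buffer.
import torch

n_sparse_blocks, sparsity_block_size = 4, 2
written_blocks = [0, 2]  # hypothetical blocks selected by the sparsity layout

# With an uninitialized buffer, untouched blocks keep arbitrary memory contents:
out = torch.empty(n_sparse_blocks, sparsity_block_size, sparsity_block_size)
for b in written_blocks:
    out[b] = 1.0
# out[1] and out[3] now hold garbage that would flow into downstream ops.

# With a zeroed buffer, untouched blocks are a well-defined 0:
out = torch.zeros(n_sparse_blocks, sparsity_block_size, sparsity_block_size)
for b in written_blocks:
    out[b] = 1.0
```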
blksprs/ops/conversion.py

```diff
@@ -87,6 +87,7 @@ def to_sparse_backward(ctx, grad_output):
 @triton.autotune(
     configs=get_autotune_configs(),
     key=[],
+    reset_to_zero=["o"]
 )
 @triton.jit
 def to_sparse_kernel(x,
```
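`reset_to_zero` is a standard `triton.autotune` parameter. During autotuning, each candidate config is benchmarked by launching the kernel several times, so a kernel that accumulates into one of its arguments would start later trials from values polluted by earlier ones. Listing the output pointer `o` in `reset_to_zero` tells the autotuner to zero that argument before every benchmark run; ordinary launches are unaffected. Below is a self-contained sketch with a toy accumulation kernel; the kernel, configs, and names are invented for illustration, and only the decorator usage mirrors the hunk above:

```python
import triton
import triton.language as tl

@triton.autotune(
    configs=[
        triton.Config({"BLOCK": 128}, num_warps=4),
        triton.Config({"BLOCK": 256}, num_warps=8),
    ],
    key=["n"],
    reset_to_zero=["o"],  # zero "o" before each benchmark run of each config
)
@triton.jit
def accumulate_kernel(x, o, n, BLOCK: tl.constexpr):
    offs = tl.program_id(0) * BLOCK + tl.arange(0, BLOCK)
    mask = offs < n
    val = tl.load(x + offs, mask=mask)
    # Accumulation: without reset_to_zero, every autotuning trial would add
    # onto the result left behind by the previous trial.
    tl.atomic_add(o + offs, val, mask=mask)
```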
blksprs/ops/conversion.py

```diff
@@ -252,6 +253,7 @@ def to_dense_backward(ctx, grad_output):
 @triton.autotune(
     configs=get_autotune_configs(),
     key=[],
+    restore_value=["o"]
 )
 @triton.jit
 def to_dense_kernel(x,
```
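`restore_value`, used here instead of `reset_to_zero`, is the complementary `triton.autotune` option: the autotuner copies the named argument's contents before benchmarking and restores that copy after each trial, rather than zeroing it. That fits a kernel that updates an output in place whose pre-existing contents matter; presumably the dense output of `to_dense` is prepared outside the kernel and must look identical at the start of every trial. Either way, the behavior applies only during autotuning, not to ordinary launches.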
blksprs/ops/distribution.py

```diff
@@ -54,7 +54,7 @@ def gather(src: BlksprsTensor, sparsity_layout_src: Tensor,
 def gather_forward(x: Tensor, sparsity_layout_x: Tensor, sparsity_reverse_lut_x: Tensor,
                    dim: int, i: Tensor, _: Tensor, sparsity_lut_i: Tensor,
                    sparsity_block_size: int) -> Tensor:
-    output = torch.
+    output = torch.zeros_like(i, dtype=x.dtype)
 
     x_b, x_r, x_c = x.size()
     x_b_s, x_r_s, x_c_s = stride(x)
```
blksprs/ops/distribution.py

```diff
@@ -101,6 +101,7 @@ def gather_backward(ctx, grad_output):
 @triton.autotune(
     configs=get_autotune_configs(),
     key=[],
+    reset_to_zero=["o"]
 )
 @triton.jit
 def gather_kernel(x,
```
blksprs/ops/flow.py

```diff
@@ -12,7 +12,7 @@ from blksprs.utils.tools import stride, get_autotune_configs
 def flow_pull_forward(x: Tensor, sparsity_layout_o: Tensor,
                       sparsity_lut: Tensor, sparsity_reverse_lut: Tensor,
                       sparsity_block_size: int, n_sparse_blocks: int) -> Tensor:
-    output = torch.
+    output = torch.zeros(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
                          dtype=x.dtype, device=x.device)
 
     x_b, x_r, x_c = x.size()
```
blksprs/ops/flow.py

```diff
@@ -44,6 +44,7 @@ def flow_pull_forward(x: Tensor, sparsity_layout_o: Tensor,
 @triton.autotune(
     configs=get_autotune_configs(),
     key=[],
+    reset_to_zero=["o"]
 )
 @triton.jit
 def flow_pull_kernel(x,
```
blksprs/ops/matmul.py

```diff
@@ -60,7 +60,7 @@ def matmul_forward(x: Tensor, y: Tensor,
                    sparsity_layout_y: Tensor, sparsity_reverse_lut_y: Tensor,
                    _: Tensor, sparsity_lut_o: Tensor,
                    sparsity_block_size: int, n_sparse_blocks: int) -> Tensor:
-    output = torch.
+    output = torch.zeros(size=(n_sparse_blocks, sparsity_block_size, sparsity_block_size),
                          dtype=x.dtype, device=x.device)
 
     x_b, x_r, x_c = x.size()
```
blksprs/ops/matmul.py

```diff
@@ -118,6 +118,7 @@ def matmul_backward(ctx, grad_output):
 @triton.autotune(
     configs=get_autotune_configs(),
     key=[],
+    reset_to_zero=["o"]
 )
 @triton.jit
 def matmul_kernel(x,
```
blksprs/ops/misc/row_wise.py

```diff
@@ -354,7 +354,7 @@ def row_wise_sub(x: BlksprsTensor, sparsity_layout_x: Tensor, y: Tensor,
 def row_wise_add_forward(x: Tensor, sparsity_lut_x: Tensor,
                          sparsity_layout_x_rwm: Tensor, sparsity_reverse_x_lut_rwm: Tensor,
                          y: Tensor, sparsity_block_size: int) -> Tensor:
-    output = torch.
+    output = torch.zeros_like(x)
 
     x_b, x_r, x_c = x.size()
     x_b_s, x_r_s, x_c_s = stride(x)
```
blksprs/ops/misc/row_wise.py

```diff
@@ -387,7 +387,8 @@ def row_wise_add_forward(x: Tensor, sparsity_lut_x: Tensor,
 
 @triton.autotune(
     configs=get_autotune_configs(),
-    key=[]
+    key=[],
+    reset_to_zero=["o"]
 )
 @triton.jit
 def kernel_blocksparse_row_wise_add(x,
```
blksprs/ops/softmax.py

```diff
@@ -51,7 +51,7 @@ def softmax_forward(x: Tensor, sparsity_layout: Tensor,
                     sparsity_lut: Tensor,
                     sparsity_reverse_lut_rws: Tensor,
                     sparsity_block_size: int) -> Tensor:
-    output = torch.
+    output = torch.zeros_like(x)
 
     x_b, x_r, x_c = x.size()
     x_b_s, x_r_s, x_c_s = stride(x)
```
blksprs/ops/softmax.py

```diff
@@ -108,7 +108,7 @@ def softmax_backward(ctx, grad_output):
     s_l_s_b, s_l_s_r, s_l_s_c = sparsity_layout_s.size()
     s_l_s_b_s, s_l_s_r_s, s_l_s_c_s = stride(sparsity_layout_s)
 
-    grad_x = torch.
+    grad_x = torch.zeros_like(o, dtype=torch.float)
 
     triton_grid = lambda meta: [o_b,
                                 triton.cdiv(o_r, meta["TRITON_BLOCK_SIZE"]),
```
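A detail worth noting in this hunk: the gradient buffer is allocated as `torch.float` (float32) even though `o` may be half precision. Accumulating into a float32 buffer is a common way to keep many small additions numerically stable; whether and where blksprs casts back to the input dtype is not visible in this diff. A minimal sketch, with invented tensors:

```python
import torch

# Hypothetical half-precision activations from the forward pass.
o = torch.randn(8, 16, 16, dtype=torch.float16)

# Float32 accumulator with the same shape and device as o.
grad_x = torch.zeros_like(o, dtype=torch.float)

# Stand-in for a kernel accumulating many small partial results:
for _ in range(1000):
    grad_x += torch.full_like(grad_x, 1e-4)

# One common way to finish: cast back to the original dtype.
grad_x = grad_x.to(o.dtype)
```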
blksprs/ops/softmax.py

```diff
@@ -133,7 +133,8 @@ def softmax_backward(ctx, grad_output):
 
 @triton.autotune(
     configs=get_autotune_configs(),
-    key=[]
+    key=[],
+    reset_to_zero=["o"]
 )
 @triton.jit
 def softmax_kernel(x,
```
blksprs/ops/softmax.py

```diff
@@ -198,7 +199,8 @@ def softmax_kernel(x,
 
 @triton.autotune(
     configs=get_autotune_configs(),
-    key=[]
+    key=[],
+    reset_to_zero=["o"]
 )
 @triton.jit
 def softmax_kernel_grad(g,
```
blksprs.egg-info/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: blksprs
-Version: 2.0rc3
+Version: 2.0rc4
 Summary: A lightweight library for operations on blocksparse matrices in PyTorch.
 Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
 Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
```
All remaining files listed above are unchanged between the two versions.