blksprs 2.0rc6__py3-none-any.whl → 2.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blksprs/__init__.py +1 -0
- blksprs/layouting/distribution_layout.py +39 -26
- blksprs/layouting/sparsity_layout.py +58 -45
- blksprs/ops/conversion.py +86 -84
- blksprs/ops/distribution.py +81 -79
- blksprs/ops/flow.py +64 -60
- blksprs/ops/matmul.py +50 -55
- blksprs/ops/misc/broadcast_ops.py +29 -27
- blksprs/ops/misc/row_wise.py +134 -132
- blksprs/ops/partitioning.py +12 -10
- blksprs/ops/repeat.py +6 -5
- blksprs/ops/softmax.py +55 -47
- blksprs/ops/transpose.py +8 -7
- blksprs/utils/autotuning.py +10 -10
- blksprs/utils/processing.py +0 -1
- blksprs/utils/tools.py +8 -9
- {blksprs-2.0rc6.dist-info → blksprs-2.0rc8.dist-info}/METADATA +7 -3
- blksprs-2.0rc8.dist-info/RECORD +23 -0
- {blksprs-2.0rc6.dist-info → blksprs-2.0rc8.dist-info}/WHEEL +1 -1
- blksprs-2.0rc6.dist-info/RECORD +0 -23
- {blksprs-2.0rc6.dist-info → blksprs-2.0rc8.dist-info}/top_level.txt +0 -0
blksprs/utils/tools.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import tomllib
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
1
4
|
import torch
|
|
2
5
|
from torch import Tensor, Size
|
|
3
6
|
|
|
@@ -5,6 +8,11 @@ from torch import Tensor, Size
|
|
|
5
8
|
torch._dynamo.config.capture_scalar_outputs = True
|
|
6
9
|
|
|
7
10
|
|
|
11
|
+
def version() -> str:
    """Return the version string of the blksprs package.

    The version is first read from the ``version`` key of the ``[project]``
    table in the ``pyproject.toml`` located two directory levels above the
    directory containing this module. When that file does not exist (the
    wheel's RECORD lists no ``pyproject.toml``, so this is the case for a
    regular installation), the version recorded in the installed ``blksprs``
    distribution metadata is returned instead.

    Returns:
        The version string.

    Raises:
        KeyError: If ``pyproject.toml`` exists but has no ``project.version``.
        importlib.metadata.PackageNotFoundError: If ``pyproject.toml`` is
            missing and no installed ``blksprs`` distribution can be found.
    """
    # Local import keeps this block self-contained; no new file-level import needed.
    from importlib import metadata

    pyproject_path = Path(__file__).parent.parent.parent.joinpath("pyproject.toml")
    try:
        with open(pyproject_path, "rb") as f:
            return tomllib.load(f)["project"]["version"]
    except FileNotFoundError:
        # No pyproject.toml next to the package: fall back to the metadata
        # written at install time instead of failing.
        return metadata.version("blksprs")
|
|
14
|
+
|
|
15
|
+
|
|
8
16
|
def do_shape_blocksparse(x: Tensor):
|
|
9
17
|
if x.dim() == 3:
|
|
10
18
|
return x.contiguous(), x.size()
|
|
@@ -26,12 +34,3 @@ def stride(x: Tensor):
|
|
|
26
34
|
return x.size(1) * x.size(2), x.size(2), 1
|
|
27
35
|
else:
|
|
28
36
|
raise NotImplementedError
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def get_autocast_min_val():
|
|
32
|
-
if torch.is_autocast_enabled():
|
|
33
|
-
dtype = torch.get_autocast_dtype("cuda")
|
|
34
|
-
else:
|
|
35
|
-
dtype = torch.float
|
|
36
|
-
|
|
37
|
-
return torch.finfo(dtype).min
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: blksprs
|
|
3
|
-
Version: 2.0rc6
|
|
3
|
+
Version: 2.0rc8
|
|
4
4
|
Summary: A lightweight library for operations on blocksparse matrices in PyTorch.
|
|
5
5
|
Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
|
|
6
6
|
Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
|
|
@@ -108,12 +108,16 @@ library.
|
|
|
108
108
|
|
|
109
109
|
## Known Limitations and Issues
|
|
110
110
|
|
|
111
|
+
- Triton has a bug with `tl.atomic_max()` used for the row-wise max operation.
|
|
112
|
+
  To work around this bug, some values have to be converted manually, which (slightly) reduces
|
|
113
|
+
performance.
|
|
114
|
+
Watch the [issue](https://github.com/triton-lang/triton/issues/6376) on Triton's issue tracker for more information.
|
|
111
115
|
- PyTorch's `wrap_triton()` currently does not support config pruning. It thus cannot be used for some of the kernels,
|
|
112
116
|
which could impact graph compilation.
|
|
113
117
|
- There seem to be some issues with autocasting, forcing some operations to manually cast.
|
|
114
118
|
- There will be some slight numerical differences between vanilla and blksprs operations.
|
|
115
|
-
These instabilities are due to Triton and thus cannot be fixed by this library alone.
|
|
116
|
-
However, for all intents and purposes, these very minor differences should not matter and can safely be ignored.
|
|
119
|
+
These instabilities are due to Triton and thus cannot be fixed by this library alone.
|
|
120
|
+
However, for all intents and purposes, these very minor differences should not matter and can safely be ignored.
|
|
117
121
|
|
|
118
122
|
## Usage
|
|
119
123
|
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
blksprs/__init__.py,sha256=283rF0fbrUqsH_KXUvCgbCMqO0GOgenMkxwDVh1QdpU,1617
|
|
2
|
+
blksprs/layouting/distribution_layout.py,sha256=ur1ty_2U-Hfj78hMWsLZvu7ZuGhzW3qGLKMc72DfTZM,5861
|
|
3
|
+
blksprs/layouting/sparsity_layout.py,sha256=eXHmu2h7K5Q-YUpfOxocJoeP_5ZoQFZf_eHLxRZQbYU,11207
|
|
4
|
+
blksprs/ops/conversion.py,sha256=_g32aEEZdeuHHPj1pBfTNMxknRwJ9O1zk3Wv76pBIrg,21898
|
|
5
|
+
blksprs/ops/distribution.py,sha256=0tPldv0ARzmCV1CU2jvfqpHBgOuHPrDFiCtqsLs7CZc,20789
|
|
6
|
+
blksprs/ops/flow.py,sha256=PDZAD8u4y9qW1IXERki6ItKbEKnm_ChG8SKWM3_P9Oc,8245
|
|
7
|
+
blksprs/ops/matmul.py,sha256=5tVBKU_lglUjaLDi6J_dscdqlmzRz38OGxqAxZxZXDs,11879
|
|
8
|
+
blksprs/ops/partitioning.py,sha256=cfQmY9BZqGTvvJorIhtb-EyuGRJGPraWR-wTKdb47aI,9954
|
|
9
|
+
blksprs/ops/repeat.py,sha256=TLYNxwPuT9y5K9xyM41WK5gnggAJF3lI61Q2K7zWjns,9035
|
|
10
|
+
blksprs/ops/softmax.py,sha256=BwrRQdtRdkiSvl2mf5bpsTmyIxWiJOpa1HFg0st5yGU,12778
|
|
11
|
+
blksprs/ops/transpose.py,sha256=U-VAyLRT6_NDv9qYSFzBqfVlDeIpTqAMEXkqto0VF6w,4072
|
|
12
|
+
blksprs/ops/misc/broadcast_ops.py,sha256=-PrHiSJikZh8nXUmXxSCtFEP27TTxFr4wcrNxBjnimk,5987
|
|
13
|
+
blksprs/ops/misc/row_wise.py,sha256=n5FJjAuOd8BHBJQx4bsQwr-HmXkR9PYVAqfk77wjOFU,19653
|
|
14
|
+
blksprs/utils/autotuning.py,sha256=a-kmWRjJ3eED2XbjkQeOJSyW8bdIs27HgKMPvAKqWeU,2052
|
|
15
|
+
blksprs/utils/benchmarking.py,sha256=dLabDscTFn5NkmOI1g7DnKeTneUYW3RIVv9MDF-8BKc,1271
|
|
16
|
+
blksprs/utils/blksprs_tensor.py,sha256=pfoz59aJixj_fIoFx76ySiygwRQUemmgjMKepZ2c4j0,244
|
|
17
|
+
blksprs/utils/processing.py,sha256=RNkEDc0g-sNHRuMPkRzNWU13d3_lIkXMJdoqES4yQTM,3738
|
|
18
|
+
blksprs/utils/tools.py,sha256=BozpH3oEXe3K9ZRJsIzlasDk-sZyJqmwSf1gl7xbbdo,865
|
|
19
|
+
blksprs/utils/validation.py,sha256=G8eQlvJVMKfEX3k2AwBD0A6Ck-gFoRLpLNY6HXsB3fA,4348
|
|
20
|
+
blksprs-2.0rc8.dist-info/METADATA,sha256=h70L26BthR6laP7sMQLF9L3dHIRQNCF_oKwZ5g4dZSg,9509
|
|
21
|
+
blksprs-2.0rc8.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
22
|
+
blksprs-2.0rc8.dist-info/top_level.txt,sha256=qyp0IHeY3H2GQA97i4hk_To5rRBS2YcE1HRPSLy04fk,8
|
|
23
|
+
blksprs-2.0rc8.dist-info/RECORD,,
|
blksprs-2.0rc6.dist-info/RECORD
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
blksprs/__init__.py,sha256=OHfpwJCZWGUfpT-DVfC1YSaeZl4aCMNt9CrzMPymywU,1577
|
|
2
|
-
blksprs/layouting/distribution_layout.py,sha256=TkMh_DYKX56Cb8Vq7EHyupMRvzm0XbUNP8QP7afv9wM,5122
|
|
3
|
-
blksprs/layouting/sparsity_layout.py,sha256=6GOjwllDUK9L8jEQNu2i17Pp1BIIQm8fv3xVuiR0zIw,10228
|
|
4
|
-
blksprs/ops/conversion.py,sha256=2zAdbaZ1iP2lisLVeG-k-f571G4HJapADhSwpY0Zd3o,21503
|
|
5
|
-
blksprs/ops/distribution.py,sha256=Gffhd7z85IDm57G8x_v7J2P3ezEVOHEWPD_36Lf8Irs,20453
|
|
6
|
-
blksprs/ops/flow.py,sha256=UO5ba5TFgVpEyT7r0hnWYw3vhRDpBOxyPHUBeNOAYPs,7935
|
|
7
|
-
blksprs/ops/matmul.py,sha256=02hujXMtFgF7ohepM3v6h9okrfcU-J3mQZV17B-qvh0,12235
|
|
8
|
-
blksprs/ops/partitioning.py,sha256=nAV28f3NtvT4OFvDtnE0A-VxpDQmMXS0pZw4CJwzqGA,9838
|
|
9
|
-
blksprs/ops/repeat.py,sha256=bQpJuwtt8aRdSzxT78lJ8f8fLDhPkYK5UvMfJ-PQrkc,8977
|
|
10
|
-
blksprs/ops/softmax.py,sha256=-NoTf1Cpuku9C99N0LuMydT_ObozWTnZJGDZxseXEXI,12209
|
|
11
|
-
blksprs/ops/transpose.py,sha256=PQKteFnzNAOEC7voO7wh_dq9c54UjCboJz889aBCwKc,4010
|
|
12
|
-
blksprs/ops/misc/broadcast_ops.py,sha256=wBusOtscfGSbtfsCynI1ypr93KuCfVpLX_4b4l8-dck,5811
|
|
13
|
-
blksprs/ops/misc/row_wise.py,sha256=k23p1rizOLS_iRWFhiKiRW6KnR2qxmHfsE8jq0VFfa0,18991
|
|
14
|
-
blksprs/utils/autotuning.py,sha256=tDfMWklm2rvbo0-ahH81C3Gg0U6LHjPn3d_3pEOzmJs,2053
|
|
15
|
-
blksprs/utils/benchmarking.py,sha256=dLabDscTFn5NkmOI1g7DnKeTneUYW3RIVv9MDF-8BKc,1271
|
|
16
|
-
blksprs/utils/blksprs_tensor.py,sha256=pfoz59aJixj_fIoFx76ySiygwRQUemmgjMKepZ2c4j0,244
|
|
17
|
-
blksprs/utils/processing.py,sha256=xuu9iDpwTvsqI_WKMSD8QCNuvPnfcKMRcuF2L4Zs6Ts,3808
|
|
18
|
-
blksprs/utils/tools.py,sha256=NusV0H_XPn4ETJTibQwh3bJBqfW12iUrBuk1EfjbAQs,851
|
|
19
|
-
blksprs/utils/validation.py,sha256=G8eQlvJVMKfEX3k2AwBD0A6Ck-gFoRLpLNY6HXsB3fA,4348
|
|
20
|
-
blksprs-2.0rc6.dist-info/METADATA,sha256=gC-91T17-byW4_f0R41hWjwzOr-N1fjp-HIDu3phunU,9179
|
|
21
|
-
blksprs-2.0rc6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
|
22
|
-
blksprs-2.0rc6.dist-info/top_level.txt,sha256=qyp0IHeY3H2GQA97i4hk_To5rRBS2YcE1HRPSLy04fk,8
|
|
23
|
-
blksprs-2.0rc6.dist-info/RECORD,,
|
|
File without changes
|