blksprs 2.0rc6__py3-none-any.whl → 2.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
blksprs/utils/tools.py CHANGED
@@ -1,3 +1,6 @@
1
+ import tomllib
2
+ from pathlib import Path
3
+
1
4
  import torch
2
5
  from torch import Tensor, Size
3
6
 
@@ -5,6 +8,11 @@ from torch import Tensor, Size
5
8
  torch._dynamo.config.capture_scalar_outputs = True
6
9
 
7
10
 
11
+ def version():
12
+ with open(Path(__file__).parent.parent.parent.joinpath("pyproject.toml"), "rb") as f:
13
+ return tomllib.load(f)["project"]["version"]
14
+
15
+
8
16
  def do_shape_blocksparse(x: Tensor):
9
17
  if x.dim() == 3:
10
18
  return x.contiguous(), x.size()
@@ -26,12 +34,3 @@ def stride(x: Tensor):
26
34
  return x.size(1) * x.size(2), x.size(2), 1
27
35
  else:
28
36
  raise NotImplementedError
29
-
30
-
31
- def get_autocast_min_val():
32
- if torch.is_autocast_enabled():
33
- dtype = torch.get_autocast_dtype("cuda")
34
- else:
35
- dtype = torch.float
36
-
37
- return torch.finfo(dtype).min
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: blksprs
3
- Version: 2.0rc6
3
+ Version: 2.0rc8
4
4
  Summary: A lightweight library for operations on blocksparse matrices in PyTorch.
5
5
  Author-email: Felix Schön <schoen@kr.tuwien.ac.at>
6
6
  Project-URL: Homepage, https://github.com/FelixSchoen/blksprs
@@ -108,12 +108,16 @@ library.
108
108
 
109
109
  ## Known Limitations and Issues
110
110
 
111
+ - Triton has a bug with `tl.atomic_max()` used for the row-wise max operation.
112
+ In order to work around this bug, a manual conversion of some values is needed, which (slightly) negatively impacts
113
+ performance.
114
+ Watch the [issue](https://github.com/triton-lang/triton/issues/6376) on Triton's issue tracker for more information.
111
115
  - PyTorch's `wrap_triton()` currently does not support config pruning. It thus cannot be used for some of the kernels,
112
116
  which could impact graph compilation.
113
117
  - There seem to be some issues with autocasting, forcing some operations to manually cast.
114
118
  - There will be some slight numerical differences between vanilla and blksprs operations.
115
- These instabilities are due to Triton and thus cannot be fixed by this library alone.
116
- However, for all intents and purposes, these very minor differences should not matter and can safely be ignored.
119
+ These instabilities are due to Triton and thus cannot be fixed by this library alone.
120
+ However, for all intents and purposes, these very minor differences should not matter and can safely be ignored.
117
121
 
118
122
  ## Usage
119
123
 
@@ -0,0 +1,23 @@
1
+ blksprs/__init__.py,sha256=283rF0fbrUqsH_KXUvCgbCMqO0GOgenMkxwDVh1QdpU,1617
2
+ blksprs/layouting/distribution_layout.py,sha256=ur1ty_2U-Hfj78hMWsLZvu7ZuGhzW3qGLKMc72DfTZM,5861
3
+ blksprs/layouting/sparsity_layout.py,sha256=eXHmu2h7K5Q-YUpfOxocJoeP_5ZoQFZf_eHLxRZQbYU,11207
4
+ blksprs/ops/conversion.py,sha256=_g32aEEZdeuHHPj1pBfTNMxknRwJ9O1zk3Wv76pBIrg,21898
5
+ blksprs/ops/distribution.py,sha256=0tPldv0ARzmCV1CU2jvfqpHBgOuHPrDFiCtqsLs7CZc,20789
6
+ blksprs/ops/flow.py,sha256=PDZAD8u4y9qW1IXERki6ItKbEKnm_ChG8SKWM3_P9Oc,8245
7
+ blksprs/ops/matmul.py,sha256=5tVBKU_lglUjaLDi6J_dscdqlmzRz38OGxqAxZxZXDs,11879
8
+ blksprs/ops/partitioning.py,sha256=cfQmY9BZqGTvvJorIhtb-EyuGRJGPraWR-wTKdb47aI,9954
9
+ blksprs/ops/repeat.py,sha256=TLYNxwPuT9y5K9xyM41WK5gnggAJF3lI61Q2K7zWjns,9035
10
+ blksprs/ops/softmax.py,sha256=BwrRQdtRdkiSvl2mf5bpsTmyIxWiJOpa1HFg0st5yGU,12778
11
+ blksprs/ops/transpose.py,sha256=U-VAyLRT6_NDv9qYSFzBqfVlDeIpTqAMEXkqto0VF6w,4072
12
+ blksprs/ops/misc/broadcast_ops.py,sha256=-PrHiSJikZh8nXUmXxSCtFEP27TTxFr4wcrNxBjnimk,5987
13
+ blksprs/ops/misc/row_wise.py,sha256=n5FJjAuOd8BHBJQx4bsQwr-HmXkR9PYVAqfk77wjOFU,19653
14
+ blksprs/utils/autotuning.py,sha256=a-kmWRjJ3eED2XbjkQeOJSyW8bdIs27HgKMPvAKqWeU,2052
15
+ blksprs/utils/benchmarking.py,sha256=dLabDscTFn5NkmOI1g7DnKeTneUYW3RIVv9MDF-8BKc,1271
16
+ blksprs/utils/blksprs_tensor.py,sha256=pfoz59aJixj_fIoFx76ySiygwRQUemmgjMKepZ2c4j0,244
17
+ blksprs/utils/processing.py,sha256=RNkEDc0g-sNHRuMPkRzNWU13d3_lIkXMJdoqES4yQTM,3738
18
+ blksprs/utils/tools.py,sha256=BozpH3oEXe3K9ZRJsIzlasDk-sZyJqmwSf1gl7xbbdo,865
19
+ blksprs/utils/validation.py,sha256=G8eQlvJVMKfEX3k2AwBD0A6Ck-gFoRLpLNY6HXsB3fA,4348
20
+ blksprs-2.0rc8.dist-info/METADATA,sha256=h70L26BthR6laP7sMQLF9L3dHIRQNCF_oKwZ5g4dZSg,9509
21
+ blksprs-2.0rc8.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
22
+ blksprs-2.0rc8.dist-info/top_level.txt,sha256=qyp0IHeY3H2GQA97i4hk_To5rRBS2YcE1HRPSLy04fk,8
23
+ blksprs-2.0rc8.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (78.1.0)
2
+ Generator: setuptools (80.8.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,23 +0,0 @@
1
- blksprs/__init__.py,sha256=OHfpwJCZWGUfpT-DVfC1YSaeZl4aCMNt9CrzMPymywU,1577
2
- blksprs/layouting/distribution_layout.py,sha256=TkMh_DYKX56Cb8Vq7EHyupMRvzm0XbUNP8QP7afv9wM,5122
3
- blksprs/layouting/sparsity_layout.py,sha256=6GOjwllDUK9L8jEQNu2i17Pp1BIIQm8fv3xVuiR0zIw,10228
4
- blksprs/ops/conversion.py,sha256=2zAdbaZ1iP2lisLVeG-k-f571G4HJapADhSwpY0Zd3o,21503
5
- blksprs/ops/distribution.py,sha256=Gffhd7z85IDm57G8x_v7J2P3ezEVOHEWPD_36Lf8Irs,20453
6
- blksprs/ops/flow.py,sha256=UO5ba5TFgVpEyT7r0hnWYw3vhRDpBOxyPHUBeNOAYPs,7935
7
- blksprs/ops/matmul.py,sha256=02hujXMtFgF7ohepM3v6h9okrfcU-J3mQZV17B-qvh0,12235
8
- blksprs/ops/partitioning.py,sha256=nAV28f3NtvT4OFvDtnE0A-VxpDQmMXS0pZw4CJwzqGA,9838
9
- blksprs/ops/repeat.py,sha256=bQpJuwtt8aRdSzxT78lJ8f8fLDhPkYK5UvMfJ-PQrkc,8977
10
- blksprs/ops/softmax.py,sha256=-NoTf1Cpuku9C99N0LuMydT_ObozWTnZJGDZxseXEXI,12209
11
- blksprs/ops/transpose.py,sha256=PQKteFnzNAOEC7voO7wh_dq9c54UjCboJz889aBCwKc,4010
12
- blksprs/ops/misc/broadcast_ops.py,sha256=wBusOtscfGSbtfsCynI1ypr93KuCfVpLX_4b4l8-dck,5811
13
- blksprs/ops/misc/row_wise.py,sha256=k23p1rizOLS_iRWFhiKiRW6KnR2qxmHfsE8jq0VFfa0,18991
14
- blksprs/utils/autotuning.py,sha256=tDfMWklm2rvbo0-ahH81C3Gg0U6LHjPn3d_3pEOzmJs,2053
15
- blksprs/utils/benchmarking.py,sha256=dLabDscTFn5NkmOI1g7DnKeTneUYW3RIVv9MDF-8BKc,1271
16
- blksprs/utils/blksprs_tensor.py,sha256=pfoz59aJixj_fIoFx76ySiygwRQUemmgjMKepZ2c4j0,244
17
- blksprs/utils/processing.py,sha256=xuu9iDpwTvsqI_WKMSD8QCNuvPnfcKMRcuF2L4Zs6Ts,3808
18
- blksprs/utils/tools.py,sha256=NusV0H_XPn4ETJTibQwh3bJBqfW12iUrBuk1EfjbAQs,851
19
- blksprs/utils/validation.py,sha256=G8eQlvJVMKfEX3k2AwBD0A6Ck-gFoRLpLNY6HXsB3fA,4348
20
- blksprs-2.0rc6.dist-info/METADATA,sha256=gC-91T17-byW4_f0R41hWjwzOr-N1fjp-HIDu3phunU,9179
21
- blksprs-2.0rc6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
22
- blksprs-2.0rc6.dist-info/top_level.txt,sha256=qyp0IHeY3H2GQA97i4hk_To5rRBS2YcE1HRPSLy04fk,8
23
- blksprs-2.0rc6.dist-info/RECORD,,