compressed-tensors-nightly 0.9.2.20250309__py3-none-any.whl → 0.9.2.20250312__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/linear/compressed_linear.py +16 -7
- {compressed_tensors_nightly-0.9.2.20250309.dist-info → compressed_tensors_nightly-0.9.2.20250312.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.9.2.20250309.dist-info → compressed_tensors_nightly-0.9.2.20250312.dist-info}/RECORD +6 -6
- {compressed_tensors_nightly-0.9.2.20250309.dist-info → compressed_tensors_nightly-0.9.2.20250312.dist-info}/WHEEL +1 -1
- {compressed_tensors_nightly-0.9.2.20250309.dist-info → compressed_tensors_nightly-0.9.2.20250312.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.9.2.20250309.dist-info → compressed_tensors_nightly-0.9.2.20250312.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,7 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
+
import warnings
|
15
16
|
from typing import Dict, Tuple
|
16
17
|
|
17
18
|
import torch
|
@@ -33,14 +34,15 @@ class CompressedLinear(Linear):
|
|
33
34
|
Wrapper module for running a compressed forward pass of a quantized Linear module.
|
34
35
|
The wrapped layer will decompressed on each forward call.
|
35
36
|
|
36
|
-
:param module: dense linear module to replace
|
37
|
-
:param quantization_scheme: quantization config for the module to wrap
|
38
|
-
:param quantization_format: compression format module is stored as
|
39
37
|
"""
|
40
38
|
|
41
39
|
def __init__(self, *args, **kwargs) -> None:
|
42
40
|
super().__init__(*args, **kwargs)
|
43
|
-
|
41
|
+
warnings.warn(
|
42
|
+
"CompressedLinear should not be initialized directly. "
|
43
|
+
"Use the from_linear method instead.",
|
44
|
+
UserWarning,
|
45
|
+
)
|
44
46
|
|
45
47
|
@classmethod
|
46
48
|
@torch.no_grad()
|
@@ -50,6 +52,12 @@ class CompressedLinear(Linear):
|
|
50
52
|
quantization_scheme: QuantizationScheme,
|
51
53
|
quantization_format: str,
|
52
54
|
):
|
55
|
+
"""
|
56
|
+
:param module: dense linear module to replace
|
57
|
+
:param quantization_scheme: quantization config for the module to wrap
|
58
|
+
:param quantization_format: compression format module is stored as
|
59
|
+
:return: CompressedLinear module wrapping the input module
|
60
|
+
"""
|
53
61
|
module.__class__ = CompressedLinear
|
54
62
|
module.compressor = BaseCompressor.load_from_registry(quantization_format)
|
55
63
|
device = next(module.parameters()).device
|
@@ -90,8 +98,9 @@ class CompressedLinear(Linear):
|
|
90
98
|
"""
|
91
99
|
Decompresses the weight, then runs the wrapped forward pass
|
92
100
|
"""
|
93
|
-
if self.
|
94
|
-
|
95
|
-
self.
|
101
|
+
if self.quantization_status == QuantizationStatus.COMPRESSED:
|
102
|
+
decompressed_weight = self.compressor.decompress_module(self)
|
103
|
+
self.weight.data = decompressed_weight
|
104
|
+
self.quantization_status = QuantizationStatus.FROZEN
|
96
105
|
|
97
106
|
return linear(input, self.weight, self.bias)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.2
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.9.2.20250309
|
3
|
+
Version: 0.9.2.20250312
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -23,7 +23,7 @@ compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74j
|
|
23
23
|
compressed_tensors/config/sparse_24_bitmask.py,sha256=Lhj39zT2V1hxftprvxvneyhv45ShlXOKd75DBbDTyTE,1401
|
24
24
|
compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
|
25
25
|
compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
|
26
|
-
compressed_tensors/linear/compressed_linear.py,sha256=
|
26
|
+
compressed_tensors/linear/compressed_linear.py,sha256=4553oRbnuMW1siVtnHk7Dg617iVQzu2kRSX5WyLZzxk,3912
|
27
27
|
compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
|
28
28
|
compressed_tensors/quantization/quant_args.py,sha256=sKpb8DcNObidjXjNol1Tn_Iih3ZXBycSp-fyz68TGhY,9117
|
29
29
|
compressed_tensors/quantization/quant_config.py,sha256=vx06wBo91p4LCb3Vzd-2eCTUeIf_Sz2ZXRP263eQyjQ,10385
|
@@ -45,8 +45,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
|
|
45
45
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
46
46
|
compressed_tensors/utils/safetensors_load.py,sha256=5SeM2hzLh77Ne8Vk7qR6-km7cf8bhov41ExpWITqX3A,11470
|
47
47
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
48
|
-
compressed_tensors_nightly-0.9.2.
|
49
|
-
compressed_tensors_nightly-0.9.2.
|
50
|
-
compressed_tensors_nightly-0.9.2.
|
51
|
-
compressed_tensors_nightly-0.9.2.
|
52
|
-
compressed_tensors_nightly-0.9.2.
|
48
|
+
compressed_tensors_nightly-0.9.2.20250312.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
49
|
+
compressed_tensors_nightly-0.9.2.20250312.dist-info/METADATA,sha256=OsZMbP2kxO9dpksHaqRdcNSe6HLGXmdqyqb9bGuRZtw,6992
|
50
|
+
compressed_tensors_nightly-0.9.2.20250312.dist-info/WHEEL,sha256=52BFRY2Up02UkjOa29eZOS2VxUrpPORXg1pkohGGUS8,91
|
51
|
+
compressed_tensors_nightly-0.9.2.20250312.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
52
|
+
compressed_tensors_nightly-0.9.2.20250312.dist-info/RECORD,,
|
File without changes
|