compressed-tensors-nightly 0.4.0.20240711__py3-none-any.whl → 0.4.0.20240712__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/marlin_24.py +6 -5
- compressed_tensors/quantization/quant_scheme.py +45 -9
- compressed_tensors/utils/__init__.py +3 -0
- compressed_tensors/utils/helpers.py +31 -1
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/RECORD +11 -13
- compressed_tensors/compressors/utils/__init__.py +0 -19
- compressed_tensors/compressors/utils/helpers.py +0 -43
- /compressed_tensors/{compressors/utils → utils}/permutations_24.py +0 -0
- /compressed_tensors/{compressors/utils → utils}/semi_structured_conversions.py +0 -0
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/marlin_24.py

@@ -18,15 +18,16 @@ from typing import Dict, Generator, Tuple
 import numpy as np
 import torch
 from compressed_tensors.compressors import Compressor
-from compressed_tensors.compressors.utils import (
+from compressed_tensors.config import CompressionFormat
+from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
+from compressed_tensors.quantization.lifecycle.forward import quantize
+from compressed_tensors.utils import (
     get_permutations_24,
+    is_quantization_param,
+    merge_names,
     sparse_semi_structured_from_dense_cutlass,
     tensor_follows_mask_structure,
 )
-from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
-from compressed_tensors.quantization.lifecycle.forward import quantize
-from compressed_tensors.utils import is_quantization_param, merge_names
 from torch import Tensor
 from tqdm import tqdm
 
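The practical upshot of this hunk is the package reorganization shown in the file list above: the 2:4 sparsity helpers now live under compressed_tensors.utils instead of compressed_tensors.compressors.utils. A minimal sketch of the change downstream code would need, assuming it imported these helpers directly from the old location:

    # Old location (0.4.0.20240711 and earlier), removed in this release:
    # from compressed_tensors.compressors.utils import get_permutations_24

    # New location (0.4.0.20240712 onward):
    from compressed_tensors.utils import (
        get_permutations_24,
        sparse_semi_structured_from_dense_cutlass,
        tensor_follows_mask_structure,
    )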
compressed_tensors/quantization/quant_scheme.py

@@ -111,55 +111,91 @@ def is_preset_scheme(name: str) -> bool:
     return name.upper() in PRESET_SCHEMES
 
 
+# 8 bit integer weights and 8 bit activations quantization
 W8A8 = dict(
     weights=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.CHANNEL,
+        symmetric=True,
+        dynamic=False,
     ),
     input_activations=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.TOKEN,
+        symmetric=True,
         dynamic=True,
     ),
 )
 
+# 8 bit integer weights only quantization
 W8A16 = dict(
     weights=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.CHANNEL,
-    )
+        symmetric=True,
+        dynamic=False,
+    ),
 )
 
+# 4 bit integer weights only quantization
 W4A16 = dict(
     weights=QuantizationArgs(
         num_bits=4,
-        symmetric=True,
         type=QuantizationType.INT,
         strategy=QuantizationStrategy.GROUP,
         group_size=128,
-    )
+        symmetric=True,
+        dynamic=False,
+    ),
 )
 
-FP8 = dict(
+# 4 bit integer weights and 8 bit activations quantization
+W4A8 = dict(
     weights=QuantizationArgs(
+        num_bits=4,
+        type=QuantizationType.INT,
+        group_size=128,
+        strategy=QuantizationStrategy.GROUP,
+        symmetric=True,
+        dynamic=False,
+    ),
+    input_activations=QuantizationArgs(
         num_bits=8,
+        type=QuantizationType.INT,
+        strategy=QuantizationStrategy.TENSOR,
         symmetric=True,
+        dynamic=True,
+    ),
+)
+
+# FP8 weights and FP8 activations quantization
+FP8 = dict(
+    weights=QuantizationArgs(
+        num_bits=8,
         type=QuantizationType.FLOAT,
         strategy=QuantizationStrategy.TENSOR,
+        symmetric=True,
+        dynamic=False,
     ),
     input_activations=QuantizationArgs(
         num_bits=8,
-        symmetric=True,
         type=QuantizationType.FLOAT,
         strategy=QuantizationStrategy.TENSOR,
+        symmetric=True,
         dynamic=False,
     ),
 )
 
-PRESET_SCHEMES = {"W8A8": W8A8, "W8A16": W8A16, "W4A16": W4A16, "FP8": FP8}
+PRESET_SCHEMES = {
+    # Integer weight only schemes
+    "W8A16": W8A16,
+    "W4A16": W4A16,
+    # Integer weight and activation schemes
+    "W8A8": W8A8,
+    "W4A8": W4A8,
+    # Float weight and activation schemes
+    "FP8": FP8,
+}
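Each preset is a plain dict mapping a target ("weights", "input_activations") to a QuantizationArgs instance, and is_preset_scheme upper-cases the name before checking membership in PRESET_SCHEMES. A minimal lookup sketch using only the names visible in this hunk (the import path follows the file list above):

    from compressed_tensors.quantization.quant_scheme import (
        PRESET_SCHEMES,
        is_preset_scheme,
    )

    name = "w4a8"  # scheme added in this release
    if is_preset_scheme(name):  # case-insensitive: upper-cases before lookup
        preset = PRESET_SCHEMES[name.upper()]
        weights = preset["weights"]
        print(weights.num_bits, weights.group_size)   # 4 128
        print(preset["input_activations"].dynamic)    # True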
compressed_tensors/utils/helpers.py

@@ -14,10 +14,15 @@
 
 from typing import Optional
 
+import torch
 from transformers import AutoConfig
 
 
-__all__ = ["infer_compressor_from_model_config", "fix_fsdp_module_name"]
+__all__ = [
+    "infer_compressor_from_model_config",
+    "fix_fsdp_module_name",
+    "tensor_follows_mask_structure",
+]
 
 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
 

@@ -60,3 +65,28 @@ def fix_fsdp_module_name(name: str) -> str:
     return name.replace(FSDP_WRAPPER_NAME + ".", "").replace(
         "." + FSDP_WRAPPER_NAME, ""
     )
+
+
+def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
+    """
+    :param tensor: tensor to check
+    :param mask: mask structure to check for, in the format "n:m"
+    :return: True if the tensor follows the mask structure, False otherwise.
+        Note, some weights can incidentally be zero, so we check for
+        atleast n zeros in each chunk of size m
+    """
+
+    n, m = tuple(map(int, mask.split(":")))
+    # Reshape the tensor into chunks of size m
+    tensor = tensor.view(-1, m)
+
+    # Count the number of zeros in each chunk
+    zero_counts = (tensor == 0).sum(dim=1)
+
+    # Check if the number of zeros in each chunk atleast n
+    # Greater than sign is needed as some weights can incidentally
+    # be zero
+    if not torch.all(zero_counts >= n).item():
+        raise ValueError()
+
+    return True
{compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240711
+Version: 0.4.0.20240712
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors_nightly-0.4.0.20240711.dist-info → compressed_tensors_nightly-0.4.0.20240712.dist-info}/RECORD

@@ -5,15 +5,11 @@ compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpc
 compressed_tensors/compressors/base.py,sha256=-rqT2h9G2iwDkwrVj0d0jxxn9h0dccJA1mqOzVEkwGM,2144
 compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/marlin_24.py,sha256=
+compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
 compressed_tensors/compressors/model_compressor.py,sha256=9dyM2mvAgO7QeFTBWXBzT29JtmRMKQWWU7xh8StaFyI,13446
 compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
 compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
 compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
-compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
-compressed_tensors/compressors/utils/helpers.py,sha256=4fq7KclSIK__jemCG9pwYlgWLrQjsaAMxhIrhjdw0BQ,1506
-compressed_tensors/compressors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
-compressed_tensors/compressors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
 compressed_tensors/config/base.py,sha256=caSZ7xZ_kgcHRMXZ5hM1i6TKbgY__CkiSjZ93imHZQ0,1562
 compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317

@@ -21,7 +17,7 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
 compressed_tensors/quantization/quant_config.py,sha256=PU3BchHm09ks6_yAderrHoIZI07zBlU9ejC87v3A-54,9568
-compressed_tensors/quantization/quant_scheme.py,sha256=
+compressed_tensors/quantization/quant_scheme.py,sha256=IKTtMfusSe7x31t7hipBfptTbCwGd9eGMtrWC1sPM9o,5522
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
 compressed_tensors/quantization/lifecycle/apply.py,sha256=fyv5ujZC0__oG1ESOTmMyMsKK7DGAxG7uQI7_sxT7Mw,13308
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776

@@ -38,11 +34,13 @@ compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5
 compressed_tensors/quantization/utils/helpers.py,sha256=YjXABJQUnelof-z7qcwck6fnrFLh4uMSrOmPiqNp_RY,8591
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
-compressed_tensors/utils/__init__.py,sha256=
-compressed_tensors/utils/helpers.py,sha256=
+compressed_tensors/utils/__init__.py,sha256=dvAatm3p0He4J7u5ZmZYVa8Iwpwq3ZSm6S9ZJleta5M,762
+compressed_tensors/utils/helpers.py,sha256=d3yP9ViQ8R3GzMHfohxNlaokzyrRuj2PyjxWAJZmSws,3156
+compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.20240711.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.4.0.20240711.dist-info/METADATA,sha256=
-compressed_tensors_nightly-0.4.0.20240711.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors_nightly-0.4.0.20240711.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.4.0.20240711.dist-info/RECORD,,
+compressed_tensors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
+compressed_tensors_nightly-0.4.0.20240712.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240712.dist-info/METADATA,sha256=K39YHRwW4YcpN7VjAeCt5wE28KM8oHuRFc4-YgoNQAI,5668
+compressed_tensors_nightly-0.4.0.20240712.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240712.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240712.dist-info/RECORD,,
compressed_tensors/compressors/utils/__init__.py (deleted)

@@ -1,19 +0,0 @@
-# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# flake8: noqa
-
-from .helpers import *
-from .permutations_24 import *
-from .semi_structured_conversions import *
compressed_tensors/compressors/utils/helpers.py (deleted)

@@ -1,43 +0,0 @@
-# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import torch
-
-
-__all__ = ["tensor_follows_mask_structure"]
-
-
-def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
-    """
-    :param tensor: tensor to check
-    :param mask: mask structure to check for, in the format "n:m"
-    :return: True if the tensor follows the mask structure, False otherwise.
-        Note, some weights can incidentally be zero, so we check for
-        atleast n zeros in each chunk of size m
-    """
-
-    n, m = tuple(map(int, mask.split(":")))
-    # Reshape the tensor into chunks of size m
-    tensor = tensor.view(-1, m)
-
-    # Count the number of zeros in each chunk
-    zero_counts = (tensor == 0).sum(dim=1)
-
-    # Check if the number of zeros in each chunk atleast n
-    # Greater than sign is needed as some weights can incidentally
-    # be zero
-    if not torch.all(zero_counts >= n).item():
-        raise ValueError()
-
-    return True