compressed-tensors-nightly 0.4.0.20240710__py3-none-any.whl → 0.4.0.20240712__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,15 +18,16 @@ from typing import Dict, Generator, Tuple
18
18
  import numpy as np
19
19
  import torch
20
20
  from compressed_tensors.compressors import Compressor
21
- from compressed_tensors.compressors.utils import (
21
+ from compressed_tensors.config import CompressionFormat
22
+ from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
23
+ from compressed_tensors.quantization.lifecycle.forward import quantize
24
+ from compressed_tensors.utils import (
22
25
  get_permutations_24,
26
+ is_quantization_param,
27
+ merge_names,
23
28
  sparse_semi_structured_from_dense_cutlass,
24
29
  tensor_follows_mask_structure,
25
30
  )
26
- from compressed_tensors.config import CompressionFormat
27
- from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
28
- from compressed_tensors.quantization.lifecycle.forward import quantize
29
- from compressed_tensors.utils import is_quantization_param, merge_names
30
31
  from torch import Tensor
31
32
  from tqdm import tqdm
32
33
 
@@ -111,55 +111,91 @@ def is_preset_scheme(name: str) -> bool:
111
111
  return name.upper() in PRESET_SCHEMES
112
112
 
113
113
 
114
+ # 8 bit integer weights and 8 bit activations quantization
114
115
  W8A8 = dict(
115
116
  weights=QuantizationArgs(
116
117
  num_bits=8,
117
- symmetric=True,
118
118
  type=QuantizationType.INT,
119
119
  strategy=QuantizationStrategy.CHANNEL,
120
+ symmetric=True,
121
+ dynamic=False,
120
122
  ),
121
123
  input_activations=QuantizationArgs(
122
124
  num_bits=8,
123
- symmetric=True,
124
125
  type=QuantizationType.INT,
125
126
  strategy=QuantizationStrategy.TOKEN,
127
+ symmetric=True,
126
128
  dynamic=True,
127
129
  ),
128
130
  )
129
131
 
132
+ # 8 bit integer weights only quantization
130
133
  W8A16 = dict(
131
134
  weights=QuantizationArgs(
132
135
  num_bits=8,
133
- symmetric=True,
134
136
  type=QuantizationType.INT,
135
137
  strategy=QuantizationStrategy.CHANNEL,
136
- )
138
+ symmetric=True,
139
+ dynamic=False,
140
+ ),
137
141
  )
138
142
 
143
+ # 4 bit integer weights only quantization
139
144
  W4A16 = dict(
140
145
  weights=QuantizationArgs(
141
146
  num_bits=4,
142
- symmetric=True,
143
147
  type=QuantizationType.INT,
144
148
  strategy=QuantizationStrategy.GROUP,
145
149
  group_size=128,
146
- )
150
+ symmetric=True,
151
+ dynamic=False,
152
+ ),
147
153
  )
148
154
 
149
- FP8 = dict(
155
+ # 4 bit integer weights and 8 bit activations quantization
156
+ W4A8 = dict(
150
157
  weights=QuantizationArgs(
158
+ num_bits=4,
159
+ type=QuantizationType.INT,
160
+ group_size=128,
161
+ strategy=QuantizationStrategy.GROUP,
162
+ symmetric=True,
163
+ dynamic=False,
164
+ ),
165
+ input_activations=QuantizationArgs(
151
166
  num_bits=8,
167
+ type=QuantizationType.INT,
168
+ strategy=QuantizationStrategy.TENSOR,
152
169
  symmetric=True,
170
+ dynamic=True,
171
+ ),
172
+ )
173
+
174
+ # FP8 weights and FP8 activations quantization
175
+ FP8 = dict(
176
+ weights=QuantizationArgs(
177
+ num_bits=8,
153
178
  type=QuantizationType.FLOAT,
154
179
  strategy=QuantizationStrategy.TENSOR,
180
+ symmetric=True,
181
+ dynamic=False,
155
182
  ),
156
183
  input_activations=QuantizationArgs(
157
184
  num_bits=8,
158
- symmetric=True,
159
185
  type=QuantizationType.FLOAT,
160
186
  strategy=QuantizationStrategy.TENSOR,
187
+ symmetric=True,
161
188
  dynamic=False,
162
189
  ),
163
190
  )
164
191
 
165
- PRESET_SCHEMES = {"W8A8": W8A8, "W8A16": W8A16, "W4A16": W4A16, "FP8": FP8}
192
+ PRESET_SCHEMES = {
193
+ # Integer weight only schemes
194
+ "W8A16": W8A16,
195
+ "W4A16": W4A16,
196
+ # Integer weight and activation schemes
197
+ "W8A8": W8A8,
198
+ "W4A8": W4A8,
199
+ # Float weight and activation schemes
200
+ "FP8": FP8,
201
+ }
@@ -13,4 +13,7 @@
13
13
  # limitations under the License.
14
14
  # flake8: noqa
15
15
 
16
+ from .helpers import *
17
+ from .permutations_24 import *
16
18
  from .safetensors_load import *
19
+ from .semi_structured_conversions import *
@@ -14,10 +14,15 @@
14
14
 
15
15
  from typing import Optional
16
16
 
17
+ import torch
17
18
  from transformers import AutoConfig
18
19
 
19
20
 
20
- __all__ = ["infer_compressor_from_model_config", "fix_fsdp_module_name"]
21
+ __all__ = [
22
+ "infer_compressor_from_model_config",
23
+ "fix_fsdp_module_name",
24
+ "tensor_follows_mask_structure",
25
+ ]
21
26
 
22
27
  FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
23
28
 
@@ -60,3 +65,28 @@ def fix_fsdp_module_name(name: str) -> str:
60
65
  return name.replace(FSDP_WRAPPER_NAME + ".", "").replace(
61
66
  "." + FSDP_WRAPPER_NAME, ""
62
67
  )
68
+
69
+
70
+ def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
71
+ """
72
+ :param tensor: tensor to check
73
+ :param mask: mask structure to check for, in the format "n:m"
74
+ :return: True if the tensor follows the mask structure; a ValueError is raised otherwise.
75
+ Note, some weights can incidentally be zero, so we check for
76
+ at least n zeros in each chunk of size m
77
+ """
78
+
79
+ n, m = tuple(map(int, mask.split(":")))
80
+ # Reshape the tensor into chunks of size m
81
+ tensor = tensor.view(-1, m)
82
+
83
+ # Count the number of zeros in each chunk
84
+ zero_counts = (tensor == 0).sum(dim=1)
85
+
86
+ # Check that the number of zeros in each chunk is at least n
87
+ # A greater-than-or-equal comparison is needed as some weights can incidentally
88
+ # be zero
89
+ if not torch.all(zero_counts >= n).item():
90
+ raise ValueError()
91
+
92
+ return True
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.4.0.20240710
3
+ Version: 0.4.0.20240712
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -5,15 +5,11 @@ compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpc
5
5
  compressed_tensors/compressors/base.py,sha256=-rqT2h9G2iwDkwrVj0d0jxxn9h0dccJA1mqOzVEkwGM,2144
6
6
  compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
7
7
  compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
8
- compressed_tensors/compressors/marlin_24.py,sha256=PULMP1fp1sNWz-xOxvM0JXhOrUbq6sPwOTscYSifgDw,9450
8
+ compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
9
9
  compressed_tensors/compressors/model_compressor.py,sha256=9dyM2mvAgO7QeFTBWXBzT29JtmRMKQWWU7xh8StaFyI,13446
10
10
  compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
11
11
  compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
12
12
  compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
13
- compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
14
- compressed_tensors/compressors/utils/helpers.py,sha256=4fq7KclSIK__jemCG9pwYlgWLrQjsaAMxhIrhjdw0BQ,1506
15
- compressed_tensors/compressors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
16
- compressed_tensors/compressors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
17
13
  compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
18
14
  compressed_tensors/config/base.py,sha256=caSZ7xZ_kgcHRMXZ5hM1i6TKbgY__CkiSjZ93imHZQ0,1562
19
15
  compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
@@ -21,7 +17,7 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
21
17
  compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
22
18
  compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
23
19
  compressed_tensors/quantization/quant_config.py,sha256=PU3BchHm09ks6_yAderrHoIZI07zBlU9ejC87v3A-54,9568
24
- compressed_tensors/quantization/quant_scheme.py,sha256=urZz0YOvxjC2l9waSD5iLDTg9Pqu7N1IAeXldCXDNk0,4604
20
+ compressed_tensors/quantization/quant_scheme.py,sha256=IKTtMfusSe7x31t7hipBfptTbCwGd9eGMtrWC1sPM9o,5522
25
21
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
26
22
  compressed_tensors/quantization/lifecycle/apply.py,sha256=fyv5ujZC0__oG1ESOTmMyMsKK7DGAxG7uQI7_sxT7Mw,13308
27
23
  compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
@@ -38,11 +34,13 @@ compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5
38
34
  compressed_tensors/quantization/utils/helpers.py,sha256=YjXABJQUnelof-z7qcwck6fnrFLh4uMSrOmPiqNp_RY,8591
39
35
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
40
36
  compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
41
- compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
42
- compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
37
+ compressed_tensors/utils/__init__.py,sha256=dvAatm3p0He4J7u5ZmZYVa8Iwpwq3ZSm6S9ZJleta5M,762
38
+ compressed_tensors/utils/helpers.py,sha256=d3yP9ViQ8R3GzMHfohxNlaokzyrRuj2PyjxWAJZmSws,3156
39
+ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
43
40
  compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
44
- compressed_tensors_nightly-0.4.0.20240710.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
- compressed_tensors_nightly-0.4.0.20240710.dist-info/METADATA,sha256=5m6Vq_LqADD1E34x--jj-FNvrz8pjWSJFedvSdfJJts,5668
46
- compressed_tensors_nightly-0.4.0.20240710.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
47
- compressed_tensors_nightly-0.4.0.20240710.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
48
- compressed_tensors_nightly-0.4.0.20240710.dist-info/RECORD,,
41
+ compressed_tensors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
42
+ compressed_tensors_nightly-0.4.0.20240712.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
43
+ compressed_tensors_nightly-0.4.0.20240712.dist-info/METADATA,sha256=K39YHRwW4YcpN7VjAeCt5wE28KM8oHuRFc4-YgoNQAI,5668
44
+ compressed_tensors_nightly-0.4.0.20240712.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
45
+ compressed_tensors_nightly-0.4.0.20240712.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
46
+ compressed_tensors_nightly-0.4.0.20240712.dist-info/RECORD,,
@@ -1,19 +0,0 @@
1
- # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing,
10
- # software distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- # flake8: noqa
16
-
17
- from .helpers import *
18
- from .permutations_24 import *
19
- from .semi_structured_conversions import *
@@ -1,43 +0,0 @@
1
- # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing,
10
- # software distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
-
15
- import torch
16
-
17
-
18
- __all__ = ["tensor_follows_mask_structure"]
19
-
20
-
21
- def tensor_follows_mask_structure(tensor, mask: str = "2:4") -> bool:
22
- """
23
- :param tensor: tensor to check
24
- :param mask: mask structure to check for, in the format "n:m"
25
- :return: True if the tensor follows the mask structure, False otherwise.
26
- Note, some weights can incidentally be zero, so we check for
27
- atleast n zeros in each chunk of size m
28
- """
29
-
30
- n, m = tuple(map(int, mask.split(":")))
31
- # Reshape the tensor into chunks of size m
32
- tensor = tensor.view(-1, m)
33
-
34
- # Count the number of zeros in each chunk
35
- zero_counts = (tensor == 0).sum(dim=1)
36
-
37
- # Check if the number of zeros in each chunk atleast n
38
- # Greater than sign is needed as some weights can incidentally
39
- # be zero
40
- if not torch.all(zero_counts >= n).item():
41
- raise ValueError()
42
-
43
- return True