compressed-tensors-nightly 0.4.0.20240709__py3-none-any.whl → 0.4.0.20240711__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/quantization/lifecycle/forward.py +5 -0
- compressed_tensors/quantization/observers/base.py +39 -0
- compressed_tensors/quantization/observers/helpers.py +21 -3
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/RECORD +8 -8
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/top_level.txt +0 -0
compressed_tensors/quantization/lifecycle/forward.py

@@ -293,6 +293,11 @@ def maybe_calibrate_or_quantize(
     }:
         return value
 
+    if value.numel() == 0:
+        # if the tensor is empty,
+        # skip quantization
+        return value
+
     if args.dynamic:
         # dynamic quantization - get scale and zero point directly from observer
         observer = getattr(module, f"{base_name}_observer")
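The new guard handles the case where the activation tensor passed to `maybe_calibrate_or_quantize` has zero elements, most plausibly an expert layer in an MoE model that received no routed tokens for the current batch, so there is nothing to observe or quantize. A minimal sketch of the check on its own, with an illustrative helper name that is not part of the library:

```python
import torch

def should_skip_quantization(value: torch.Tensor) -> bool:
    # an empty activation (e.g. an MoE expert that received no tokens
    # in this batch) has zero elements and cannot be calibrated
    return value.numel() == 0

empty = torch.empty(0, 4096)   # no tokens routed to this expert
full = torch.randn(8, 4096)    # 8 tokens observed

assert should_skip_quantization(empty)      # True: return the value unchanged
assert not should_skip_quantization(full)   # False: proceed with quantization
```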
compressed_tensors/quantization/observers/base.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 from typing import Any, Iterable, Optional, Tuple, Union
 
 import torch
@@ -24,6 +25,9 @@ from torch import FloatTensor, IntTensor, Tensor
 from torch.nn import Module
 
 
+_LOGGER = logging.getLogger(__name__)
+
+
 __all__ = ["Observer"]
 
 
@@ -39,6 +43,7 @@ class Observer(Module, RegistryMixin):
         super().__init__()
         self._scale = None
         self._zero_point = None
+        self._num_observed_tokens = None
 
     @torch.no_grad()
     def forward(self, observed: Tensor) -> Tuple[FloatTensor, IntTensor]:
@@ -48,6 +53,7 @@ class Observer(Module, RegistryMixin):
             from
         :return: tuple of scale and zero point based on last observed value
         """
+        self.record_observed_tokens(observed)
         return self.get_qparams(observed=observed)
 
     def calculate_qparams(
@@ -132,3 +138,36 @@ class Observer(Module, RegistryMixin):
         return self.calculate_qparams(
             observed, reduce_dims=reduce_dims, tensor_id=tensor_id
         )
+
+    def record_observed_tokens(self, batch_tensor: Tensor):
+        """
+        Counts the number of tokens observed during the
+        forward passes. The count is aggregated in the
+        _num_observed_tokens attribute of the class.
+
+        Note: The batch_tensor is expected to have two dimensions
+        (batch_size * sequence_length, num_features). This is the
+        general shape expected by the forward pass of the expert
+        layers in a MOE model. If the input tensor does not have
+        two dimensions, the _num_observed_tokens attribute will be set
+        to None.
+        """
+        if not isinstance(batch_tensor, Tensor):
+            raise ValueError(f"Expected value to be a tensor, got {type(batch_tensor)}")
+
+        if batch_tensor.ndim != 2:
+            _LOGGER.debug(
+                "The input tensor is expected to have two dimensions "
+                "(batch_size * sequence_length, num_features). "
+                f"The input tensor has {batch_tensor.ndim} dimensions."
+            )
+            return
+
+        if self._num_observed_tokens is None:
+            # initialize the count
+            self._num_observed_tokens = 0
+
+        # batch_tensor (batch_size * sequence_length, num_features)
+        # observed_tokens (batch_size * sequence_length)
+        observed_tokens, _ = batch_tensor.shape
+        self._num_observed_tokens += observed_tokens
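With this change, every forward pass through an observer also tallies the number of tokens it has seen, i.e. the rows of the flattened `(batch_size * sequence_length, num_features)` activation, and accumulates that count in `_num_observed_tokens`. A self-contained sketch of the same accumulation logic using a stand-in class (not the library's `Observer`):

```python
import torch

class TokenCountingObserver:
    """Toy stand-in that only tracks how many tokens it has seen."""

    def __init__(self):
        # None until the first valid 2-D batch arrives, mirroring the diff above
        self._num_observed_tokens = None

    def record_observed_tokens(self, batch_tensor: torch.Tensor):
        if batch_tensor.ndim != 2:
            # only (batch_size * sequence_length, num_features) inputs are counted
            return
        if self._num_observed_tokens is None:
            self._num_observed_tokens = 0
        # each row of the flattened activation is one token
        self._num_observed_tokens += batch_tensor.shape[0]

obs = TokenCountingObserver()
obs.record_observed_tokens(torch.randn(16, 128))  # 16 tokens
obs.record_observed_tokens(torch.randn(4, 128))   # 4 more tokens
print(obs._num_observed_tokens)                   # 20
```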
compressed_tensors/quantization/observers/helpers.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from collections import Counter
 from typing import Tuple
 
 import torch
@@ -23,16 +24,33 @@ from compressed_tensors.quantization.quant_args import (
 from torch import FloatTensor, IntTensor, Tensor
 
 
-__all__ = ["calculate_qparams", "calculate_range"]
+__all__ = ["calculate_qparams", "get_observer_token_count", "calculate_range"]
+
+
+def get_observer_token_count(module: torch.nn.Module) -> Counter:
+    """
+    Parse the module and return the number of tokens observed by
+    each module's observer.
+
+    :param module: module to parse
+    :return: counter with the number of tokens observed by each observer
+    """
+    token_counts = Counter()
+    for name, module in module.named_modules():
+        if name.endswith(".input_observer"):
+            token_counts[
+                name.replace(".input_observer", "")
+            ] = module._num_observed_tokens
+    return token_counts
 
 
 def calculate_qparams(
     min_vals: Tensor, max_vals: Tensor, quantization_args: QuantizationArgs
 ) -> Tuple[FloatTensor, IntTensor]:
     """
-    :param min_vals: tensor of min value(s) to
+    :param min_vals: tensor of min value(s) to calculate scale(s) and zero point(s)
        from
-    :param max_vals: tensor of max value(s) to
+    :param max_vals: tensor of max value(s) to calculate scale(s) and zero point(s)
        from
     :param quantization_args: settings to quantization
     :return: tuple of the calculated scale(s) and zero point(s)
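The new `get_observer_token_count` helper walks a model's submodules, finds observers registered under the name `input_observer`, and maps each parent module's name to its `_num_observed_tokens` in a `Counter`. A sketch of the same traversal on a toy model with stand-in observers (the dummy observer class and layer names are illustrative; on a calibrated model you would simply call `get_observer_token_count(model)`):

```python
from collections import Counter

import torch

class DummyObserver(torch.nn.Module):
    """Stand-in carrying the attribute the helper reads."""

    def __init__(self, num_observed_tokens: int):
        super().__init__()
        self._num_observed_tokens = num_observed_tokens

# toy model with one observer attached per linear layer
model = torch.nn.Module()
model.layer1 = torch.nn.Linear(8, 8)
model.layer1.input_observer = DummyObserver(num_observed_tokens=128)
model.layer2 = torch.nn.Linear(8, 8)
model.layer2.input_observer = DummyObserver(num_observed_tokens=64)

# the same traversal the helper performs
token_counts = Counter()
for name, submodule in model.named_modules():
    if name.endswith(".input_observer"):
        token_counts[name.replace(".input_observer", "")] = submodule._num_observed_tokens

print(token_counts)  # Counter({'layer1': 128, 'layer2': 64})
```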
{compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240709
+Version: 0.4.0.20240711
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/RECORD

@@ -26,12 +26,12 @@ compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcg
 compressed_tensors/quantization/lifecycle/apply.py,sha256=fyv5ujZC0__oG1ESOTmMyMsKK7DGAxG7uQI7_sxT7Mw,13308
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
+compressed_tensors/quantization/lifecycle/forward.py,sha256=iVIVt17U3ObjGVgYlmdBc-8SZQFgZbi20hvjW_NGzI4,12019
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=kIEx6a7UyqAIG_ZPNBhijrDiAHnp2wR7K_GC3envz4M,4631
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
-compressed_tensors/quantization/observers/base.py,sha256=
-compressed_tensors/quantization/observers/helpers.py,sha256=
+compressed_tensors/quantization/observers/base.py,sha256=2WO7N2eyXf1r1gxVidos1bUS5o7pcrpug4gQgHIazrQ,6794
+compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
 compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
 compressed_tensors/quantization/observers/min_max.py,sha256=UK7zCMzxv9GGn6BflBxdajV20RiWaCY2RHcvZodCP1w,3669
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
+compressed_tensors_nightly-0.4.0.20240711.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240711.dist-info/METADATA,sha256=8nqYjnAofwgxizqKTlBjrVg3pz8QX4ya0EtzscwE2F0,5668
+compressed_tensors_nightly-0.4.0.20240711.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240711.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240711.dist-info/RECORD,,