compressed-tensors-nightly 0.4.0.20240709__py3-none-any.whl → 0.4.0.20240711__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -293,6 +293,11 @@ def maybe_calibrate_or_quantize(
293
293
  }:
294
294
  return value
295
295
 
296
+ if value.numel() == 0:
297
+ # if the tensor is empty,
298
+ # skip quantization
299
+ return value
300
+
296
301
  if args.dynamic:
297
302
  # dynamic quantization - get scale and zero point directly from observer
298
303
  observer = getattr(module, f"{base_name}_observer")
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import logging
15
16
  from typing import Any, Iterable, Optional, Tuple, Union
16
17
 
17
18
  import torch
@@ -24,6 +25,9 @@ from torch import FloatTensor, IntTensor, Tensor
24
25
  from torch.nn import Module
25
26
 
26
27
 
28
+ _LOGGER = logging.getLogger(__name__)
29
+
30
+
27
31
  __all__ = ["Observer"]
28
32
 
29
33
 
@@ -39,6 +43,7 @@ class Observer(Module, RegistryMixin):
39
43
  super().__init__()
40
44
  self._scale = None
41
45
  self._zero_point = None
46
+ self._num_observed_tokens = None
42
47
 
43
48
  @torch.no_grad()
44
49
  def forward(self, observed: Tensor) -> Tuple[FloatTensor, IntTensor]:
@@ -48,6 +53,7 @@ class Observer(Module, RegistryMixin):
48
53
  from
49
54
  :return: tuple of scale and zero point based on last observed value
50
55
  """
56
+ self.record_observed_tokens(observed)
51
57
  return self.get_qparams(observed=observed)
52
58
 
53
59
  def calculate_qparams(
@@ -132,3 +138,36 @@ class Observer(Module, RegistryMixin):
132
138
  return self.calculate_qparams(
133
139
  observed, reduce_dims=reduce_dims, tensor_id=tensor_id
134
140
  )
141
+
142
+ def record_observed_tokens(self, batch_tensor: Tensor):
143
+ """
144
+ Counts the number of tokens observed during the
145
+ forward passes. The count is aggregated in the
146
+ _num_observed_tokens attribute of the class.
147
+
148
+ Note: The batch_tensor is expected to have two dimensions
149
+ (batch_size * sequence_length, num_features). This is the
150
+ general shape expected by the forward pass of the expert
151
+ layers in a MOE model. If the input tensor does not have
152
+ two dimensions, the _num_observed_tokens attribute will be set
153
+ to None.
154
+ """
155
+ if not isinstance(batch_tensor, Tensor):
156
+ raise ValueError(f"Expected value to be a tensor, got {type(batch_tensor)}")
157
+
158
+ if batch_tensor.ndim != 2:
159
+ _LOGGER.debug(
160
+ "The input tensor is expected to have two dimensions "
161
+ "(batch_size * sequence_length, num_features). "
162
+ f"The input tensor has {batch_tensor.ndim} dimensions."
163
+ )
164
+ return
165
+
166
+ if self._num_observed_tokens is None:
167
+ # initialize the count
168
+ self._num_observed_tokens = 0
169
+
170
+ # batch_tensor (batch_size * sequence_length, num_features)
171
+ # observed_tokens (batch_size * sequence_length)
172
+ observed_tokens, _ = batch_tensor.shape
173
+ self._num_observed_tokens += observed_tokens
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from collections import Counter
15
16
  from typing import Tuple
16
17
 
17
18
  import torch
@@ -23,16 +24,33 @@ from compressed_tensors.quantization.quant_args import (
23
24
  from torch import FloatTensor, IntTensor, Tensor
24
25
 
25
26
 
26
- __all__ = ["calculate_qparams", "calculate_range"]
27
+ __all__ = ["calculate_qparams", "get_observer_token_count", "calculate_range"]
28
+
29
+
30
+ def get_observer_token_count(module: torch.nn.Module) -> Counter:
31
+ """
32
+ Parse the module and return the number of tokens observed by
33
+ each module's observer.
34
+
35
+ :param module: module to parse
36
+ :return: counter with the number of tokens observed by each observer
37
+ """
38
+ token_counts = Counter()
39
+ for name, module in module.named_modules():
40
+ if name.endswith(".input_observer"):
41
+ token_counts[
42
+ name.replace(".input_observer", "")
43
+ ] = module._num_observed_tokens
44
+ return token_counts
27
45
 
28
46
 
29
47
  def calculate_qparams(
30
48
  min_vals: Tensor, max_vals: Tensor, quantization_args: QuantizationArgs
31
49
  ) -> Tuple[FloatTensor, IntTensor]:
32
50
  """
33
- :param min_vals: tensor of min value(s) to caluclate scale(s) and zero point(s)
51
+ :param min_vals: tensor of min value(s) to calculate scale(s) and zero point(s)
34
52
  from
35
- :param max_vals: tensor of max value(s) to caluclate scale(s) and zero point(s)
53
+ :param max_vals: tensor of max value(s) to calculate scale(s) and zero point(s)
36
54
  from
37
55
  :param quantization_args: settings to quantization
38
56
  :return: tuple of the calculated scale(s) and zero point(s)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: compressed-tensors-nightly
3
- Version: 0.4.0.20240709
3
+ Version: 0.4.0.20240711
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -26,12 +26,12 @@ compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcg
26
26
  compressed_tensors/quantization/lifecycle/apply.py,sha256=fyv5ujZC0__oG1ESOTmMyMsKK7DGAxG7uQI7_sxT7Mw,13308
27
27
  compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
28
28
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
29
- compressed_tensors/quantization/lifecycle/forward.py,sha256=tcjL_qyE3ODourNprt2bndF7_ALlUEGY2_Yag4exLoE,11908
29
+ compressed_tensors/quantization/lifecycle/forward.py,sha256=iVIVt17U3ObjGVgYlmdBc-8SZQFgZbi20hvjW_NGzI4,12019
30
30
  compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
31
31
  compressed_tensors/quantization/lifecycle/initialize.py,sha256=kIEx6a7UyqAIG_ZPNBhijrDiAHnp2wR7K_GC3envz4M,4631
32
32
  compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
33
- compressed_tensors/quantization/observers/base.py,sha256=z_JC-CRz-PY7WlpSoyOoSQQWz5ekTEd5LbXt0iHQRes,5239
34
- compressed_tensors/quantization/observers/helpers.py,sha256=DSNGNJpZyT2Lyu0c82dHEGf9q5vm4N3zgI3DpkBbp0Q,3597
33
+ compressed_tensors/quantization/observers/base.py,sha256=2WO7N2eyXf1r1gxVidos1bUS5o7pcrpug4gQgHIazrQ,6794
34
+ compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
35
35
  compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
36
36
  compressed_tensors/quantization/observers/min_max.py,sha256=UK7zCMzxv9GGn6BflBxdajV20RiWaCY2RHcvZodCP1w,3669
37
37
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
41
41
  compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
42
42
  compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
43
43
  compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
44
- compressed_tensors_nightly-0.4.0.20240709.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
- compressed_tensors_nightly-0.4.0.20240709.dist-info/METADATA,sha256=5r_nS2oC6ajjpzmptwZRrG9sUCM779107N8oifS2HIA,5668
46
- compressed_tensors_nightly-0.4.0.20240709.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
47
- compressed_tensors_nightly-0.4.0.20240709.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
48
- compressed_tensors_nightly-0.4.0.20240709.dist-info/RECORD,,
44
+ compressed_tensors_nightly-0.4.0.20240711.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
45
+ compressed_tensors_nightly-0.4.0.20240711.dist-info/METADATA,sha256=8nqYjnAofwgxizqKTlBjrVg3pz8QX4ya0EtzscwE2F0,5668
46
+ compressed_tensors_nightly-0.4.0.20240711.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
47
+ compressed_tensors_nightly-0.4.0.20240711.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
48
+ compressed_tensors_nightly-0.4.0.20240711.dist-info/RECORD,,