compressed-tensors-nightly 0.4.0.20240709__py3-none-any.whl → 0.4.0.20240711__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/quantization/lifecycle/forward.py +5 -0
- compressed_tensors/quantization/observers/base.py +39 -0
- compressed_tensors/quantization/observers/helpers.py +21 -3
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/RECORD +8 -8
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/top_level.txt +0 -0
compressed_tensors/quantization/lifecycle/forward.py

@@ -293,6 +293,11 @@ def maybe_calibrate_or_quantize(
     }:
         return value
 
+    if value.numel() == 0:
+        # if the tensor is empty,
+        # skip quantization
+        return value
+
     if args.dynamic:
         # dynamic quantization - get scale and zero point directly from observer
         observer = getattr(module, f"{base_name}_observer")
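The new guard handles the case where the activation tensor passed to `maybe_calibrate_or_quantize` has zero elements, most plausibly an expert layer in an MoE model that received no routed tokens for the current batch, so there is nothing to observe or quantize. A minimal sketch of the check on its own, with an illustrative helper name that is not part of the library:

```python
import torch

def should_skip_quantization(value: torch.Tensor) -> bool:
    # an empty activation (e.g. an MoE expert that received no tokens
    # in this batch) has zero elements and cannot be calibrated
    return value.numel() == 0

empty = torch.empty(0, 4096)   # no tokens routed to this expert
full = torch.randn(8, 4096)    # 8 tokens observed

assert should_skip_quantization(empty)      # True: return the value unchanged
assert not should_skip_quantization(full)   # False: proceed with quantization
```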
compressed_tensors/quantization/observers/base.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 from typing import Any, Iterable, Optional, Tuple, Union
 
 import torch
@@ -24,6 +25,9 @@ from torch import FloatTensor, IntTensor, Tensor
 from torch.nn import Module
 
 
+_LOGGER = logging.getLogger(__name__)
+
+
 __all__ = ["Observer"]
 
 
@@ -39,6 +43,7 @@ class Observer(Module, RegistryMixin):
         super().__init__()
         self._scale = None
         self._zero_point = None
+        self._num_observed_tokens = None
 
     @torch.no_grad()
     def forward(self, observed: Tensor) -> Tuple[FloatTensor, IntTensor]:
@@ -48,6 +53,7 @@ class Observer(Module, RegistryMixin):
             from
         :return: tuple of scale and zero point based on last observed value
         """
+        self.record_observed_tokens(observed)
         return self.get_qparams(observed=observed)
 
     def calculate_qparams(
@@ -132,3 +138,36 @@ class Observer(Module, RegistryMixin):
         return self.calculate_qparams(
             observed, reduce_dims=reduce_dims, tensor_id=tensor_id
         )
+
+    def record_observed_tokens(self, batch_tensor: Tensor):
+        """
+        Counts the number of tokens observed during the
+        forward passes. The count is aggregated in the
+        _num_observed_tokens attribute of the class.
+
+        Note: The batch_tensor is expected to have two dimensions
+        (batch_size * sequence_length, num_features). This is the
+        general shape expected by the forward pass of the expert
+        layers in a MOE model. If the input tensor does not have
+        two dimensions, the _num_observed_tokens attribute will be set
+        to None.
+        """
+        if not isinstance(batch_tensor, Tensor):
+            raise ValueError(f"Expected value to be a tensor, got {type(batch_tensor)}")
+
+        if batch_tensor.ndim != 2:
+            _LOGGER.debug(
+                "The input tensor is expected to have two dimensions "
+                "(batch_size * sequence_length, num_features). "
+                f"The input tensor has {batch_tensor.ndim} dimensions."
+            )
+            return
+
+        if self._num_observed_tokens is None:
+            # initialize the count
+            self._num_observed_tokens = 0
+
+        # batch_tensor (batch_size * sequence_length, num_features)
+        # observed_tokens (batch_size * sequence_length)
+        observed_tokens, _ = batch_tensor.shape
+        self._num_observed_tokens += observed_tokens
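With this change, every forward pass through an observer also tallies the number of tokens it has seen, i.e. the rows of the flattened `(batch_size * sequence_length, num_features)` activation, and accumulates that count in `_num_observed_tokens`. A self-contained sketch of the same accumulation logic using a stand-in class (not the library's `Observer`):

```python
import torch

class TokenCountingObserver:
    """Toy stand-in that only tracks how many tokens it has seen."""

    def __init__(self):
        # None until the first valid 2-D batch arrives, mirroring the diff above
        self._num_observed_tokens = None

    def record_observed_tokens(self, batch_tensor: torch.Tensor):
        if batch_tensor.ndim != 2:
            # only (batch_size * sequence_length, num_features) inputs are counted
            return
        if self._num_observed_tokens is None:
            self._num_observed_tokens = 0
        # each row of the flattened activation is one token
        self._num_observed_tokens += batch_tensor.shape[0]

obs = TokenCountingObserver()
obs.record_observed_tokens(torch.randn(16, 128))  # 16 tokens
obs.record_observed_tokens(torch.randn(4, 128))   # 4 more tokens
print(obs._num_observed_tokens)                   # 20
```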
compressed_tensors/quantization/observers/helpers.py

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from collections import Counter
 from typing import Tuple
 
 import torch
@@ -23,16 +24,33 @@ from compressed_tensors.quantization.quant_args import (
 from torch import FloatTensor, IntTensor, Tensor
 
 
-__all__ = ["calculate_qparams", "calculate_range"]
+__all__ = ["calculate_qparams", "get_observer_token_count", "calculate_range"]
+
+
+def get_observer_token_count(module: torch.nn.Module) -> Counter:
+    """
+    Parse the module and return the number of tokens observed by
+    each module's observer.
+
+    :param module: module to parse
+    :return: counter with the number of tokens observed by each observer
+    """
+    token_counts = Counter()
+    for name, module in module.named_modules():
+        if name.endswith(".input_observer"):
+            token_counts[
+                name.replace(".input_observer", "")
+            ] = module._num_observed_tokens
+    return token_counts
 
 
 def calculate_qparams(
     min_vals: Tensor, max_vals: Tensor, quantization_args: QuantizationArgs
 ) -> Tuple[FloatTensor, IntTensor]:
     """
-    :param min_vals: tensor of min value(s) to
+    :param min_vals: tensor of min value(s) to calculate scale(s) and zero point(s)
        from
-    :param max_vals: tensor of max value(s) to
+    :param max_vals: tensor of max value(s) to calculate scale(s) and zero point(s)
        from
     :param quantization_args: settings to quantization
     :return: tuple of the calculated scale(s) and zero point(s)
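The new `get_observer_token_count` helper walks a model's submodules, finds observers registered under the name `input_observer`, and maps each parent module's name to its `_num_observed_tokens` in a `Counter`. A sketch of the same traversal on a toy model with stand-in observers (the dummy observer class and layer names are illustrative; on a calibrated model you would simply call `get_observer_token_count(model)`):

```python
from collections import Counter

import torch

class DummyObserver(torch.nn.Module):
    """Stand-in carrying the attribute the helper reads."""

    def __init__(self, num_observed_tokens: int):
        super().__init__()
        self._num_observed_tokens = num_observed_tokens

# toy model with one observer attached per linear layer
model = torch.nn.Module()
model.layer1 = torch.nn.Linear(8, 8)
model.layer1.input_observer = DummyObserver(num_observed_tokens=128)
model.layer2 = torch.nn.Linear(8, 8)
model.layer2.input_observer = DummyObserver(num_observed_tokens=64)

# the same traversal the helper performs
token_counts = Counter()
for name, submodule in model.named_modules():
    if name.endswith(".input_observer"):
        token_counts[name.replace(".input_observer", "")] = submodule._num_observed_tokens

print(token_counts)  # Counter({'layer1': 128, 'layer2': 64})
```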
{compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240709
+Version: 0.4.0.20240711
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors_nightly-0.4.0.20240709.dist-info → compressed_tensors_nightly-0.4.0.20240711.dist-info}/RECORD

@@ -26,12 +26,12 @@ compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcg
 compressed_tensors/quantization/lifecycle/apply.py,sha256=fyv5ujZC0__oG1ESOTmMyMsKK7DGAxG7uQI7_sxT7Mw,13308
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
+compressed_tensors/quantization/lifecycle/forward.py,sha256=iVIVt17U3ObjGVgYlmdBc-8SZQFgZbi20hvjW_NGzI4,12019
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=kIEx6a7UyqAIG_ZPNBhijrDiAHnp2wR7K_GC3envz4M,4631
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
-compressed_tensors/quantization/observers/base.py,sha256=
-compressed_tensors/quantization/observers/helpers.py,sha256=
+compressed_tensors/quantization/observers/base.py,sha256=2WO7N2eyXf1r1gxVidos1bUS5o7pcrpug4gQgHIazrQ,6794
+compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
 compressed_tensors/quantization/observers/memoryless.py,sha256=jH_c6K3gxf4W3VNXQ7tbnP-J_86QTrEfjBn6Kh1C-H8,2165
 compressed_tensors/quantization/observers/min_max.py,sha256=UK7zCMzxv9GGn6BflBxdajV20RiWaCY2RHcvZodCP1w,3669
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
@@ -41,8 +41,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
 compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
+compressed_tensors_nightly-0.4.0.20240711.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240711.dist-info/METADATA,sha256=8nqYjnAofwgxizqKTlBjrVg3pz8QX4ya0EtzscwE2F0,5668
+compressed_tensors_nightly-0.4.0.20240711.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240711.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240711.dist-info/RECORD,,