compressed-tensors 0.8.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
```diff
--- compressed_tensors/compressors/model_compressors/model_compressor.py
+++ compressed_tensors/compressors/model_compressors/model_compressor.py
@@ -24,7 +24,6 @@ import compressed_tensors
 import torch
 import transformers
 from compressed_tensors.base import (
-    COMPRESSION_CONFIG_NAME,
     COMPRESSION_VERSION_NAME,
     QUANTIZATION_CONFIG_NAME,
     QUANTIZATION_METHOD_NAME,
@@ -39,6 +38,7 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization,
 )
+from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
```
```diff
@@ -103,12 +103,14 @@ class ModelCompressor:
         :return: compressor for the configs, or None if model is not compressed
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
+        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
+
         return cls.from_compression_config(compression_config)

     @classmethod
     def from_compression_config(
-        cls, compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"]
+        cls,
+        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
     ):
         """
         :param compression_config:
```
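With this change, `from_pretrained` reads the compression config from the attribute named by `QUANTIZATION_CONFIG_NAME` (presumably `"quantization_config"`, the key Hugging Face configs use) rather than the removed `COMPRESSION_CONFIG_NAME`. A minimal usage sketch, assuming a checkpoint whose config.json carries such an entry; the path is hypothetical:

```python
from compressed_tensors.compressors import ModelCompressor

# from_pretrained returns None when the loaded config has no
# quantization-config attribute, i.e. the model is not compressed
compressor = ModelCompressor.from_pretrained("path/to/quantized-model")
if compressor is None:
    print("model is not compressed")
```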
```diff
@@ -265,7 +267,11 @@ class ModelCompressor:
         state_dict = model.state_dict()

         compressed_state_dict = state_dict
-        quantized_modules_to_args = map_modules_to_quant_args(model)
+
+        quantized_modules_to_args: Dict[
+            str, QuantizationArgs
+        ] = map_modules_to_quant_args(model)
+
         if self.quantization_compressor is not None:
             compressed_state_dict = self.quantization_compressor.compress(
                 state_dict, names_to_scheme=quantized_modules_to_args
@@ -369,7 +375,13 @@ class ModelCompressor:
             update_parameter_data(module, data, param_name)


-def map_modules_to_quant_args(model: Module) -> Dict:
+def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
+    """
+    Given a pytorch model, map out the submodule name (usually linear layers)
+    to the QuantizationArgs
+
+    :param model: pytorch model
+    """
     quantized_modules_to_args = {}
     for name, submodule in iter_named_leaf_modules(model):
         if is_module_quantized(submodule):
```
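Per the new annotation, the returned mapping pairs each quantized submodule's name with the `QuantizationArgs` governing its weights. An illustrative sketch of the shape of that mapping; the module names and argument values below are hypothetical:

```python
from compressed_tensors.quantization.quant_args import QuantizationArgs

# illustrative only: what a Dict[str, QuantizationArgs] result might contain
quantized_modules_to_args = {
    "model.layers.0.self_attn.q_proj": QuantizationArgs(num_bits=4, group_size=128),
    "model.layers.0.mlp.down_proj": QuantizationArgs(num_bits=4, group_size=128),
}
```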
```diff
--- compressed_tensors/compressors/quantized_compressors/naive_quantized.py
+++ compressed_tensors/compressors/quantized_compressors/naive_quantized.py
@@ -93,9 +93,11 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
                 args=quantization_args,
                 dtype=quantization_args.pytorch_dtype(),
             )
+        else:
+            quantized_weight = weight

-            if device is not None:
-                quantized_weight = quantized_weight.to(device)
+        if device is not None:
+            quantized_weight = quantized_weight.to(device)

         return {"weight": quantized_weight}

```
```diff
--- compressed_tensors/compressors/quantized_compressors/pack_quantized.py
+++ compressed_tensors/compressors/quantized_compressors/pack_quantized.py
@@ -94,6 +94,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
                 args=quantization_args,
                 dtype=torch.int8,
             )
+        else:
+            quantized_weight = weight

         packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
         weight_shape = torch.tensor(weight.shape)
```
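In both compressors, `quantized_weight` was previously assigned only inside the `if` branch, so a weight that could not be quantized left the name unbound when control reached the code below. A standalone sketch of the fixed control flow; the helper logic is stubbed for illustration:

```python
import torch

def compress_weight_sketch(weight: torch.Tensor, quantizable: bool) -> dict:
    # `quantizable` stands in for the library's can_quantize(...) check
    if quantizable:
        quantized_weight = weight.to(torch.int8)  # stands in for quantize(...)
    else:
        # the new fallback branch: without it, quantized_weight stayed
        # unbound here and the return below raised UnboundLocalError
        quantized_weight = weight
    return {"weight": quantized_weight}
```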
```diff
--- compressed_tensors/linear/compressed_linear.py
+++ compressed_tensors/linear/compressed_linear.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Dict, Tuple
+
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.quantization import (
@@ -53,7 +55,7 @@ class CompressedLinear(Linear):
         )

         # get the shape and dtype of compressed parameters
-        compression_params = module.compressor.compression_param_info(
+        compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
             module.weight.shape, quantization_scheme.weights
         )

```
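The new `Dict[str, Tuple]` annotation says `compression_param_info` maps each compressed parameter name to a tuple describing it. Assuming that tuple is (shape, dtype), a packed low-bit weight might look roughly like this; the names and values are illustrative, not taken verbatim from the library:

```python
import torch

# illustrative only: parameter names and tuple layout assumed from the
# Dict[str, Tuple] annotation above
compression_params = {
    "weight_packed": (torch.Size([4096, 512]), torch.int32),
    "weight_shape": (torch.Size([2]), torch.int32),
}
```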
```diff
--- compressed_tensors/quantization/lifecycle/apply.py
+++ compressed_tensors/quantization/lifecycle/apply.py
@@ -106,7 +106,8 @@ def apply_quantization_config(
     model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
 ) -> OrderedDict:
     """
-    Initializes the model for quantization in-place based on the given config
+    Initializes the model for quantization in-place based on the given config.
+    Optionally coverts quantizable modules to compressed_linear modules

     :param model: model to apply quantization config to
     :param config: quantization config
```
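A minimal sketch of calling this function, under the assumption that dict entries in `config_groups` validate into `QuantizationScheme` objects; the 8-bit scheme below is hypothetical:

```python
import torch
from compressed_tensors.quantization import (
    QuantizationConfig,
    apply_quantization_config,
)

# hypothetical config: 8-bit symmetric int weights for all Linear layers
config = QuantizationConfig(
    config_groups={
        "group_0": {
            "targets": ["Linear"],
            "weights": {"num_bits": 8, "type": "int", "symmetric": True},
        }
    },
)

model = torch.nn.Sequential(torch.nn.Linear(64, 64))
# initializes quantization parameters on matching modules in-place;
# per the docstring above, run_compressed=True may additionally swap
# quantizable modules for CompressedLinear
apply_quantization_config(model, config)
```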
```diff
--- compressed_tensors/quantization/quant_args.py
+++ compressed_tensors/quantization/quant_args.py
@@ -17,6 +17,7 @@ from enum import Enum
 from typing import Any, Dict, Optional, Union

 import torch
+from compressed_tensors.utils import Aliasable
 from pydantic import BaseModel, Field, field_validator, model_validator


@@ -53,17 +54,29 @@ class QuantizationStrategy(str, Enum):
     TOKEN = "token"


-class ActivationOrdering(str, Enum):
+class ActivationOrdering(Aliasable, str, Enum):
     """
     Enum storing strategies for activation ordering

     Group: reorder groups and weight\n
-    Weight: only reorder weight, not groups. Slightly lower latency and
-        accuracy compared to group actorder\n
+    Weight: only reorder weight, not groups. Slightly lower accuracy but also lower
+        latency when compared to group actorder\n
+    Dynamic: alias for Group\n
+    Static: alias for Weight\n
     """

     GROUP = "group"
     WEIGHT = "weight"
+    # aliases
+    DYNAMIC = "dynamic"
+    STATIC = "static"
+
+    @staticmethod
+    def get_aliases() -> Dict[str, str]:
+        return {
+            "dynamic": "group",
+            "static": "weight",
+        }


 class QuantizationArgs(BaseModel, use_enum_values=True):
```
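Given the alias table above, the new members are meant to compare equal to their canonical counterparts through the `Aliasable` mixin (added to `utils/helpers.py` later in this diff). A small sketch of the intended behavior:

```python
from compressed_tensors.quantization.quant_args import ActivationOrdering

# "dynamic" canonicalizes to "group" and "static" to "weight"
# under Aliasable.__eq__
assert ActivationOrdering("dynamic") == ActivationOrdering.GROUP
assert ActivationOrdering("static") == ActivationOrdering.WEIGHT
assert ActivationOrdering.GROUP == "dynamic"  # raw strings canonicalize too
```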
```diff
--- compressed_tensors/quantization/quant_config.py
+++ compressed_tensors/quantization/quant_config.py
@@ -132,9 +132,9 @@ class QuantizationConfig(BaseModel):
         `k_proj` and `v_proj` in their names. If this is not the case
         and kv_cache_scheme != None, the quantization of kv cache will fail
     :global_compression_ratio: optional informational config to report the model
-    compression ratio acheived by the quantization config
+        compression ratio acheived by the quantization config
     :ignore: optional list of layers to ignore from config_groups. Layers in this list
-    are not quantized even if they match up with a target in config_groups
+        are not quantized even if they match up with a target in config_groups
     """

     config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
```
```diff
--- compressed_tensors/quantization/quant_scheme.py
+++ compressed_tensors/quantization/quant_scheme.py
@@ -13,14 +13,14 @@
 # limitations under the License.

 from copy import deepcopy
-from typing import List, Optional
+from typing import Any, Dict, List, Optional

 from compressed_tensors.quantization.quant_args import (
     QuantizationArgs,
     QuantizationStrategy,
     QuantizationType,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, model_validator


 __all__ = [
@@ -36,7 +36,7 @@ class QuantizationScheme(BaseModel):
     of modules should be quantized

     :param targets: list of modules to apply the QuantizationArgs to, can be layer
-        names, layer types or a regular expression
+        names, layer types or a regular expression, typically ["Linear"]
     :param weights: quantization config for layer weights
     :param input_activations: quantization config for layer inputs
     :param output_activations: quantization config for layer outputs
@@ -47,27 +47,20 @@ class QuantizationScheme(BaseModel):
     input_activations: Optional[QuantizationArgs] = None
     output_activations: Optional[QuantizationArgs] = None

-    @classmethod
-    def default_scheme(
-        cls,
-        targets: Optional[List[str]] = None,
-    ):
-
-        if targets is None:
-            # default to quantizing all Linear layers
-            targets = ["Linear"]
-
-        # by default, activations and weights are left unquantized
-        weights = None
-        input_activations = None
-        output_activations = None
-
-        return cls(
-            targets=targets,
-            weights=weights,
-            input_activations=input_activations,
-            output_activations=output_activations,
-        )
+    @model_validator(mode="after")
+    def validate_model_after(model: "QuantizationArgs") -> Dict[str, Any]:
+        inputs = model.input_activations
+        outputs = model.output_activations
+
+        if inputs is not None:
+            if inputs.actorder is not None:
+                raise ValueError("Cannot apply actorder to input activations")
+
+        if outputs is not None:
+            if outputs.actorder is not None:
+                raise ValueError("Cannot apply actorder to output activations")
+
+        return model


 """
```
```diff
--- compressed_tensors/utils/helpers.py
+++ compressed_tensors/utils/helpers.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from typing import Any, Optional
+from typing import Any, Dict, Optional

 import torch
 from transformers import AutoConfig
@@ -24,6 +24,7 @@ __all__ = [
     "tensor_follows_mask_structure",
     "replace_module",
     "is_compressed_tensors_config",
+    "Aliasable",
 ]

 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -119,3 +120,34 @@ def is_compressed_tensors_config(compression_config: Any) -> bool:
         return isinstance(compression_config, CompressedTensorsConfig)
     except ImportError:
         return False
+
+
+class Aliasable:
+    """
+    A mixin for enums to allow aliasing of enum members
+
+    Example:
+    >>> class MyClass(Aliasable, int, Enum):
+    >>>     ...
+    """
+
+    @staticmethod
+    def get_aliases() -> Dict[str, str]:
+        raise NotImplementedError()
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            aliases = self.get_aliases()
+            return self.value == other.value or (
+                aliases.get(self.value, self.value)
+                == aliases.get(other.value, other.value)
+            )
+        else:
+            aliases = self.get_aliases()
+            self_value = aliases.get(self.value, self.value)
+            other_value = aliases.get(other, other)
+            return self_value == other_value
+
+    def __hash__(self):
+        canonical_value = self.aliases.get(self.value, self.value)
+        return hash(canonical_value)
```
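Mirroring the docstring's example, a self-contained sketch of how the mixin is intended to be used; the `Color` enum is hypothetical. (Note that `__hash__` above consults `self.aliases` rather than `get_aliases()`, so hashing members of an `Aliasable` enum may not work as written in this release; the sketch sticks to equality.)

```python
from enum import Enum
from typing import Dict

from compressed_tensors.utils import Aliasable

class Color(Aliasable, str, Enum):
    RED = "red"
    CRIMSON = "crimson"  # alias of RED

    @staticmethod
    def get_aliases() -> Dict[str, str]:
        return {"crimson": "red"}

assert Color.CRIMSON == Color.RED  # members compare via canonical values
assert Color.RED == "crimson"      # raw strings are canonicalized as well
```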
```diff
--- compressed_tensors/version.py
+++ compressed_tensors/version.py
@@ -17,7 +17,7 @@ Functionality for storing and setting the version info for SparseML
 """


-version_base = "0.8.0"
+version_base = "0.8.1"
 is_release = True  # change to True to set the generated version as a release version


```
```diff
--- compressed_tensors-0.8.0.dist-info/METADATA
+++ compressed_tensors-0.8.1.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors
-Version: 0.8.0
+Version: 0.8.1
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
```
```diff
--- compressed_tensors-0.8.0.dist-info/RECORD
+++ compressed_tensors-0.8.1.dist-info/RECORD
@@ -1,15 +1,15 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=Z9w80ldLHldBZrnrRolznhe-AZsAg5ftvHw17kgPs10,1585
+compressed_tensors/version.py,sha256=U6bppqc5inOxvcJDHWhDoSXvBrvbH425oJM2WG7TECY,1585
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=D9TNwQcjanDiAHODPbg8JUqc66e3j50rctY7A708NEs,6743
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=XJgPsq8KiDfiR4e8bSI38lmoOd2ApqRk1aPcXS2obqY,15600
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=sxh1TvW1Bp9YJE41hW0XZfd0kYYB85nhJvBLVRTDcV0,15886
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=K1KOnS6Y8nUA1-HN7VhyfsDc01nilW0WfXMUhuD-l8w,5954
-compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=Mmfr-hap-4zw7CzE1mXi0UirknqGidNxw38GGWVgTqM,4916
-compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=9H8UrG5v1GRtslLjOEiUM2dnyxJnR-HJmlsFezQs_r0,7706
+compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=MMUya3Iwarm0BkeYXqKTUnEDPiBw98GKF09QiNST45k,4960
+compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=1CLwvBlu4AtGkuo3IisD1-rQzwLiA6hE1bCc-pF_XGo,7758
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=i2TESH27l7KXeOhJ6hShIoI904XX96l-cRQiMR6MAaU,704
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=Ua4rUSGyucEs-YJI5z3oIUF-zqQLrFsQ9f-qKasEdUM,4410
 compressed_tensors/compressors/sparse_compressors/dense.py,sha256=lSKNWRx6H7aUqaJj1j4qbXk8Gkm1UohbnvW1Rvq6Ra4,1284
@@ -21,13 +21,13 @@ compressed_tensors/config/base.py,sha256=3bFAdwDZjOt-U3fneOeL8dRci-PS8DqstnXuQVt
 compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/linear/compressed_linear.py,sha256=0jTTf6XxOAjAYs3tvFtgiNMAO4W10sSeR-pdH2M413g,3218
+compressed_tensors/linear/compressed_linear.py,sha256=MJa-UfoKhIkdUWRD1shrXXri2cOwR5GK0a4t4bNYosM,3268
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
-compressed_tensors/quantization/quant_args.py,sha256=osjNwCSB6tcyH9Qeg5sHEiB-bHyi3XJ8TzkGVJuGTc4,8711
-compressed_tensors/quantization/quant_config.py,sha256=NCiMvUMnnz5kTyAkDylxjtEGQnjgsIYIeNR2zyHEdTQ,10371
-compressed_tensors/quantization/quant_scheme.py,sha256=5ggPz5sqEfTUgvJJeiPIINA74QtO-08hb3szsm7UHGE,6000
+compressed_tensors/quantization/quant_args.py,sha256=jwC__lSmuiJ2qSJYYZGgWgQNbZu6YhhS0e-qugrTNXE,9058
+compressed_tensors/quantization/quant_config.py,sha256=K6kOZ6LDXpFlqsVzR4NEATV6y6Ea83rJWnNyVlvw-pI,10379
+compressed_tensors/quantization/quant_scheme.py,sha256=eQ0JrRZ80GX69fpwW87VzPzzhajhk4mUaJScjk82OY4,6010
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=pdCqxXnVw7HoDDanaOtek13g8x_nb54CBUlfuMdhFG4,14993
+compressed_tensors/quantization/lifecycle/apply.py,sha256=jCUSgeOBtagE5IhgIbyYMZ4kv8Rm20VGJ4IxXZ5HAnw,15066
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=QPL6-vKOFuKdKIEsVqMhsw4x552Jpm2sqO0oeChbnrM,12941
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
@@ -37,14 +37,14 @@ compressed_tensors/quantization/utils/helpers.py,sha256=DBP-sGRpGAY01K0LFE7qqonN
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
-compressed_tensors/utils/helpers.py,sha256=hWGIR0W7ENHwdC7wW2SQJJiCF9-xOu_u3fY2RzLyYg4,4101
+compressed_tensors/utils/helpers.py,sha256=T3p0TbhWbQIRjL6Up2Z7UhZO5jpR6WxBhYPPvrhE6lE,5018
 compressed_tensors/utils/offload.py,sha256=d9q8LNe8HyF8tOjgjA7QGLD3HRysmNp0d8eBbdqBgIM,4089
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.8.0.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.8.0.dist-info/METADATA,sha256=lRjH5wempREQ2lTFNqzMusIW95YHN4rF8yd73MVvOe0,6782
-compressed_tensors-0.8.0.dist-info/WHEEL,sha256=bFJAMchF8aTQGUgMZzHJyDDMPTO3ToJ7x23SLJa1SVo,92
-compressed_tensors-0.8.0.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.8.0.dist-info/RECORD,,
+compressed_tensors-0.8.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.8.1.dist-info/METADATA,sha256=rDPAoGePUI_yRN7LRP23t3vKWhDfxPbeNR1TX6vpPPI,6782
+compressed_tensors-0.8.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+compressed_tensors-0.8.1.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.8.1.dist-info/RECORD,,
```
```diff
--- compressed_tensors-0.8.0.dist-info/WHEEL
+++ compressed_tensors-0.8.1.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.45.0)
+Generator: bdist_wheel (0.45.1)
 Root-Is-Purelib: true
 Tag: py3-none-any

```