compressed-tensors 0.8.0__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/PKG-INFO +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +17 -5
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +4 -2
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +2 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/linear/compressed_linear.py +3 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/apply.py +2 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_args.py +16 -3
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_config.py +2 -2
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_scheme.py +17 -24
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/helpers.py +33 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/version.py +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/PKG-INFO +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/LICENSE +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/README.md +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/pyproject.toml +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/setup.cfg +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/setup.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/config/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/top_level.txt +0 -0
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/model_compressors/model_compressor.py
RENAMED
@@ -24,7 +24,6 @@ import compressed_tensors
 import torch
 import transformers
 from compressed_tensors.base import (
-    COMPRESSION_CONFIG_NAME,
     COMPRESSION_VERSION_NAME,
     QUANTIZATION_CONFIG_NAME,
     QUANTIZATION_METHOD_NAME,
@@ -39,6 +38,7 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization,
 )
+from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
@@ -103,12 +103,14 @@ class ModelCompressor:
         :return: compressor for the configs, or None if model is not compressed
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        compression_config = getattr(config,
+        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
+
         return cls.from_compression_config(compression_config)
 
     @classmethod
     def from_compression_config(
-        cls,
+        cls,
+        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
     ):
         """
         :param compression_config:
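For context, a minimal usage sketch of the updated loading path. The checkpoint directory below is hypothetical; the sketch assumes a compressed-tensors checkpoint whose config.json carries the quantization config under the "quantization_config" key read above.

from compressed_tensors import ModelCompressor

# Hypothetical checkpoint directory; from_pretrained returns None when the
# model's config carries no quantization_config entry.
compressor = ModelCompressor.from_pretrained("path/to/compressed-model")
if compressor is not None:
    print(compressor.quantization_config)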
@@ -265,7 +267,11 @@ class ModelCompressor:
         state_dict = model.state_dict()
 
         compressed_state_dict = state_dict
-
+
+        quantized_modules_to_args: Dict[
+            str, QuantizationArgs
+        ] = map_modules_to_quant_args(model)
+
         if self.quantization_compressor is not None:
             compressed_state_dict = self.quantization_compressor.compress(
                 state_dict, names_to_scheme=quantized_modules_to_args
@@ -369,7 +375,13 @@ class ModelCompressor:
             update_parameter_data(module, data, param_name)
 
 
-def map_modules_to_quant_args(model: Module) -> Dict:
+def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
+    """
+    Given a pytorch model, map out the submodule name (usually linear layers)
+    to the QuantizationArgs
+
+    :param model: pytorch model
+    """
     quantized_modules_to_args = {}
     for name, submodule in iter_named_leaf_modules(model):
         if is_module_quantized(submodule):
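As an illustration of the new annotation, the mapping produced here keys submodule names (usually linear layers) to their weight QuantizationArgs before being handed to the quantization compressor. A sketch of the structure, with illustrative module names that are not taken from any real model:

from compressed_tensors.quantization import QuantizationArgs

# Sketch of what map_modules_to_quant_args(model) returns; the keys are
# hypothetical layer names, the values are each layer's weight args.
quantized_modules_to_args = {
    "model.layers.0.self_attn.q_proj": QuantizationArgs(num_bits=4, type="int"),
    "model.layers.0.mlp.gate_proj": QuantizationArgs(num_bits=4, type="int"),
}
# ModelCompressor.compress passes this mapping to the quantization
# compressor as names_to_scheme (see the hunk above).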
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
RENAMED
@@ -93,9 +93,11 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
                 args=quantization_args,
                 dtype=quantization_args.pytorch_dtype(),
             )
+        else:
+            quantized_weight = weight
 
-
-
+        if device is not None:
+            quantized_weight = quantized_weight.to(device)
 
         return {"weight": quantized_weight}
 
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
RENAMED
@@ -94,6 +94,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
                 args=quantization_args,
                 dtype=torch.int8,
             )
+        else:
+            quantized_weight = weight
 
         packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
         weight_shape = torch.tensor(weight.shape)
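Both compressor changes close the same gap: when the incoming weight is already quantized and the quantize call is skipped, the weight is now passed through instead of leaving quantized_weight unbound. A self-contained sketch of the fixed control flow (not the library implementation; the already_quantized flag stands in for the library's own check):

import torch

def compress_weight_sketch(weight: torch.Tensor, already_quantized: bool,
                           device: torch.device = None) -> dict:
    """Sketch of the fixed control flow in compress_weight (illustrative only)."""
    if not already_quantized:
        # stand-in for the real quantize(...) call
        quantized_weight = torch.round(weight).to(torch.int8)
    else:
        # new in 0.8.1: pass the already-quantized tensor through unchanged,
        # so quantized_weight is always bound before use
        quantized_weight = weight

    if device is not None:
        quantized_weight = quantized_weight.to(device)

    return {"weight": quantized_weight}

# usage: a weight that is already int8 is returned as-is
out = compress_weight_sketch(torch.zeros(4, 4, dtype=torch.int8), already_quantized=True)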
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/linear/compressed_linear.py
RENAMED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Dict, Tuple
+
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.quantization import (
@@ -53,7 +55,7 @@ class CompressedLinear(Linear):
         )
 
         # get the shape and dtype of compressed parameters
-        compression_params = module.compressor.compression_param_info(
+        compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
             module.weight.shape, quantization_scheme.weights
         )
 
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/apply.py
RENAMED
@@ -106,7 +106,8 @@ def apply_quantization_config(
     model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
 ) -> OrderedDict:
     """
-    Initializes the model for quantization in-place based on the given config
+    Initializes the model for quantization in-place based on the given config.
+    Optionally coverts quantizable modules to compressed_linear modules
 
     :param model: model to apply quantization config to
     :param config: quantization config
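A minimal sketch of calling this entry point, assuming a hand-written config (in practice the dict comes from a checkpoint's quantization_config entry; the group name and values below are hypothetical):

import torch
from compressed_tensors.quantization import QuantizationConfig, apply_quantization_config

# Hypothetical config describing 4-bit symmetric per-channel weight quantization
config = QuantizationConfig.model_validate(
    {
        "config_groups": {
            "group_0": {
                "targets": ["Linear"],
                "weights": {"num_bits": 4, "type": "int", "symmetric": True, "strategy": "channel"},
            }
        },
        "ignore": ["lm_head"],
    }
)

model = torch.nn.Sequential(torch.nn.Linear(16, 16))
# Attaches quantization schemes and parameters to matching modules in-place.
# Per the updated docstring above, run_compressed=True additionally opts in to
# converting quantizable modules to CompressedLinear for compressed checkpoints.
apply_quantization_config(model, config)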
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_args.py
RENAMED
@@ -17,6 +17,7 @@ from enum import Enum
 from typing import Any, Dict, Optional, Union
 
 import torch
+from compressed_tensors.utils import Aliasable
 from pydantic import BaseModel, Field, field_validator, model_validator
 
 
@@ -53,17 +54,29 @@ class QuantizationStrategy(str, Enum):
     TOKEN = "token"
 
 
-class ActivationOrdering(str, Enum):
+class ActivationOrdering(Aliasable, str, Enum):
     """
     Enum storing strategies for activation ordering
 
     Group: reorder groups and weight\n
-    Weight: only reorder weight, not groups. Slightly lower
-
+    Weight: only reorder weight, not groups. Slightly lower accuracy but also lower
+    latency when compared to group actorder\n
+    Dynamic: alias for Group\n
+    Static: alias for Weight\n
     """
 
     GROUP = "group"
     WEIGHT = "weight"
+    # aliases
+    DYNAMIC = "dynamic"
+    STATIC = "static"
+
+    @staticmethod
+    def get_aliases() -> Dict[str, str]:
+        return {
+            "dynamic": "group",
+            "static": "weight",
+        }
 
 
 class QuantizationArgs(BaseModel, use_enum_values=True):
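The effect of the new aliases, as a small illustration derived from the __eq__ defined on the Aliasable mixin (these assertions are not library test code):

from compressed_tensors.quantization.quant_args import ActivationOrdering

# "dynamic" is treated as an alias of "group", "static" as an alias of "weight"
assert ActivationOrdering.DYNAMIC == ActivationOrdering.GROUP
assert ActivationOrdering.STATIC == ActivationOrdering.WEIGHT
assert ActivationOrdering.DYNAMIC == "group"  # comparison resolves through get_aliases()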
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_config.py
RENAMED
@@ -132,9 +132,9 @@ class QuantizationConfig(BaseModel):
         `k_proj` and `v_proj` in their names. If this is not the case
         and kv_cache_scheme != None, the quantization of kv cache will fail
     :global_compression_ratio: optional informational config to report the model
-
+        compression ratio acheived by the quantization config
     :ignore: optional list of layers to ignore from config_groups. Layers in this list
-
+        are not quantized even if they match up with a target in config_groups
     """
 
     config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_scheme.py
RENAMED
@@ -13,14 +13,14 @@
 # limitations under the License.
 
 from copy import deepcopy
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 
 from compressed_tensors.quantization.quant_args import (
     QuantizationArgs,
     QuantizationStrategy,
     QuantizationType,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, model_validator
 
 
 __all__ = [
@@ -36,7 +36,7 @@ class QuantizationScheme(BaseModel):
     of modules should be quantized
 
     :param targets: list of modules to apply the QuantizationArgs to, can be layer
-        names, layer types or a regular expression
+        names, layer types or a regular expression, typically ["Linear"]
     :param weights: quantization config for layer weights
     :param input_activations: quantization config for layer inputs
     :param output_activations: quantization config for layer outputs
@@ -47,27 +47,20 @@ class QuantizationScheme(BaseModel):
     input_activations: Optional[QuantizationArgs] = None
     output_activations: Optional[QuantizationArgs] = None
 
-    @
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-        return cls(
-            targets=targets,
-            weights=weights,
-            input_activations=input_activations,
-            output_activations=output_activations,
-        )
+    @model_validator(mode="after")
+    def validate_model_after(model: "QuantizationArgs") -> Dict[str, Any]:
+        inputs = model.input_activations
+        outputs = model.output_activations
+
+        if inputs is not None:
+            if inputs.actorder is not None:
+                raise ValueError("Cannot apply actorder to input activations")
+
+        if outputs is not None:
+            if outputs.actorder is not None:
+                raise ValueError("Cannot apply actorder to output activations")
+
+        return model
 
 
 """
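A small illustration of the new validator, based on the hunk above rather than on library tests: activation ordering remains valid on weights, but setting actorder on activation args now fails at construction time.

from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme

# Weights may use activation ordering (group strategy implied by group_size)...
ok = QuantizationScheme(
    targets=["Linear"],
    weights=QuantizationArgs(num_bits=4, group_size=128, actorder="group"),
)

# ...but activations may not: the new model_validator rejects the scheme.
try:
    QuantizationScheme(
        targets=["Linear"],
        input_activations=QuantizationArgs(num_bits=8, group_size=128, actorder="group"),
    )
except ValueError as err:
    # the validation error includes "Cannot apply actorder to input activations"
    print(err)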
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/helpers.py
RENAMED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Optional
+from typing import Any, Dict, Optional
 
 import torch
 from transformers import AutoConfig
@@ -24,6 +24,7 @@ __all__ = [
     "tensor_follows_mask_structure",
     "replace_module",
     "is_compressed_tensors_config",
+    "Aliasable",
 ]
 
 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -119,3 +120,34 @@ def is_compressed_tensors_config(compression_config: Any) -> bool:
         return isinstance(compression_config, CompressedTensorsConfig)
     except ImportError:
         return False
+
+
+class Aliasable:
+    """
+    A mixin for enums to allow aliasing of enum members
+
+    Example:
+    >>> class MyClass(Aliasable, int, Enum):
+    >>>     ...
+    """
+
+    @staticmethod
+    def get_aliases() -> Dict[str, str]:
+        raise NotImplementedError()
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            aliases = self.get_aliases()
+            return self.value == other.value or (
+                aliases.get(self.value, self.value)
+                == aliases.get(other.value, other.value)
+            )
+        else:
+            aliases = self.get_aliases()
+            self_value = aliases.get(self.value, self.value)
+            other_value = aliases.get(other, other)
+            return self_value == other_value
+
+    def __hash__(self):
+        canonical_value = self.aliases.get(self.value, self.value)
+        return hash(canonical_value)
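To show how the mixin is meant to be used outside of ActivationOrdering, here is a self-contained sketch with an illustrative enum (the Color class and its members are hypothetical, mirroring the docstring example; equality resolves through get_aliases):

from enum import Enum
from typing import Dict

from compressed_tensors.utils import Aliasable


class Color(Aliasable, str, Enum):
    """Illustrative enum: 'crimson' is treated as an alias of 'red'."""
    RED = "red"
    BLUE = "blue"
    # alias member
    CRIMSON = "crimson"

    @staticmethod
    def get_aliases() -> Dict[str, str]:
        return {"crimson": "red"}


# Alias-aware equality provided by the mixin:
assert Color.CRIMSON == Color.RED
assert Color.CRIMSON == "red"
assert Color.BLUE != Color.RED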