compressed-tensors 0.11.1a20250909__py3-none-any.whl → 0.11.1a20250910__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,7 +50,6 @@ from compressed_tensors.utils import (
     get_offloaded_device,
     get_safetensors_folder,
     has_offloaded_params,
-    merge_names,
     register_offload_parameter,
     update_parameter_data,
 )
@@ -224,7 +223,8 @@ class ModelCompressor:
             s_config = compression_config.sparsity_config
             return s_config.model_dump() if s_config is not None else None
 
-        return compression_config.get(SPARSITY_CONFIG_NAME, None)
+        # explicitly return None if {} in config
+        return compression_config.get(SPARSITY_CONFIG_NAME, None) or None
 
     @staticmethod
     def parse_quantization_config(
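Note on the hunk above: dict.get returns an empty dict unchanged, so a {} sparsity entry used to come back as {}, which passes "is not None" checks downstream despite carrying no configuration. The trailing "or None" collapses any falsy value to None. A minimal sketch of the idiom, using a literal "sparsity_config" key as a stand-in for SPARSITY_CONFIG_NAME:

    # dict.get keeps {} as-is; `... or None` normalizes {} (and None) to None
    config_with_empty = {"sparsity_config": {}}  # hypothetical HF config dict
    config_missing = {}

    assert config_with_empty.get("sparsity_config", None) == {}
    assert (config_with_empty.get("sparsity_config", None) or None) is None
    assert (config_missing.get("sparsity_config", None) or None) is None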
@@ -320,112 +320,6 @@ class ModelCompressor:
             format, config=quantization_config
         )
 
-    # ----- used by hf quantizer ----- #
-
-    def get_missing_module_keys(self, model: Module) -> List[str]:
-        """
-        Identifies the expected missing weight keys in the compressed state_dict.
-
-        When a model undergoes sparsity or quantization compression, certain
-        weight tensors may be absent from the checkpoint by virtue of compression.
-        This function determines which weight keys are missing based on the
-        applied compression techniques.
-
-        :param model: The PyTorch model to check for missing keys.
-        :return: A list of missing keys expected in the compressed state_dict.
-        """
-        missing_keys = set()
-
-        # Determine missing keys due to sparsity compression
-        if (
-            self.sparsity_compressor
-            and self.sparsity_config.format != CompressionFormat.dense.value
-        ):
-            sparse_targets = match_named_modules(
-                model=model,
-                targets=self.sparsity_config.targets,
-                ignore=self.sparsity_config.ignore,
-            )
-
-            missing_keys.update(
-                merge_names(target_name, "weight")
-                for target_name, _module in sparse_targets
-            )
-
-        # Determine missing keys due to pack quantization
-        if (
-            self.quantization_compressor
-            and self.quantization_config.format
-            == CompressionFormat.pack_quantized.value
-        ):
-            for scheme in self.quantization_config.config_groups.values():
-                quant_targets = match_named_modules(
-                    model=model,
-                    targets=scheme.targets,
-                    ignore=self.quantization_config.ignore,
-                )
-                missing_keys.update(
-                    merge_names(target_name, "weight")
-                    for target_name, _module in quant_targets
-                )
-
-        return list(missing_keys)
-
-    def get_unexpected_file_keys(self, model: Module) -> List[str]:
-        """
-        Identifies extra keys introduced by the compression process in the
-        compressed state_dict that are not expected by the model graph.
-
-        During sparsity or quantization compression, additional metadata or
-        auxiliary parameters may be stored in the checkpoint, which do not
-        correspond to any parameter in the original model. These keys are
-        typically introduced to support the reconstruction of compressed weights.
-
-        For example, Sparse24Bitmask compression may introduce keys such as
-        'compressed', 'bitmask', and 'shape' in the checkpoint, which are
-        not part of the original model parameters.
-
-        :param model: The PyTorch model to check for unexpected keys.
-        :return: A list of extra keys introduced by the compression process
-            that are not expected by the model.
-        """
-
-        unexpected_keys = set()
-
-        # Identify unexpected keys from sparsity compression
-        if (
-            self.sparsity_compressor
-            and self.sparsity_config.format != CompressionFormat.dense.value
-        ):
-            sparse_targets = match_named_modules(
-                model=model,
-                targets=self.sparsity_config.targets,
-                ignore=self.sparsity_config.ignore,
-            )
-            unexpected_keys.update(
-                merge_names(target_name, param)
-                for target_name, _module in sparse_targets
-                for param in self.sparsity_compressor.compression_param_names
-            )
-
-        # Identify unexpected keys from quantization compression
-        if self.quantization_compressor:
-            for scheme in self.quantization_config.config_groups.values():
-                quant_targets = match_named_modules(
-                    model=model,
-                    targets=scheme.targets,
-                    ignore=self.quantization_config.ignore,
-                )
-                for quant_compressor in self.quantization_compressor.values():
-                    unexpected_keys.update(
-                        merge_names(target_name, param)
-                        for target_name, _module in quant_targets
-                        for param in quant_compressor.compression_param_names
-                        if param != "weight"
-                    )
-
-        return list(unexpected_keys)
-
     # ----- model memory compression/decompression pathways ----- #
 
     def compress_model(self, model: Module):
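The two helpers deleted above ("used by hf quantizer") enumerated checkpoint keys by joining module paths with parameter names via merge_names, which is why the first hunk also drops merge_names from the imports. A rough sketch of the keys involved, assuming merge_names joins with a dot (the usual state_dict convention) and using an illustrative module path:

    def merge_names(prefix: str, name: str) -> str:
        # stand-in for compressed_tensors.utils.merge_names (assumed dot-join)
        return f"{prefix}.{name}"

    target = "model.layers.0.self_attn.q_proj"  # hypothetical matched module
    # key expected to be missing from a compressed checkpoint:
    missing = merge_names(target, "weight")
    # extra checkpoint-only keys, e.g. for Sparse24Bitmask compression:
    unexpected = [merge_names(target, p) for p in ("compressed", "bitmask", "shape")]
    print(missing, unexpected)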
@@ -712,17 +606,16 @@ class ModelCompressor:
         # Load activation scales/zp or any other quantization parameters
         # Conditionally load the weight quantization parameters if we have a
         # dense compressor or if a sparsity compressor has already been applied
+        load_weight_qparams = sparse_decompressed or isinstance(
+            quant_compressor, DenseCompressor
+        )
         load_pretrained_quantization_parameters(
             model,
             model_path,
             # TODO: all weight quantization params will be moved to the
             # compressor in a follow-up including initialization
-            load_weight_quantization=(
-                sparse_decompressed
-                or isinstance(quant_compressor, DenseCompressor)
-            ),
+            load_weight_qparams=load_weight_qparams,
         )
-
         model_path_or_state_dict = (
             model.state_dict() if sparse_decompressed else model_path
         )
@@ -732,7 +625,9 @@ class ModelCompressor:
         )
         # TODO: all weight quantization params will be moved to the compressor
         # to prevent duplicate parameter updates in update_parameter_data
-        self._replace_weights(dense_gen, model)
+        self._replace_weights(
+            dense_gen, model, load_weight_qparams=not load_weight_qparams
+        )
 
         def freeze_quantization_status(module):
             module.quantization_status = QuantizationStatus.FROZEN
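The two hunks above thread a single load_weight_qparams flag through decompression: when the flag is set, weight scales and zero points are loaded eagerly by load_pretrained_quantization_parameters; otherwise _replace_weights applies them while swapping in the dense weights. Passing "not load_weight_qparams" keeps the two paths mutually exclusive, avoiding the duplicate update_parameter_data calls the TODO mentions. A tiny truth-table check of that invariant (illustrative only, not library code):

    for sparse_decompressed in (False, True):
        for is_dense_compressor in (False, True):
            load_weight_qparams = sparse_decompressed or is_dense_compressor
            eager = load_weight_qparams         # load_pretrained_quantization_parameters
            deferred = not load_weight_qparams  # _replace_weights
            assert eager != deferred            # exactly one path loads weight qparams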
@@ -819,7 +714,9 @@ class ModelCompressor:
         param = torch.nn.Parameter(data.to(device), requires_grad=requires_grad)
         register_offload_parameter(module, param_name, param)
 
-    def _replace_weights(self, dense_weight_generator, model: Module):
+    def _replace_weights(
+        self, dense_weight_generator, model: Module, load_weight_qparams: bool = True
+    ):
         """
         Replace the weights of the model with the
         provided dense weights.
@@ -847,6 +744,7 @@ class ModelCompressor:
                     # decompression in init to be consistent with loading which happens
                     # later as well however, update_data does a good shape check -
                     # should be moved to the compressor
+
                     if param_name == "weight":
                         delattr(module, param_name)
                         requires_grad = param_data.dtype in (
@@ -858,7 +756,7 @@ class ModelCompressor:
                             param_data.to(device), requires_grad=requires_grad
                         )
                         register_offload_parameter(module, param_name, param)
-                    else:
+                    elif load_weight_qparams:
                         # Should already be registered to the correct device for
                         # for scales/zero-points
                         update_parameter_data(module, param_data, param_name)
@@ -65,19 +65,19 @@ _LOGGER = logging.getLogger(__name__)
 def load_pretrained_quantization_parameters(
     model: Module,
     model_name_or_path: Optional[str] = None,
-    load_weight_quantization: Optional[bool] = False,
+    load_weight_qparams: Optional[bool] = False,
 ):
     """
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
     a model that has already been initialized with a quantization config.
 
     NOTE: Will always load inputs/output parameters. Will conditioanlly load weight
-    parameters, if load_weight_quantization is set to True.
+    parameters, if load_weight_qparams is set to True.
 
     :param model: model to load pretrained quantization parameters to
     :param model_name_or_path: Hugging Face stub or local folder containing a quantized
         model, which is used to load quantization parameters
-    :param load_weight_quantization: whether or not the weight quantization parameters
+    :param load_weight_qparams: whether or not the weight quantization parameters
         should be loaded
     """
     model_path = get_safetensors_folder(model_name_or_path)
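For downstream callers, the rename above is a straightforward keyword swap. A hypothetical call site (the checkpoint path is a placeholder):

    load_pretrained_quantization_parameters(
        model,
        model_name_or_path="/path/to/quantized-checkpoint",  # placeholder
        load_weight_qparams=True,  # formerly load_weight_quantization
    )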
@@ -103,7 +103,7 @@ def load_pretrained_quantization_parameters(
             mapping=mapping,
         )
 
-        if load_weight_quantization and submodule.quantization_scheme.weights:
+        if load_weight_qparams and submodule.quantization_scheme.weights:
             base_name = "weight"
             _load_quant_args_from_mapping(
                 base_name=base_name,
@@ -219,18 +219,9 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
     if status >= QuantizationStatus.INITIALIZED > current_status:
         force_zero_point_init = status != QuantizationStatus.COMPRESSED
 
-        # When decompressing, we set the scale_dtype as the model's dtype
-        # This is because the normal workflow of using the weight's dtype
-        # will be incorrect as the model weight will be compressed
-        # Therfore, use the dtype set by the user using the PretrainedModel
-        scale_dtype = None
-        if status == QuantizationStatus.FROZEN:
-            if hasattr(model, "dtype"):
-                scale_dtype = model.dtype
-
         model.apply(
             lambda module: initialize_module_for_quantization(
-                module, force_zero_point=force_zero_point_init, scale_dtype=scale_dtype
+                module, force_zero_point=force_zero_point_init
             )
         )
 
@@ -59,7 +59,6 @@ def initialize_module_for_quantization(
     module: Module,
     scheme: Optional[QuantizationScheme] = None,
     force_zero_point: bool = True,
-    scale_dtype: Optional[torch.dtype] = None,
 ):
     """
     attaches appropriate scales, zero points, and observers to a layer
@@ -73,8 +72,6 @@
         if not provided, the layer will be skipped
     :param force_zero_point: whether to force initialization of a zero point for
         symmetric quantization
-    :param scale_dtype: dtype to used for the scales, if overriding the
-        weight dtype as the scale dtype
     """
     # TODO: don't initialize parameters when running decompression
     scheme = scheme or getattr(module, "quantization_scheme", None)
@@ -93,7 +90,6 @@
             "input",
             scheme.input_activations,
             force_zero_point=force_zero_point,
-            scale_dtype=scale_dtype,
         )
 
     if scheme.weights is not None:
@@ -107,7 +103,6 @@
             scheme.weights,
             weight_shape=weight_shape,
             force_zero_point=force_zero_point,
-            scale_dtype=scale_dtype,
         )
     else:
         _LOGGER.warning(
@@ -119,7 +114,7 @@
     if scheme.output_activations is not None:
         if not is_kv_cache_quant_scheme(scheme):
             _initialize_scale_zero_point(
-                module, "output", scheme.output_activations, scale_dtype=scale_dtype
+                module, "output", scheme.output_activations
             )
 
     module.quantization_scheme = scheme
@@ -145,7 +140,6 @@ def _initialize_scale_zero_point(
     quantization_args: QuantizationArgs,
     weight_shape: Optional[torch.Size] = None,
     force_zero_point: bool = True,
-    scale_dtype: Optional[torch.dtype] = None,
 ):
     if quantization_args.dynamic is True:
         return
@@ -213,7 +207,7 @@
         expected_shape = 1
 
     # 3. Identify quantization scale and zp dtype
-    scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
+    scale_dtype = module.weight.dtype
 
     if is_fp4(quantization_args=quantization_args):
         scale_dtype = zp_dtype = FP8_E4M3_DATA.dtype
@@ -226,7 +220,7 @@
         torch.float32,
         torch.float64,
     ]:
-        scale_dtype = torch.float16
+        scale_dtype = torch.bfloat16
         zp_dtype = quantization_args.pytorch_dtype()
 
     # 4. Initializes empty scale, zero point, and g_idx parameters for the module
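The fallback change above (float16 to bfloat16, taken when the weight dtype is outside the listed float types) trades mantissa precision for exponent range: bfloat16 keeps float32's 8-bit exponent, so extreme scale magnitudes survive the downcast where float16 would flush or overflow them. A quick illustration (printed values approximate):

    import torch

    tiny = torch.tensor(1e-8)
    print(tiny.to(torch.float16))   # 0.0 -- below fp16's smallest subnormal (~6e-8)
    print(tiny.to(torch.bfloat16))  # ~1e-8 -- still representable

    huge = torch.tensor(1e5)
    print(huge.to(torch.float16))   # inf -- above fp16 max (~65504)
    print(huge.to(torch.bfloat16))  # ~1e5 -- still finite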
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.11.1.a20250909'
+__version__ = version = '0.11.1.a20250910'
 __version_tuple__ = version_tuple = (0, 11, 1)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.11.1a20250909
+Version: 0.11.1a20250910
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -1,11 +1,11 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
-compressed_tensors/version.py,sha256=r7NPIWZc4XFCqdYyi4qPxtVWw1N9RBvLtcldfOSxGIA,523
+compressed_tensors/version.py,sha256=uspJ2GlCAlOy5_cMN5KqjdnqQs72wgmaYeWLk_2EVHU,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=mZqpBS5znPHedlVVkKsUsVCs52zK5bAmEiI8cqMBKnY,37618
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=t_4r2u8PPXMkxKXfqENcmh30q11pG6Xdikj7Pjtf7dw,33444
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=rWvaWDqzi8cctBo982g2n3-y6afRiFl3jfTd90lSMrY,10413
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
@@ -30,11 +30,11 @@ compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSU
 compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
 compressed_tensors/quantization/quant_scheme.py,sha256=EG86Bq5c8q1O4fJL_o3s7gOu1S5SrcLjfNYOPDn414A,9673
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=TuSjKomSk4N0My-UY9PWk2Nyuze6TilEGPsZELgotzk,14716
+compressed_tensors/quantization/lifecycle/apply.py,sha256=Nn0NTtIQ91AWuU05_oYNnVxAXV6C_vW3RW46XcXZwX4,14222
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=xcLTgaff1wYUWzvQqYKmhWYkshWVI-PhLPtBOyyZro0,17576
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=f05UF6NaUGvR9qyxes_AgRcvg3KWgk5JeM_-NL1EQG0,10285
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=GYH79007BPUojETNyvDm5SdHrnwPFVuMGlA8kXCI2Q0,9925
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
 compressed_tensors/quantization/utils/helpers.py,sha256=-pfSmxqHkrB-RnjF0VYz8lMe9CVnB7IJrONf9Y9fjCo,17014
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -63,8 +63,8 @@ compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RK
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.11.1a20250909.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.11.1a20250909.dist-info/METADATA,sha256=ZESMNkRHo7FRcoSr9v_JKDGml5oWDK-Tgcboj-0CnE4,7031
-compressed_tensors-0.11.1a20250909.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.11.1a20250909.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.11.1a20250909.dist-info/RECORD,,
+compressed_tensors-0.11.1a20250910.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.11.1a20250910.dist-info/METADATA,sha256=hoAVyQXgylkzGGRJD4SeIUlVh4FSMWeZLzaeMsKL_RI,7031
+compressed_tensors-0.11.1a20250910.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.11.1a20250910.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.11.1a20250910.dist-info/RECORD,,