PyPI - compressed-tensors - Versions diffs - 0.9.5a20250603__py3-none-any.whl → 0.9.5a20250604__py3-none-any.whl - Mend

compressed-tensors 0.9.5a20250603__py3-none-any.whl → 0.9.5a20250604__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

compressed_tensors/compressors/model_compressors/model_compressor.py CHANGED Viewed

@@ -50,6 +50,7 @@ from compressed_tensors.utils import (
     align_module_device,
     delete_offload_parameter,
     get_execution_device,
+    get_offloaded_device,
     get_safetensors_folder,
     has_offloaded_params,
     merge_names,
@@ -408,16 +409,17 @@ class ModelCompressor:
                     )
                 # remove any existing parameters
-                device = get_execution_device(module)
+                exec_device = get_execution_device(module)
+                offload_device = get_offloaded_device(module)
                 for name, _ in list(module.named_parameters()):
-                    delattr(module, name)
+                    delete_offload_parameter(module, name)
                 # replace with compressed parameters
                 for name, value in state_dict.items():
                     name = name.removeprefix(f"{prefix}.")
-                    value = value.to(device)
+                    value = value.to(exec_device)
                     param = torch.nn.Parameter(value, requires_grad=False)
-                    register_offload_parameter(module, name, param)
+                    register_offload_parameter(module, name, param, offload_device)
                 module.quantization_status = QuantizationStatus.COMPRESSED
@@ -460,30 +462,26 @@ class ModelCompressor:
                 # quantization second
                 if prefix in module_to_scheme:
-                    generator = self.quantization_compressor.decompress_from_state_dict(
-                        state_dict,
-                        names_to_scheme=module_to_scheme,
+                    state_dict = (
+                        self.quantization_compressor.decompress_module_from_state_dict(
+                            prefix,
+                            state_dict,
+                            scheme=module_to_scheme[prefix],
+                        )
                     )
-                    # generates (mod_path, {param_name, param_val})
-                    # of compressed params and used params, but not unused params
-                    # some used params are removed by get_unexpected_file_keys
-                    state_dict = {
-                        merge_names(module_path, param_name): param_value
-                        for module_path, compressed_data in generator
-                        for param_name, param_value in compressed_data.items()
-                    }
                 # remove any existing parameters
-                device = get_execution_device(module)
+                exec_device = get_execution_device(module)
+                offload_device = get_offloaded_device(module)
                 for name, _ in list(module.named_parameters()):
                     delete_offload_parameter(module, name)
                 # replace with decompressed parameters
                 for name, value in state_dict.items():
                     name = name.removeprefix(f"{prefix}.")
-                    value = value.to(device)
+                    value = value.to(exec_device)
                     param = torch.nn.Parameter(value, requires_grad=False)
-                    register_offload_parameter(module, name, param)
+                    register_offload_parameter(module, name, param, offload_device)
                 module.quantization_status = QuantizationStatus.FROZEN

compressed_tensors/compressors/quantized_compressors/base.py CHANGED Viewed

@@ -24,6 +24,7 @@ from compressed_tensors.utils import (
     get_nested_weight_mappings,
     merge_names,
 )
+from compressed_tensors.utils.safetensors_load import match_param_name
 from safetensors import safe_open
 from torch import Tensor
 from tqdm import tqdm
@@ -223,9 +224,7 @@ class BaseQuantizationCompressor(BaseCompressor):
             state_dict, self.compression_param_names
         )
         for module_path in weight_mappings.keys():
-            weight_data = {}
-            for param_name, param_value in weight_mappings[module_path].items():
-                weight_data[param_name] = param_value
+            weight_data = weight_mappings[module_path].copy()
             if "weight_scale" in weight_data:
                 quant_args = names_to_scheme[module_path].weights
@@ -234,3 +233,31 @@ class BaseQuantizationCompressor(BaseCompressor):
                 )
                 weight_data["weight"] = decompressed
                 yield module_path, weight_data
+    def decompress_module_from_state_dict(
+        self,
+        prefix: str,
+        state_dict: Dict[str, torch.Tensor],
+        scheme: QuantizationScheme,
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Only used by in-memory decompression pathways to decompress the parameters of
+        one module
+        :param prefix: prefix of state_dict, typically the path to the module
+        :param state_dict: state dict containing module parameter values
+        :param scheme: quantization scheme of module to decompress
+        :return: state dict with weight decompressed if applicable
+        """
+        state_dict = {
+            key.removeprefix(f"{prefix}."): value for key, value in state_dict.items()
+        }
+        if "weight_scale" in state_dict:
+            state_dict["weight"] = self.decompress_weight(
+                compressed_data=state_dict, quantization_args=scheme.weights
+            )
+        state_dict = {f"{prefix}.{key}": value for key, value in state_dict.items()}
+        return state_dict

compressed_tensors/quantization/lifecycle/forward.py CHANGED Viewed

@@ -21,7 +21,6 @@ from compressed_tensors.quantization.quant_args import (
     DynamicType,
     QuantizationArgs,
     QuantizationStrategy,
-    QuantizationType,
     round_to_quantized_type,
 )
 from compressed_tensors.quantization.quant_config import QuantizationStatus
@@ -405,7 +404,7 @@ def _quantize(
     # if a global scale is optionally provided, use it
     # to further scale the local `scale` parameter
-    if global_scale:
+    if global_scale is not None:
         scale = scale.to(global_scale.dtype) / global_scale
     scaled = x / scale
@@ -438,7 +437,7 @@ def _dequantize(
     # if a global scale is optionally provided, use it
     # to further scale the local `scale` parameter
-    if global_scale:
+    if global_scale is not None:
         scale = scale.to(global_scale.dtype) / global_scale
     dequant_value = x_q.to(scale.dtype)

compressed_tensors/quantization/utils/helpers.py CHANGED Viewed

@@ -110,6 +110,7 @@ def calculate_qparams(
         else:
             scales = max_val_pos / (float(bit_range) / 2)
+        # TODO: in the case of MoEs, the global_scale may also be 0/need to be clamped
         if scales.dtype == FP8_E4M3_DATA.dtype:
             # torch.clamp not supported for FP8
             # use the next largest fp8 value from 0
@@ -495,4 +496,4 @@ def generate_gparam(
     max_vals = torch.max(updated_max_val, torch.zeros_like(updated_max_val))
     max_val_pos = torch.max(torch.abs(min_vals), torch.abs(max_vals))
     global_scale = scale_data.max * quant_data.max / max_val_pos
-    return global_scale.to(dtype)
+    return global_scale.to(dtype).reshape([1])

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.9.5.a20250603'
+__version__ = version = '0.9.5.a20250604'
 __version_tuple__ = version_tuple = (0, 9, 5)

{compressed_tensors-0.9.5a20250603.dist-info → compressed_tensors-0.9.5a20250604.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250603
+Version: 0.9.5a20250604
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.9.5a20250603.dist-info → compressed_tensors-0.9.5a20250604.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=fFlh5YaVrAZG4nN-6r62Ow43mZVj9W0f2ASMDww8e5k,521
+compressed_tensors/version.py,sha256=QmqChcTnn-HquSfq_8n_1b_CkQT93OOAGt5yzbeUk0A,521
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=BBJd3Ei6FtqVQLBkOm80G6pSJ11IMTGuTA-FL4n6_5g,32704
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=72h2tWDIGbbqLQF8MDzOehy18eu5TvsCLd_AuzGv_O4,32517
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
-compressed_tensors/compressors/quantized_compressors/base.py,sha256=n_sVSzySHUBgXt-nkLggM1DtB0aEgQmiKhTzcnQU9Dc,9266
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=ByE3z61boZ5wdz0nhc-2CJH61bSixJQE78pfkS6XRDg,10269
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
 compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Gw-lVzk5jrKUlM5UTCiJBmhM5gHzB9mn8r298MVUbDI,6395
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=_66tQ8bxslDUdas-ULORXblPw9kdNNn1UJJU9-ZOGPY,11380
@@ -32,11 +32,11 @@ compressed_tensors/quantization/quant_scheme.py,sha256=IDWa1GWUbUdWCo8j78Jz6svYF
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=DOoxH4jM8r0270GGGUFOpRrgwaisiJi7TV-Q6E8qM8E,18067
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=WFwvNebxXNUlpX5p1xG80oa8W9fz4-Xd6LCH_B_nptg,14881
+compressed_tensors/quantization/lifecycle/forward.py,sha256=JWOQ-03bsgh9_nnOLAjmLZ0S8bFQA-GjwDK6YUBwcrU,14883
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=9d5Ee7qt3zxaa5_PFitkvadvRDXeDqBIxYgooBqtrf8,8638
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=DLSPX-5cmrXxVQbt-keN9Qgbvn_lPOL674pXa2gR8-A,17740
+compressed_tensors/quantization/utils/helpers.py,sha256=bqxNL2NU1XVsSxNzmDVZE3zd65PlLFq1Ir-RHwff8G0,17840
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
 compressed_tensors/transform/__init__.py,sha256=oa5VdrE-GtDYYceXNSwj5X_ropoXLLukm6Aufcc9WhY,747
@@ -50,8 +50,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.9.5a20250603.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.9.5a20250603.dist-info/METADATA,sha256=VEjGe1Y3JAwrVC7SfWv3yu1Xq85-mrQsYW2UxUwKuyE,7004
-compressed_tensors-0.9.5a20250603.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.9.5a20250603.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.9.5a20250603.dist-info/RECORD,,
+compressed_tensors-0.9.5a20250604.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.9.5a20250604.dist-info/METADATA,sha256=2dI2Y96LKAAG_vshTtYzxXhvM5Fby_hSISAAFbXYJXE,7004
+compressed_tensors-0.9.5a20250604.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.9.5a20250604.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.9.5a20250604.dist-info/RECORD,,

{compressed_tensors-0.9.5a20250603.dist-info → compressed_tensors-0.9.5a20250604.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.9.5a20250603.dist-info → compressed_tensors-0.9.5a20250604.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.9.5a20250603.dist-info → compressed_tensors-0.9.5a20250604.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.9.5a20250603__py3-none-any.whl → 0.9.5a20250604__py3-none-any.whl

compressed-tensors 0.9.5a20250603__py3-none-any.whl → 0.9.5a20250604__py3-none-any.whl