PyPI - compressed-tensors - Versions diffs - 0.11.1a20250821__py3-none-any.whl → 0.11.1a20250828__py3-none-any.whl - Mend

compressed-tensors 0.11.1a20250821__py3-none-any.whl → 0.11.1a20250828__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

compressed_tensors/compressors/model_compressors/model_compressor.py CHANGED Viewed

@@ -703,9 +703,12 @@ class ModelCompressor:
             with override_quantization_status(
                 self.quantization_config, QuantizationStatus.FROZEN
             ):
-                names_to_scheme = apply_quantization_config(
-                    model, self.quantization_config
-                )
+                apply_quantization_config(model, self.quantization_config)
+                names_to_scheme: Set[QuantizationScheme] = {
+                    name: getattr(module, "quantization_scheme")
+                    for name, module in model.named_modules()
+                    if getattr(module, "quantization_scheme", None) is not None
+                }
                 # Load activation scales/zp or any other quantization parameters
                 # Conditionally load the weight quantization parameters if we have a
                 # dense compressor or if a sparsity compressor has already been applied

compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py CHANGED Viewed

@@ -123,6 +123,7 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
         return decompressed_weight
+@torch.compile(fullgraph=True, dynamic=True)
 def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
     """
     Packs a tensor with values in the fp4 range into uint8.
@@ -145,12 +146,11 @@ def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
     # Find closest valid FP4 value index for each element
     abs_x = torch.abs(x)
-    abs_indices = torch.zeros_like(abs_x, dtype=torch.long)
-    for i, val in enumerate(kE2M1):
-        abs_indices = torch.where(torch.isclose(abs_x, val), i, abs_indices)
+    abs_diff_x = torch.abs(abs_x.unsqueeze(-1) - kE2M1)  # [m, n, 8]
+    abs_indices = torch.argmin(abs_diff_x, dim=-1)  # [m, n]
     # Apply sign bit (bit 3) to get final 4-bit representation
-    indices = abs_indices + (torch.signbit(x) << 3).to(torch.long)
+    indices = abs_indices + (torch.signbit(x).to(torch.long) << 3)
     # Reshape to prepare for packing pairs of values
     indices = indices.reshape(-1)
@@ -174,6 +174,7 @@ kE2M1ToFloat = torch.tensor(
 # reference: : https://github.com/vllm-project/vllm/pull/16362
+@torch.compile(fullgraph=True, dynamic=True)
 def unpack_fp4_from_uint8(
     a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
 ) -> torch.Tensor:

compressed_tensors/quantization/lifecycle/apply.py CHANGED Viewed

@@ -115,7 +115,7 @@ def load_pretrained_quantization_parameters(
 def apply_quantization_config(
     model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
-) -> Dict[str, QuantizationScheme]:
+):
     """
     Initializes the model for quantization in-place based on the given config.
     Optionally coverts quantizable modules to compressed_linear modules
@@ -125,26 +125,22 @@ def apply_quantization_config(
     :param run_compressed: Whether the model will be run in compressed mode or
         decompressed fully on load
     """
-    # Workaround for when HF Quantizer passes None, see PR #180
-    if config is None:
-        return dict()
+    from compressed_tensors.linear.compressed_linear import CompressedLinear
-    # remove reference to the original `config`
-    # argument. This function can mutate it, and we'd
-    # like to keep the original `config` as it is.
     config = deepcopy(config)
+    if config is None:  # see PR #180
+        return dict()
+    # preprocess to support kv cache scheme
+    config = process_quantization_config(config)
     # build mapping of targets to schemes for easier matching
     # use ordered dict to preserve target ordering in config
     target_to_scheme = OrderedDict()
-    config = process_quantization_config(config)
-    names_to_scheme = dict()
     for scheme in config.config_groups.values():
         for target in scheme.targets:
             target_to_scheme[target] = scheme
-    if run_compressed:
-        from compressed_tensors.linear.compressed_linear import CompressedLinear
     # mark appropriate layers for quantization by setting their quantization schemes
     for name, submodule in match_named_modules(
         model, target_to_scheme, config.ignore, warn_on_fail=True
@@ -153,7 +149,12 @@ def apply_quantization_config(
         # quant scheme to the matching layers
         matched_targets = match_targets(name, submodule, target_to_scheme)
         scheme = _scheme_from_targets(target_to_scheme, matched_targets, name)
-        if run_compressed:
+        # target matched - add layer and scheme to target list
+        submodule.quantization_scheme = scheme
+        # replace with run compressed if applicable
+        # FUTURE: move this to model compressor
+        if isinstance(submodule, torch.nn.Linear) and run_compressed:
             format = config.format
             if format != CompressionFormat.dense.value:
                 if isinstance(submodule, torch.nn.Linear):
@@ -165,14 +166,8 @@ def apply_quantization_config(
                     )
                     replace_module(model, name, compressed_linear)
-        # target matched - add layer and scheme to target list
-        submodule.quantization_scheme = scheme
-        names_to_scheme[name] = submodule.quantization_scheme
     # apply current quantization status across all targeted layers
     apply_quantization_status(model, config.quantization_status)
-    return names_to_scheme
 def process_quantization_config(config: QuantizationConfig) -> QuantizationConfig:

compressed_tensors/version.py CHANGED Viewed

@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.11.1.a20250821'
+__version__ = version = '0.11.1.a20250828'
 __version_tuple__ = version_tuple = (0, 11, 1)

{compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.11.1a20250821
+Version: 0.11.1a20250828
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.

{compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/RECORD RENAMED Viewed

@@ -1,15 +1,15 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
-compressed_tensors/version.py,sha256=QiPWK4b5m-LXWHE8_W5EK7VPtKZvorPc5Opz7BYczvA,523
+compressed_tensors/version.py,sha256=NChUyeUoxQUAMGsjmgMd6I-sPb4p5iHss-5eGrWhivg,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=x2AS1NAPQx51O8uxyLf3wItnp2-_0qU2fI6eQVFBBfY,37388
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=mZqpBS5znPHedlVVkKsUsVCs52zK5bAmEiI8cqMBKnY,37618
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=_mqTG_HjAIbHqDGucA3ZR_01OXU3CMFxtrDjfM-kY0g,10301
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
-compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Z8k2gi5a1F_36DiI0GJsXGc03Gh0qwBRMwMxuKIWkj8,7136
+compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Qq790d5VQQccq6Dj8YhBwhr7S3DqMJNoYPI5S6M1FNo,7183
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=D8h9ltxSIYi1XEKYgbYu1ebbXzCibhPi-eZsBUi0NOg,11245
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
@@ -30,7 +30,7 @@ compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSU
 compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
 compressed_tensors/quantization/quant_scheme.py,sha256=X5Z7oXMLPXnX8g-UvWXlRjn4YnD_qTk5mXfGzu20k9o,8903
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=yc9xCuQIcdhy-MGFh8OmBrB45dzJ8TzZju4mBa3AONg,14909
+compressed_tensors/quantization/lifecycle/apply.py,sha256=TuSjKomSk4N0My-UY9PWk2Nyuze6TilEGPsZELgotzk,14716
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=xcLTgaff1wYUWzvQqYKmhWYkshWVI-PhLPtBOyyZro0,17576
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
@@ -63,8 +63,8 @@ compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RK
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.11.1a20250821.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.11.1a20250821.dist-info/METADATA,sha256=jpkjjAiWJwPLa19Ej2tIJm5MEHJ9gwYsPPfvkhF6YYg,7031
-compressed_tensors-0.11.1a20250821.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.11.1a20250821.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.11.1a20250821.dist-info/RECORD,,
+compressed_tensors-0.11.1a20250828.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.11.1a20250828.dist-info/METADATA,sha256=lPVoawn0HxkV3dRXP0U6C7UWpulLrYHeTpyzWGSfGvM,7031
+compressed_tensors-0.11.1a20250828.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.11.1a20250828.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.11.1a20250828.dist-info/RECORD,,

{compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/WHEEL RENAMED Viewed

File without changes

{compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/top_level.txt RENAMED Viewed

File without changes

compressed-tensors 0.11.1a20250821__py3-none-any.whl → 0.11.1a20250828__py3-none-any.whl

compressed-tensors 0.11.1a20250821__py3-none-any.whl → 0.11.1a20250828__py3-none-any.whl