compressed-tensors 0.11.1a20250903__py3-none-any.whl → 0.11.1a20250908__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to their public registry, and is provided for informational purposes only.
compressed_tensors/compressors/quantized_compressors/base.py
@@ -131,7 +131,11 @@ class BaseQuantizationCompressor(BaseCompressor):
 
                 # omit saving for g_idx if uninitialized
                 # TODO: does this case actually occur?
-                elif name.endswith("g_idx") and torch.any(value <= -1):
+                elif (
+                    name.endswith("g_idx")
+                    and value.device.type != "meta"
+                    and torch.any(value <= -1)
+                ):
                     continue
                 compressed_dict[name] = value.to(compression_device)
 
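Context for the new value.device.type != "meta" guard: meta tensors carry shape and dtype but no data, so data-dependent checks cannot run on them. A minimal sketch of the failure mode this avoids (tensor name and shape are illustrative):

    import torch

    # A meta tensor has no storage to inspect.
    g_idx = torch.full((128,), -1, device="meta")

    # bool(torch.any(g_idx <= -1))  # would raise: the condition cannot be
    #                               # evaluated on a meta tensor
    print(g_idx.device.type)  # "meta" -> the new guard skips the elif branch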
compressed_tensors/quantization/quant_scheme.py
@@ -60,6 +60,16 @@ class QuantizationScheme(BaseModel):
         format = model.format
 
         if inputs is not None:
+            if inputs.strategy not in (
+                QuantizationStrategy.TOKEN,
+                QuantizationStrategy.TENSOR,
+                QuantizationStrategy.TENSOR_GROUP,
+            ):
+                raise ValueError(
+                    f"Using {inputs.strategy} strategy is not supported for "
+                    "activation quantization"
+                )
+
            if inputs.actorder is not None:
                raise ValueError("Cannot apply actorder to input activations")
 
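A hedged example of the new validation, assuming the package's public QuantizationScheme and QuantizationArgs exports (field values are illustrative):

    from compressed_tensors.quantization import (
        QuantizationArgs,
        QuantizationScheme,
    )

    # Input activations with a strategy outside TOKEN/TENSOR/TENSOR_GROUP
    # are now rejected when the scheme is validated.
    try:
        QuantizationScheme(
            targets=["Linear"],
            input_activations=QuantizationArgs(
                num_bits=8, strategy="group", group_size=128
            ),
        )
    except ValueError as err:
        # message includes: "Using QuantizationStrategy.GROUP strategy is not
        # supported for activation quantization"
        print(err)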
compressed_tensors/transform/factory/base.py
@@ -18,6 +18,7 @@ from typing import List, Optional, Set, Tuple
 
 import torch
 import torch.nn.utils.parametrize as P
+import tqdm
 from compressed_tensors.registry.registry import RegistryMixin, T
 from compressed_tensors.transform import (
     TransformArgs,
@@ -84,15 +85,21 @@ class TransformFactory(RegistryMixin, ABC):
         """
         raise NotImplementedError()
 
-    def apply_to_model(self, model: Module):
+    def apply_to_model(self, model: Module, use_tqdm=True):
         """
         Create transforms and apply them to the model
 
         :param model: module to apply transforms to
         """
-        for arg in self.scheme.apply:
-            for _, module in match_named_modules(model, arg.targets, arg.ignore):
-                self._apply_to_module(module, arg)
+        modules_args = [
+            (module, arg)
+            for arg in self.scheme.apply
+            for _, module in match_named_modules(model, arg.targets, arg.ignore)
+        ]
+
+        desc = f"Applying {self.name} transforms"
+        for module, arg in tqdm.tqdm(modules_args, desc=desc, disable=(not use_tqdm)):
+            self._apply_to_module(module, arg)
 
         self._update_tied_weights()
 
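Callers can now silence the new progress bar, e.g. in non-interactive jobs; a short usage sketch assuming an already-constructed factory and model:

    # factory: a TransformFactory subclass instance; model: a torch.nn.Module
    factory.apply_to_model(model)                  # shows "Applying <name> transforms"
    factory.apply_to_model(model, use_tqdm=False)  # suppresses the progress bar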
compressed_tensors/transform/factory/hadamard.py
@@ -53,24 +53,28 @@ class HadamardFactory(TransformFactory):
         """
         assert hasattr(module, "weight")
         size = get_transform_size(module, args.location, self.scheme.head_dim)
-        dtype = self.scheme.precision
-        device = get_offloaded_device(module)
         exec_device = get_execution_device(module)
-
-        factory_kwargs = {"construct_device": exec_device}
-        weight = self.weights.get(size, dtype, device, factory_kwargs=factory_kwargs)
+        device = get_offloaded_device(module)
+        precision = self.scheme.precision if args.is_online() else torch.float64
+
+        factory_kwargs = {
+            "device": device,
+            "construct_device": exec_device,
+            "precision": precision,
+        }
+        weight = self.weights.get(size, factory_kwargs=factory_kwargs)
+        # TODO: permutations should be keyed by fused modules, not weight
         perm = self.perms[weight] if self.scheme.randomize else None
         return HadamardTransform(weight, perm, self.scheme, args, type(module))
 
     def _create_weight(
         self,
         size: int,
-        dtype: dtype,
         device: device,
         construct_device: device,
+        precision: dtype,
     ) -> Parameter:
-        # construct on execution device, cache on offload device
-        data = deterministic_hadamard_matrix(size, dtype, construct_device)
+        data = deterministic_hadamard_matrix(size, precision, construct_device)
         data = data.to(device=device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)
 
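The construct-then-offload pattern is unchanged (build on the execution device, cache on the offload device), but the construction dtype now depends on placement: offline transforms, which are fused into weights ahead of time, are always built in float64, while online transforms keep scheme.precision. A sketch using the helper named in the hunk (size and devices are illustrative):

    import torch
    from compressed_tensors.transform.utils.hadamard import (
        deterministic_hadamard_matrix,
    )

    construct_device = torch.device("cpu")  # stand-in for the execution device
    offload_device = torch.device("cpu")    # stand-in for the offload device
    data = deterministic_hadamard_matrix(64, torch.float64, construct_device)
    data = data.to(device=offload_device)   # cache where the module offloads
    print(data.shape, data.dtype)           # torch.Size([64, 64]) torch.float64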
@@ -94,8 +98,7 @@ class HadamardTransform(TransformBase):
         self.scheme = scheme
         self.args = args
         self.module_type = module_type
-        self._scale = torch.tensor(weight.size(0), dtype=self.scheme.precision).sqrt()
-        self._precision = scheme.precision if args.is_online() else torch.float64
+        self._scale = torch.tensor(weight.size(0), dtype=torch.float64).sqrt()
 
     def forward(self, value: Tensor) -> Tensor:
         weight = self.weight
@@ -108,8 +111,8 @@ class HadamardTransform(TransformBase):
 
         return (
             apply_transform_weight(
-                weight.to(self._precision),
-                value.to(self._precision),
+                weight.to(device=value.device),
+                value.to(dtype=weight.dtype),
                 self.args.location,
                 self.module_type,
             )
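Taken together, the two HadamardTransform hunks move dtype policy out of forward: the weight is created at the right precision up front, and forward only moves the weight to the activation's device and computes in the weight's dtype. A self-contained sketch of that cast/move pattern, with a plain matmul standing in for apply_transform_weight:

    import torch

    weight = torch.eye(4, dtype=torch.float64)       # stand-in transform weight
    value = torch.randn(2, 4, dtype=torch.bfloat16)  # stand-in activation

    # Weight follows the activation's device; math runs in the weight's dtype.
    out = value.to(dtype=weight.dtype) @ weight.to(device=value.device)
    scale = torch.tensor(weight.size(0), dtype=torch.float64).sqrt()
    print((out / scale).to(value.dtype).dtype)  # torch.bfloat16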
compressed_tensors/transform/factory/matrix_multiply.py
@@ -21,8 +21,8 @@ from compressed_tensors.transform.utils.matrix import (
     apply_transform_weight,
     get_transform_size,
 )
-from compressed_tensors.utils import get_offloaded_device
 from compressed_tensors.utils.helpers import ParameterizedDefaultDict
+from compressed_tensors.utils.offload import get_offloaded_device
 from torch import Tensor, device, dtype
 from torch.nn import Module, Parameter
 
@@ -52,19 +52,23 @@ class RandomMatrixFactory(TransformFactory):
         """
         assert hasattr(module, "weight")
         size = get_transform_size(module, args.location, self.scheme.head_dim)
-        dtype = self.scheme.precision
         device = get_offloaded_device(module)
+        precision = self.scheme.precision if args.is_online() else torch.float64
 
-        weight = self.weights[size, dtype, device]
+        factory_kwargs = {"device": device, "precision": precision}
+        weight = self.weights.get(size, factory_kwargs=factory_kwargs)
         if args.inverse:
             weight = self.inverses[weight]
 
         return RandomMatrixTransform(weight, self.scheme, args, type(module))
 
-    def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
-        # TODO: verify that weight is invertible (has non-zero determinant)
+    def _create_weight(self, size: int, device: device, precision: dtype) -> Parameter:
+        # TODO: construct such that weight is invertible (has non-zero determinant)
         data = torch.rand(
-            (size, size), generator=self.generator, dtype=dtype, device=device
+            (size, size),
+            generator=self.generator,
+            dtype=precision,
+            device=device,
         )
         return Parameter(data, requires_grad=self.scheme.requires_grad)
 
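On the updated TODO: torch.rand samples from [0, 1), so a square draw is invertible with probability 1 but not by construction. A hypothetical check (not part of the library) illustrating the concern:

    import torch

    data = torch.rand((64, 64), dtype=torch.float64)
    # A singular draw has measure zero, yet nothing rules it out by
    # construction; an explicit guard could look like this:
    if torch.linalg.det(data).abs() < torch.finfo(data.dtype).eps:
        raise ValueError("sampled transform matrix is not invertible")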
@@ -87,12 +91,11 @@ class RandomMatrixTransform(TransformBase):
         self.scheme = scheme
         self.args = args
         self.module_type = module_type
-        self._precision = scheme.precision if args.is_online() else torch.float64
 
     def forward(self, value: Tensor) -> Parameter:
         return apply_transform_weight(
-            self.weight.to(self._precision),
-            value.to(self._precision),
+            self.weight.to(device=value.device),
+            value.to(dtype=self.weight.dtype),
             self.args.location,
             self.module_type,
         ).to(value.dtype)
@@ -100,8 +103,8 @@ class RandomMatrixTransform(TransformBase):
     def right_inverse(self, value: Tensor) -> Tensor:
         inverse = high_precision_invert(self.weight)
         return apply_transform_weight(
-            inverse.to(self._precision),
-            value.to(self._precision),
+            inverse.to(device=value.device),
+            value.to(dtype=inverse.dtype),
             self.args.location,
             self.module_type,
         ).to(value.dtype)
compressed_tensors/transform/factory/random_hadamard.py
@@ -31,11 +31,10 @@ class RandomHadamardFactory(HadamardFactory):
     def _create_weight(
         self,
         size: int,
-        dtype: dtype,
         device: device,
         construct_device: device,
+        precision: dtype,
     ) -> Parameter:
-        # construct on execution device, cache on offload device
-        data = random_hadamard_matrix(size, dtype, construct_device, self.generator)
+        data = random_hadamard_matrix(size, precision, construct_device, self.generator)
         data = data.to(device=device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)
compressed_tensors/utils/offload.py
@@ -131,7 +131,10 @@ def get_offloaded_device(module: torch.nn.Module) -> torch.device:
         first_key = list(module._hf_hook.weights_map.keys())[0]
         prefix_dataset = module._hf_hook.weights_map.dataset
         return prefix_dataset[first_key].device
-    return next(module.parameters()).device
+    else:
+        # if the module is not offloaded, then any added weights
+        # should be placed on the module's execution device
+        return get_execution_device(module)
 
 
 @check_accelerate(fallback=None)
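A usage sketch of the changed fallback: with no accelerate hooks attached, the function now reports the module's execution device rather than the device of its first parameter (the old next(module.parameters()) form also fails on parameter-less modules):

    import torch
    from compressed_tensors.utils.offload import get_offloaded_device

    module = torch.nn.Linear(8, 8)       # plain module, no accelerate hooks
    print(get_offloaded_device(module))  # its execution device, e.g. cpu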
compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.11.1.a20250903'
+__version__ = version = '0.11.1.a20250908'
 __version_tuple__ = version_tuple = (0, 11, 1)
compressed_tensors-0.11.1a20250908.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.11.1a20250903
+Version: 0.11.1a20250908
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
compressed_tensors-0.11.1a20250908.dist-info/RECORD
@@ -1,13 +1,13 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
-compressed_tensors/version.py,sha256=ONej1u3G91wzdsS5eGMwBmRxyOtqPnKgYHxEjlNMUa4,523
+compressed_tensors/version.py,sha256=8qf_B1P1NNbEDyEkRyxNNhdvTofGEV0EE02UMN3na5k,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
 compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=mZqpBS5znPHedlVVkKsUsVCs52zK5bAmEiI8cqMBKnY,37618
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
-compressed_tensors/compressors/quantized_compressors/base.py,sha256=_mqTG_HjAIbHqDGucA3ZR_01OXU3CMFxtrDjfM-kY0g,10301
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=rWvaWDqzi8cctBo982g2n3-y6afRiFl3jfTd90lSMrY,10413
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
 compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Qq790d5VQQccq6Dj8YhBwhr7S3DqMJNoYPI5S6M1FNo,7183
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=D8h9ltxSIYi1XEKYgbYu1ebbXzCibhPi-eZsBUi0NOg,11245
@@ -28,7 +28,7 @@ compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSUKvIm-SCQa-Bq_SF0,12916
 compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
-compressed_tensors/quantization/quant_scheme.py,sha256=X5Z7oXMLPXnX8g-UvWXlRjn4YnD_qTk5mXfGzu20k9o,8903
+compressed_tensors/quantization/quant_scheme.py,sha256=2pV3tPNgo6ovi6FLxP4ZFznEmInlC1L90Pq9I1HI_Xk,9275
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=TuSjKomSk4N0My-UY9PWk2Nyuze6TilEGPsZELgotzk,14716
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
@@ -45,10 +45,10 @@ compressed_tensors/transform/transform_args.py,sha256=rVgReFp7wMXcYugkfd325e2tTF
 compressed_tensors/transform/transform_config.py,sha256=3YdtGcau3qkcapX9GMUiLuhQHFQZKFYT3eLgJGj1L6s,1204
 compressed_tensors/transform/transform_scheme.py,sha256=S7vYLnuv7xZ_bwphkpCiGqZLjnnTnb4lj1T8a6WwnE0,2094
 compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/transform/factory/base.py,sha256=Txkr1nWKtlMU1MmBcQ85-JqJzD356Z9nYbaF24tJ5rw,7755
-compressed_tensors/transform/factory/hadamard.py,sha256=CEy98vOIip_Pomh1XB62BqcjU8GQ9fUZSpnZH4GrBnE,4499
-compressed_tensors/transform/factory/matrix_multiply.py,sha256=boZLMkaNrgXQ9cU-tFzJ-1N1tLgbKMJzAxiYZAr4Pu8,4326
-compressed_tensors/transform/factory/random_hadamard.py,sha256=nUhTlFa4ikSpcl4Umme71pnjMPgwYoGlwjKlU27UHZ4,1634
+compressed_tensors/transform/factory/base.py,sha256=82fwlX4gVlN67H7P_T3pbvN5pB-XQnG-dZJ53evj-DA,7979
+compressed_tensors/transform/factory/hadamard.py,sha256=0SS_gM7b2df8SbsCF9LNbLLBMcwly7bXND1KED8uxhc,4550
+compressed_tensors/transform/factory/matrix_multiply.py,sha256=u-7V04EvEe9G3VEF--YwoVV-h5kmh6hXq8stY_EWmLY,4456
+compressed_tensors/transform/factory/random_hadamard.py,sha256=ck-LF7sl7i9NW4fxLypgHgkw91lc_TpwHO8bXX-0fPU,1577
 compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/transform/utils/hadamard.py,sha256=hDJZC0Gw2fKdxqa3f8TmFc5J0eJqxHtFRxswLU_yVJc,5548
 compressed_tensors/transform/utils/hadamards.safetensors,sha256=mFd1GzNodGG-ifA1IoH-0nHYzfraCOvrq_dX2zFI1B4,1436901
@@ -57,14 +57,14 @@ compressed_tensors/utils/__init__.py,sha256=spzbjUO4-hZ2jXGST27r3MIt2yzIXsjdbEaY
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
 compressed_tensors/utils/match.py,sha256=y03xJyWTXV8bjIPN5Z4S0_w797qMnh-Z4aiPEGQ4zNE,11239
-compressed_tensors/utils/offload.py,sha256=jE9xj3VewMc85iOLWSikqdyjNL9JB3oZpO1uDKKCLUE,24444
+compressed_tensors/utils/offload.py,sha256=b0Q2P0hJLQBGEqdRwOh6SOK3_eJCqHNnIX38-wqeef0,24577
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.11.1a20250903.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.11.1a20250903.dist-info/METADATA,sha256=KMUKwzvh_FjAhk5ABHXpTUIM3dj5DV87o-kJy8RcPvk,7031
-compressed_tensors-0.11.1a20250903.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.11.1a20250903.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.11.1a20250903.dist-info/RECORD,,
+compressed_tensors-0.11.1a20250908.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.11.1a20250908.dist-info/METADATA,sha256=lLokpvLLt0OOrfW9axfgwLmBhqCR7IqyjLCQFRdxCsU,7031
+compressed_tensors-0.11.1a20250908.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.11.1a20250908.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.11.1a20250908.dist-info/RECORD,,