compressed-tensors 0.11.1a20250903__py3-none-any.whl → 0.11.1a20250908__py3-none-any.whl

This diff compares the contents of two publicly released versions of this package, as published to their public registry, and is provided for informational purposes only.
compressed_tensors/compressors/quantized_compressors/base.py
@@ -131,7 +131,11 @@ class BaseQuantizationCompressor(BaseCompressor):
 
                 # omit saving for g_idx if uninitialized
                 # TODO: does this case actually occur?
-                elif name.endswith("g_idx") and torch.any(value <= -1):
+                elif (
+                    name.endswith("g_idx")
+                    and value.device.type != "meta"
+                    and torch.any(value <= -1)
+                ):
                     continue
                 compressed_dict[name] = value.to(compression_device)
 
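Context for the new value.device.type != "meta" guard: meta tensors carry shape and dtype but no data, so data-dependent checks cannot run on them. A minimal sketch of the failure mode this avoids (tensor name and shape are illustrative):

    import torch

    # A meta tensor has no storage to inspect.
    g_idx = torch.full((128,), -1, device="meta")

    # bool(torch.any(g_idx <= -1))  # would raise: the condition cannot be
    #                               # evaluated on a meta tensor
    print(g_idx.device.type)  # "meta" -> the new guard skips the elif branch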
compressed_tensors/quantization/quant_scheme.py
@@ -60,6 +60,16 @@ class QuantizationScheme(BaseModel):
         format = model.format
 
         if inputs is not None:
+            if inputs.strategy not in (
+                QuantizationStrategy.TOKEN,
+                QuantizationStrategy.TENSOR,
+                QuantizationStrategy.TENSOR_GROUP,
+            ):
+                raise ValueError(
+                    f"Using {inputs.strategy} strategy is not supported for "
+                    "activation quantization"
+                )
+
            if inputs.actorder is not None:
                raise ValueError("Cannot apply actorder to input activations")
 
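A hedged example of the new validation, assuming the package's public QuantizationScheme and QuantizationArgs exports (field values are illustrative):

    from compressed_tensors.quantization import (
        QuantizationArgs,
        QuantizationScheme,
    )

    # Input activations with a strategy outside TOKEN/TENSOR/TENSOR_GROUP
    # are now rejected when the scheme is validated.
    try:
        QuantizationScheme(
            targets=["Linear"],
            input_activations=QuantizationArgs(
                num_bits=8, strategy="group", group_size=128
            ),
        )
    except ValueError as err:
        # message includes: "Using QuantizationStrategy.GROUP strategy is not
        # supported for activation quantization"
        print(err)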
compressed_tensors/transform/factory/base.py
@@ -18,6 +18,7 @@ from typing import List, Optional, Set, Tuple
 
 import torch
 import torch.nn.utils.parametrize as P
+import tqdm
 from compressed_tensors.registry.registry import RegistryMixin, T
 from compressed_tensors.transform import (
     TransformArgs,
@@ -84,15 +85,21 @@ class TransformFactory(RegistryMixin, ABC):
         """
         raise NotImplementedError()
 
-    def apply_to_model(self, model: Module):
+    def apply_to_model(self, model: Module, use_tqdm=True):
         """
         Create transforms and apply them to the model
 
         :param model: module to apply transforms to
         """
-        for arg in self.scheme.apply:
-            for _, module in match_named_modules(model, arg.targets, arg.ignore):
-                self._apply_to_module(module, arg)
+        modules_args = [
+            (module, arg)
+            for arg in self.scheme.apply
+            for _, module in match_named_modules(model, arg.targets, arg.ignore)
+        ]
+
+        desc = f"Applying {self.name} transforms"
+        for module, arg in tqdm.tqdm(modules_args, desc=desc, disable=(not use_tqdm)):
+            self._apply_to_module(module, arg)
 
         self._update_tied_weights()
 
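Callers can now silence the new progress bar, e.g. in non-interactive jobs; a short usage sketch assuming an already-constructed factory and model:

    # factory: a TransformFactory subclass instance; model: a torch.nn.Module
    factory.apply_to_model(model)                  # shows "Applying <name> transforms"
    factory.apply_to_model(model, use_tqdm=False)  # suppresses the progress bar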
compressed_tensors/transform/factory/hadamard.py
@@ -53,24 +53,28 @@ class HadamardFactory(TransformFactory):
         """
         assert hasattr(module, "weight")
         size = get_transform_size(module, args.location, self.scheme.head_dim)
-        dtype = self.scheme.precision
-        device = get_offloaded_device(module)
         exec_device = get_execution_device(module)
-
-        factory_kwargs = {"construct_device": exec_device}
-        weight = self.weights.get(size, dtype, device, factory_kwargs=factory_kwargs)
+        device = get_offloaded_device(module)
+        precision = self.scheme.precision if args.is_online() else torch.float64
+
+        factory_kwargs = {
+            "device": device,
+            "construct_device": exec_device,
+            "precision": precision,
+        }
+        weight = self.weights.get(size, factory_kwargs=factory_kwargs)
+        # TODO: permutations should be keyed by fused modules, not weight
         perm = self.perms[weight] if self.scheme.randomize else None
         return HadamardTransform(weight, perm, self.scheme, args, type(module))
 
     def _create_weight(
         self,
         size: int,
-        dtype: dtype,
         device: device,
         construct_device: device,
+        precision: dtype,
     ) -> Parameter:
-        # construct on execution device, cache on offload device
-        data = deterministic_hadamard_matrix(size, dtype, construct_device)
+        data = deterministic_hadamard_matrix(size, precision, construct_device)
         data = data.to(device=device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)
 
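The construct-then-offload pattern is unchanged (build on the execution device, cache on the offload device), but the construction dtype now depends on placement: offline transforms, which are fused into weights ahead of time, are always built in float64, while online transforms keep scheme.precision. A sketch using the helper named in the hunk (size and devices are illustrative):

    import torch
    from compressed_tensors.transform.utils.hadamard import (
        deterministic_hadamard_matrix,
    )

    construct_device = torch.device("cpu")  # stand-in for the execution device
    offload_device = torch.device("cpu")    # stand-in for the offload device
    data = deterministic_hadamard_matrix(64, torch.float64, construct_device)
    data = data.to(device=offload_device)   # cache where the module offloads
    print(data.shape, data.dtype)           # torch.Size([64, 64]) torch.float64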
@@ -94,8 +98,7 @@ class HadamardTransform(TransformBase):
         self.scheme = scheme
         self.args = args
         self.module_type = module_type
-        self._scale = torch.tensor(weight.size(0), dtype=self.scheme.precision).sqrt()
-        self._precision = scheme.precision if args.is_online() else torch.float64
+        self._scale = torch.tensor(weight.size(0), dtype=torch.float64).sqrt()
 
     def forward(self, value: Tensor) -> Tensor:
         weight = self.weight
@@ -108,8 +111,8 @@ class HadamardTransform(TransformBase):
 
         return (
             apply_transform_weight(
-                weight.to(self._precision),
-                value.to(self._precision),
+                weight.to(device=value.device),
+                value.to(dtype=weight.dtype),
                 self.args.location,
                 self.module_type,
             )
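Taken together, the two HadamardTransform hunks move dtype policy out of forward: the weight is created at the right precision up front, and forward only moves the weight to the activation's device and computes in the weight's dtype. A self-contained sketch of that cast/move pattern, with a plain matmul standing in for apply_transform_weight:

    import torch

    weight = torch.eye(4, dtype=torch.float64)       # stand-in transform weight
    value = torch.randn(2, 4, dtype=torch.bfloat16)  # stand-in activation

    # Weight follows the activation's device; math runs in the weight's dtype.
    out = value.to(dtype=weight.dtype) @ weight.to(device=value.device)
    scale = torch.tensor(weight.size(0), dtype=torch.float64).sqrt()
    print((out / scale).to(value.dtype).dtype)  # torch.bfloat16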
compressed_tensors/transform/factory/matrix_multiply.py
@@ -21,8 +21,8 @@ from compressed_tensors.transform.utils.matrix import (
     apply_transform_weight,
     get_transform_size,
 )
-from compressed_tensors.utils import get_offloaded_device
 from compressed_tensors.utils.helpers import ParameterizedDefaultDict
+from compressed_tensors.utils.offload import get_offloaded_device
 from torch import Tensor, device, dtype
 from torch.nn import Module, Parameter
 
@@ -52,19 +52,23 @@ class RandomMatrixFactory(TransformFactory):
         """
         assert hasattr(module, "weight")
         size = get_transform_size(module, args.location, self.scheme.head_dim)
-        dtype = self.scheme.precision
         device = get_offloaded_device(module)
+        precision = self.scheme.precision if args.is_online() else torch.float64
 
-        weight = self.weights[size, dtype, device]
+        factory_kwargs = {"device": device, "precision": precision}
+        weight = self.weights.get(size, factory_kwargs=factory_kwargs)
         if args.inverse:
             weight = self.inverses[weight]
 
         return RandomMatrixTransform(weight, self.scheme, args, type(module))
 
-    def _create_weight(self, size: int, dtype: dtype, device: device) -> Parameter:
-        # TODO: verify that weight is invertible (has non-zero determinant)
+    def _create_weight(self, size: int, device: device, precision: dtype) -> Parameter:
+        # TODO: construct such that weight is invertible (has non-zero determinant)
         data = torch.rand(
-            (size, size), generator=self.generator, dtype=dtype, device=device
+            (size, size),
+            generator=self.generator,
+            dtype=precision,
+            device=device,
         )
         return Parameter(data, requires_grad=self.scheme.requires_grad)
 
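On the updated TODO: torch.rand samples from [0, 1), so a square draw is invertible with probability 1 but not by construction. A hypothetical check (not part of the library) illustrating the concern:

    import torch

    data = torch.rand((64, 64), dtype=torch.float64)
    # A singular draw has measure zero, yet nothing rules it out by
    # construction; an explicit guard could look like this:
    if torch.linalg.det(data).abs() < torch.finfo(data.dtype).eps:
        raise ValueError("sampled transform matrix is not invertible")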
@@ -87,12 +91,11 @@ class RandomMatrixTransform(TransformBase):
         self.scheme = scheme
         self.args = args
         self.module_type = module_type
-        self._precision = scheme.precision if args.is_online() else torch.float64
 
     def forward(self, value: Tensor) -> Parameter:
         return apply_transform_weight(
-            self.weight.to(self._precision),
-            value.to(self._precision),
+            self.weight.to(device=value.device),
+            value.to(dtype=self.weight.dtype),
             self.args.location,
             self.module_type,
         ).to(value.dtype)
@@ -100,8 +103,8 @@ class RandomMatrixTransform(TransformBase):
     def right_inverse(self, value: Tensor) -> Tensor:
         inverse = high_precision_invert(self.weight)
         return apply_transform_weight(
-            inverse.to(self._precision),
-            value.to(self._precision),
+            inverse.to(device=value.device),
+            value.to(dtype=inverse.dtype),
             self.args.location,
             self.module_type,
         ).to(value.dtype)
compressed_tensors/transform/factory/random_hadamard.py
@@ -31,11 +31,10 @@ class RandomHadamardFactory(HadamardFactory):
     def _create_weight(
         self,
         size: int,
-        dtype: dtype,
         device: device,
         construct_device: device,
+        precision: dtype,
     ) -> Parameter:
-        # construct on execution device, cache on offload device
-        data = random_hadamard_matrix(size, dtype, construct_device, self.generator)
+        data = random_hadamard_matrix(size, precision, construct_device, self.generator)
         data = data.to(device=device)
         return Parameter(data, requires_grad=self.scheme.requires_grad)
compressed_tensors/utils/offload.py
@@ -131,7 +131,10 @@ def get_offloaded_device(module: torch.nn.Module) -> torch.device:
         first_key = list(module._hf_hook.weights_map.keys())[0]
         prefix_dataset = module._hf_hook.weights_map.dataset
         return prefix_dataset[first_key].device
-    return next(module.parameters()).device
+    else:
+        # if the module is not offloaded, then any added weights
+        # should be placed on the module's execution device
+        return get_execution_device(module)
 
 
 @check_accelerate(fallback=None)
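A usage sketch of the changed fallback: with no accelerate hooks attached, the function now reports the module's execution device rather than the device of its first parameter (the old next(module.parameters()) form also fails on parameter-less modules):

    import torch
    from compressed_tensors.utils.offload import get_offloaded_device

    module = torch.nn.Linear(8, 8)       # plain module, no accelerate hooks
    print(get_offloaded_device(module))  # its execution device, e.g. cpu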
compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.11.1.a20250903'
+__version__ = version = '0.11.1.a20250908'
 __version_tuple__ = version_tuple = (0, 11, 1)
compressed_tensors-0.11.1a20250908.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.11.1a20250903
+Version: 0.11.1a20250908
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
compressed_tensors-0.11.1a20250908.dist-info/RECORD
@@ -1,13 +1,13 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
-compressed_tensors/version.py,sha256=ONej1u3G91wzdsS5eGMwBmRxyOtqPnKgYHxEjlNMUa4,523
+compressed_tensors/version.py,sha256=8qf_B1P1NNbEDyEkRyxNNhdvTofGEV0EE02UMN3na5k,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
 compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=mZqpBS5znPHedlVVkKsUsVCs52zK5bAmEiI8cqMBKnY,37618
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
-compressed_tensors/compressors/quantized_compressors/base.py,sha256=_mqTG_HjAIbHqDGucA3ZR_01OXU3CMFxtrDjfM-kY0g,10301
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=rWvaWDqzi8cctBo982g2n3-y6afRiFl3jfTd90lSMrY,10413
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
 compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Qq790d5VQQccq6Dj8YhBwhr7S3DqMJNoYPI5S6M1FNo,7183
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=D8h9ltxSIYi1XEKYgbYu1ebbXzCibhPi-eZsBUi0NOg,11245
@@ -28,7 +28,7 @@ compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSUKvIm-SCQa-Bq_SF0,12916
 compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
-compressed_tensors/quantization/quant_scheme.py,sha256=X5Z7oXMLPXnX8g-UvWXlRjn4YnD_qTk5mXfGzu20k9o,8903
+compressed_tensors/quantization/quant_scheme.py,sha256=2pV3tPNgo6ovi6FLxP4ZFznEmInlC1L90Pq9I1HI_Xk,9275
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=TuSjKomSk4N0My-UY9PWk2Nyuze6TilEGPsZELgotzk,14716
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
@@ -45,10 +45,10 @@ compressed_tensors/transform/transform_args.py,sha256=rVgReFp7wMXcYugkfd325e2tTF
 compressed_tensors/transform/transform_config.py,sha256=3YdtGcau3qkcapX9GMUiLuhQHFQZKFYT3eLgJGj1L6s,1204
 compressed_tensors/transform/transform_scheme.py,sha256=S7vYLnuv7xZ_bwphkpCiGqZLjnnTnb4lj1T8a6WwnE0,2094
 compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/transform/factory/base.py,sha256=Txkr1nWKtlMU1MmBcQ85-JqJzD356Z9nYbaF24tJ5rw,7755
-compressed_tensors/transform/factory/hadamard.py,sha256=CEy98vOIip_Pomh1XB62BqcjU8GQ9fUZSpnZH4GrBnE,4499
-compressed_tensors/transform/factory/matrix_multiply.py,sha256=boZLMkaNrgXQ9cU-tFzJ-1N1tLgbKMJzAxiYZAr4Pu8,4326
-compressed_tensors/transform/factory/random_hadamard.py,sha256=nUhTlFa4ikSpcl4Umme71pnjMPgwYoGlwjKlU27UHZ4,1634
+compressed_tensors/transform/factory/base.py,sha256=82fwlX4gVlN67H7P_T3pbvN5pB-XQnG-dZJ53evj-DA,7979
+compressed_tensors/transform/factory/hadamard.py,sha256=0SS_gM7b2df8SbsCF9LNbLLBMcwly7bXND1KED8uxhc,4550
+compressed_tensors/transform/factory/matrix_multiply.py,sha256=u-7V04EvEe9G3VEF--YwoVV-h5kmh6hXq8stY_EWmLY,4456
+compressed_tensors/transform/factory/random_hadamard.py,sha256=ck-LF7sl7i9NW4fxLypgHgkw91lc_TpwHO8bXX-0fPU,1577
 compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/transform/utils/hadamard.py,sha256=hDJZC0Gw2fKdxqa3f8TmFc5J0eJqxHtFRxswLU_yVJc,5548
 compressed_tensors/transform/utils/hadamards.safetensors,sha256=mFd1GzNodGG-ifA1IoH-0nHYzfraCOvrq_dX2zFI1B4,1436901
@@ -57,14 +57,14 @@ compressed_tensors/utils/__init__.py,sha256=spzbjUO4-hZ2jXGST27r3MIt2yzIXsjdbEaY
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
 compressed_tensors/utils/match.py,sha256=y03xJyWTXV8bjIPN5Z4S0_w797qMnh-Z4aiPEGQ4zNE,11239
-compressed_tensors/utils/offload.py,sha256=jE9xj3VewMc85iOLWSikqdyjNL9JB3oZpO1uDKKCLUE,24444
+compressed_tensors/utils/offload.py,sha256=b0Q2P0hJLQBGEqdRwOh6SOK3_eJCqHNnIX38-wqeef0,24577
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.11.1a20250903.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.11.1a20250903.dist-info/METADATA,sha256=KMUKwzvh_FjAhk5ABHXpTUIM3dj5DV87o-kJy8RcPvk,7031
-compressed_tensors-0.11.1a20250903.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.11.1a20250903.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.11.1a20250903.dist-info/RECORD,,
+compressed_tensors-0.11.1a20250908.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.11.1a20250908.dist-info/METADATA,sha256=lLokpvLLt0OOrfW9axfgwLmBhqCR7IqyjLCQFRdxCsU,7031
+compressed_tensors-0.11.1a20250908.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.11.1a20250908.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.11.1a20250908.dist-info/RECORD,,