compressed-tensors 0.10.3a20250724__py3-none-any.whl → 0.10.3a20250728__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in a supported public registry, and is provided for informational purposes only. The changes below are mostly formatter-style reflows (wrapped calls, trailing commas), plus a rework of compressed_tensors/utils/match.py (private _match_name/_match_class helpers and InternalModule filtering) and the version bump.
--- a/compressed_tensors/quantization/lifecycle/forward.py
+++ b/compressed_tensors/quantization/lifecycle/forward.py
@@ -112,17 +112,21 @@ def dequantize(
         if scale.shape[1] == 1:
             args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
         # Scale height matches input or is 1 -> group quantization across columns
-        #
+        #
         # Example 1: scale.shape[0] == 1
         #   x_q: (4, 8), scale: (1, 4) -> 2 columns per group
         #
-        # Example 2: scale.shape[0] == x_q.shape[0]
+        # Example 2: scale.shape[0] == x_q.shape[0]
         #   x_q: (4, 8), scale: (4, 4) -> 2 elements per group (per row)
         elif (scale.shape[0] == 1) or (scale.shape[0] == x_q.shape[0]):
             group_size = int(x_q.shape[1] / scale.shape[1])
-            args = QuantizationArgs(strategy=QuantizationStrategy.GROUP, group_size=group_size)
+            args = QuantizationArgs(
+                strategy=QuantizationStrategy.GROUP, group_size=group_size
+            )
         else:
-            args = QuantizationArgs(strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape)
+            args = QuantizationArgs(
+                strategy=QuantizationStrategy.BLOCK, block_structure=scale.shape
+            )
     else:
         raise ValueError(
             f"Could not infer a quantization strategy from scale with {scale.ndim} "
--- a/compressed_tensors/quantization/lifecycle/initialize.py
+++ b/compressed_tensors/quantization/lifecycle/initialize.py
@@ -185,27 +185,29 @@ def _initialize_scale_zero_point(
     elif quantization_args.strategy == QuantizationStrategy.BLOCK:
         # For block quantization, scale shape should match number of blocks - only for weights
         if quantization_args.block_structure is None:
-            raise ValueError("Block quantization requires block_structure to be specified")
+            raise ValueError(
+                "Block quantization requires block_structure to be specified"
+            )
         block_height, block_width = quantization_args.block_structure
         rows, cols = weight_shape[-2], weight_shape[-1]
         num_rows_blocks = math.ceil(rows / block_height)
         num_cols_blocks = math.ceil(cols / block_width)
-
+
         # Warn if dimensions don't divide evenly
         if rows % block_height != 0 or cols % block_width != 0:
             warnings.warn(
                 f"Block quantization: tensor shape {weight_shape} does not divide evenly "
                 f"by block structure {quantization_args.block_structure}. "
                 f"Some blocks will be incomplete which may affect quantization quality.",
-                UserWarning
+                UserWarning,
             )
-
+
         expected_shape = (num_rows_blocks, num_cols_blocks)
     elif quantization_args.strategy == QuantizationStrategy.BLOCK:
         warnings.warn(
             f"BLOCK quantization not supported for {base_name} activations. "
             f"Falling back to tensor-level quantization.",
-            UserWarning
+            UserWarning,
         )
         expected_shape = 1
 
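For the block branch above, the expected scale shape is just ceiling division of the weight dimensions by the block dimensions. A minimal sketch (`block_scale_shape` is a hypothetical name for illustration, not the library API):

```python
import math

def block_scale_shape(weight_shape, block_structure):
    # Expected scale/zero-point shape: one entry per (possibly incomplete) block
    block_height, block_width = block_structure
    rows, cols = weight_shape[-2], weight_shape[-1]
    return (math.ceil(rows / block_height), math.ceil(cols / block_width))

print(block_scale_shape((512, 1024), (128, 128)))  # (4, 8): divides evenly, no warning
print(block_scale_shape((500, 1000), (128, 128)))  # (4, 8): edge blocks incomplete, warning fires
```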
--- a/compressed_tensors/quantization/quant_scheme.py
+++ b/compressed_tensors/quantization/quant_scheme.py
@@ -64,8 +64,9 @@ class QuantizationScheme(BaseModel):
         raise ValueError("Cannot apply actorder to output activations")
 
         if (
-            inputs and weights
-            and weights.strategy == QuantizationStrategy.GROUP
+            inputs
+            and weights
+            and weights.strategy == QuantizationStrategy.GROUP
             and inputs.strategy == QuantizationStrategy.GROUP
             and weights.group_size != inputs.group_size
         ):
@@ -75,7 +76,7 @@ class QuantizationScheme(BaseModel):
                 "may complicate fused kernel implementations. Consider using "
                 "TENSOR_GROUP strategy for both or matching group sizes.",
                 UserWarning,
-                stacklevel=2
+                stacklevel=2,
             )
 
         return model
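A hedged usage sketch of the condition this validator warns about; the field names follow the QuantizationScheme/QuantizationArgs models shown here, but treat the exact constructor arguments as assumptions rather than a verbatim API reference:

```python
from compressed_tensors.quantization import (
    QuantizationArgs,
    QuantizationScheme,
    QuantizationStrategy,
)

# Weights and input activations both use GROUP, but with different group
# sizes, so validation should emit the UserWarning shown above.
scheme = QuantizationScheme(
    targets=["Linear"],
    weights=QuantizationArgs(
        num_bits=4, strategy=QuantizationStrategy.GROUP, group_size=128
    ),
    input_activations=QuantizationArgs(
        num_bits=8, strategy=QuantizationStrategy.GROUP, group_size=64
    ),
)
```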
--- a/compressed_tensors/transform/factory/hadamard.py
+++ b/compressed_tensors/transform/factory/hadamard.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import math
 from typing import Optional, Union
 
-import math
 import torch
 from compressed_tensors.transform import TransformArgs, TransformScheme
 from compressed_tensors.transform.factory.base import TransformBase, TransformFactory
@@ -103,7 +103,8 @@ class HadamardTransform(TransformBase):
 
         if self.args.inverse:
             weight = weight.T
-
-        return apply_transform_weight(
-            weight, value, self.args.location, self.module_type
-        ) / self._scale
+
+        return (
+            apply_transform_weight(weight, value, self.args.location, self.module_type)
+            / self._scale
+        )
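The division by `self._scale` is what keeps the Hadamard transform orthonormal; assuming `_scale` is the usual sqrt(n) normalization (consistent with the `import math` reorder above), the identity it relies on is H @ H.T == n * I. A self-contained check:

```python
import math
import torch

# Sylvester construction: H @ H.T == n * I for an n x n Hadamard matrix,
# so (H / sqrt(n)) is orthonormal.
h2 = torch.tensor([[1.0, 1.0], [1.0, -1.0]])
h4 = torch.kron(h2, h2)
scale = math.sqrt(h4.shape[0])

assert torch.allclose((h4 / scale) @ (h4 / scale).T, torch.eye(4))

# Applying the scaled (symmetric) transform twice is the identity, which is
# what the inverse branch above depends on.
x = torch.randn(4)
assert torch.allclose((h4 / scale) @ ((h4 / scale) @ x), x, atol=1e-6)
```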
--- a/compressed_tensors/utils/match.py
+++ b/compressed_tensors/utils/match.py
@@ -18,6 +18,7 @@ from collections.abc import Generator
 from typing import Iterable, Tuple
 
 import torch
+from compressed_tensors.utils.internal import InternalModule
 
 
 _LOGGER: logging.Logger = logging.getLogger(__name__)
@@ -28,8 +29,6 @@ __all__ = [
     "match_named_parameters",
     "match_modules_set",
     "is_match",
-    "match_name",
-    "match_class",
 ]
 
 
@@ -83,13 +82,16 @@ def match_named_parameters(
     """
     unmatched_targets = set(targets)
     for module_name, module in model.named_modules():
+        if isinstance(module, InternalModule):
+            continue
+
         for param_name, param in module.named_parameters(recurse=False):
             param_fqn = f"{module_name}.{param_name}"
             for target in targets:
-                if match_name(param_fqn, target):
+                if _match_name(param_fqn, target):
                     unmatched_targets -= {target}
 
-            if not any(match_name(param_fqn, ign) for ign in ignore):
+            if not any(_match_name(param_fqn, ign) for ign in ignore):
                 yield param_fqn, module, param
 
     if warn_on_fail:
@@ -165,11 +167,14 @@ def match_modules_set(
 def is_match(name: str, module: torch.nn.Module, target: str) -> bool:
     """
     Returns true if either module name or module parent classes match against target
+    and the module is not an internal module
     """
-    return match_name(name, target) or match_class(module, target)
+    return not isinstance(module, InternalModule) and (
+        _match_name(name, target) or _match_class(module, target)
+    )
 
 
-def match_name(name: str, target: str) -> bool:
+def _match_name(name: str, target: str) -> bool:
     """
     Returns true if target string begins with "re:" and
     regex matches or if target string exactly matches name
@@ -180,7 +185,7 @@ def match_name(name: str, target: str) -> bool:
     return target == name
 
 
-def match_class(module: torch.nn.Module, target: str) -> bool:
+def _match_class(module: torch.nn.Module, target: str) -> bool:
     """
     Returns true if any torch parent class names match the target string exactly
     """
--- a/compressed_tensors/version.py
+++ b/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.10.3.a20250724'
+__version__ = version = '0.10.3.a20250728'
 __version_tuple__ = version_tuple = (0, 10, 3)
--- a/compressed_tensors-0.10.3a20250724.dist-info/METADATA
+++ b/compressed_tensors-0.10.3a20250728.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.3a20250724
+Version: 0.10.3a20250728
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
--- a/compressed_tensors-0.10.3a20250724.dist-info/RECORD
+++ b/compressed_tensors-0.10.3a20250728.dist-info/RECORD
@@ -1,6 +1,6 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=LKiXh8O_XB2unUsk0HmC-_PgpfbOswj5PZqtDHOPnRg,523
+compressed_tensors/version.py,sha256=EY3NpvLIsm31BPA-e32djbQIUYdm3sP8W28lHH72d0Y,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -28,13 +28,13 @@ compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=yKTj_4lAy_pnXeTCyUADpyz2qAzJXYJU2P03NF_TP68,12835
 compressed_tensors/quantization/quant_config.py,sha256=w6sEEZGVGIF0Ub2r_cqRfZwbkBT8WzfY3ug52olmjGY,10049
-compressed_tensors/quantization/quant_scheme.py,sha256=qApRLsPxELe5S2qFv8OVyAZ5TpRL7gT35i4U3c9PAwI,8461
+compressed_tensors/quantization/quant_scheme.py,sha256=xk2LPn18tjS1PEOyf0WKvavBq3rzAVHFLB3H2mQQWnc,8473
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
 compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
-compressed_tensors/quantization/lifecycle/forward.py,sha256=jT70Mbbu9pH10vu5ALVD7VWGoFdMEUpxmihGrf4frjM,17432
+compressed_tensors/quantization/lifecycle/forward.py,sha256=V98jWzb3rfV91EC6kfzAyXtmnbLjNF01Rd_EHU2bLo8,17506
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=3Vuj1a-Y7f_7QXagG7BAeAPnDGtbWGFJXBATg6eT-O0,10241
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=BM7bR_uNa-Ex4T-roHonWiRaxCi5sFysXyl0cFh1ZVs,10257
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
 compressed_tensors/quantization/utils/helpers.py,sha256=Je96Wai9SOizbdE5ph0nsJ86zS96lE4fkf_9q9o2tpA,17212
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -46,7 +46,7 @@ compressed_tensors/transform/transform_config.py,sha256=A3RuLNDqBNEByQNeu40Kg7sI
 compressed_tensors/transform/transform_scheme.py,sha256=uGLC4avdbhrVqNC3-Eo0p7WzNRQK92Fpg0N9hWiuCRQ,1752
 compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
 compressed_tensors/transform/factory/base.py,sha256=Zplf8QO-mFqGwDEhLdYL_afSu7v4nMa79oNhidRNPvY,5880
-compressed_tensors/transform/factory/hadamard.py,sha256=iJ2OyKitR2Duw0z5Jqj69GTih2C1WtHRXQCTtATaTtw,4180
+compressed_tensors/transform/factory/hadamard.py,sha256=B0BVjbF3y707MO6L2XfEoZJTQU965vU9dUPLOiUSXII,4193
 compressed_tensors/transform/factory/matrix_multiply.py,sha256=LdoV2E12HTucmUWcw7UKOpRNnL8QhOOIUnNVlpOpGiI,3925
 compressed_tensors/transform/factory/random_hadamard.py,sha256=nUhTlFa4ikSpcl4Umme71pnjMPgwYoGlwjKlU27UHZ4,1634
 compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
@@ -56,14 +56,14 @@ compressed_tensors/transform/utils/matrix.py,sha256=FIHCUlpWVIIhdr3c6EbQec41JeiP
 compressed_tensors/utils/__init__.py,sha256=KZctuotCmX4byXhwDvSeXgp-Ny_awpziAX-WUkZfodI,853
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
-compressed_tensors/utils/match.py,sha256=DjqTH-J9-E7ULVXPLV-HBRhdi07JhK-H90PbFK-DRAY,7017
+compressed_tensors/utils/match.py,sha256=ZVBPzrGYExq7-6RRUlU5XeCjl0ooLaNUoDO6Cgnn9cY,7220
 compressed_tensors/utils/offload.py,sha256=3XiBuWbUkBAt8v1t5i57qDcbB3VJQs_FDeayi-JzIWg,23896
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.10.3a20250724.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.10.3a20250724.dist-info/METADATA,sha256=ZH66sWeKBfvuLUe-ArnII1LYXG3UAEYUt6D6YPQ_W-M,7031
-compressed_tensors-0.10.3a20250724.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.10.3a20250724.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.10.3a20250724.dist-info/RECORD,,
+compressed_tensors-0.10.3a20250728.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.10.3a20250728.dist-info/METADATA,sha256=rQbbrFahVspKPEfY86EpebdjgoYAtSyyH7JLOPTPcrg,7031
+compressed_tensors-0.10.3a20250728.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.10.3a20250728.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.10.3a20250728.dist-info/RECORD,,