compressed-tensors 0.11.1a20250909__py3-none-any.whl → 0.11.1a20250910__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,7 +50,6 @@ from compressed_tensors.utils import (
     get_offloaded_device,
     get_safetensors_folder,
     has_offloaded_params,
-    merge_names,
     register_offload_parameter,
     update_parameter_data,
 )
@@ -224,7 +223,8 @@ class ModelCompressor:
             s_config = compression_config.sparsity_config
             return s_config.model_dump() if s_config is not None else None
 
-        return compression_config.get(SPARSITY_CONFIG_NAME, None)
+        # explicitly return None if {} in config
+        return compression_config.get(SPARSITY_CONFIG_NAME, None) or None
 
     @staticmethod
     def parse_quantization_config(
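Note on the hunk above: dict.get returns an empty dict unchanged, so a {} sparsity entry used to come back as {}, which passes "is not None" checks downstream despite carrying no configuration. The trailing "or None" collapses any falsy value to None. A minimal sketch of the idiom, using a literal "sparsity_config" key as a stand-in for SPARSITY_CONFIG_NAME:

    # dict.get keeps {} as-is; `... or None` normalizes {} (and None) to None
    config_with_empty = {"sparsity_config": {}}  # hypothetical HF config dict
    config_missing = {}

    assert config_with_empty.get("sparsity_config", None) == {}
    assert (config_with_empty.get("sparsity_config", None) or None) is None
    assert (config_missing.get("sparsity_config", None) or None) is None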
@@ -320,112 +320,6 @@ class ModelCompressor:
             format, config=quantization_config
         )
 
-    # ----- used by hf quantizer ----- #
-
-    def get_missing_module_keys(self, model: Module) -> List[str]:
-        """
-        Identifies the expected missing weight keys in the compressed state_dict.
-
-        When a model undergoes sparsity or quantization compression, certain
-        weight tensors may be absent from the checkpoint by virtue of compression.
-        This function determines which weight keys are missing based on the
-        applied compression techniques.
-
-        :param model: The PyTorch model to check for missing keys.
-        :return: A list of missing keys expected in the compressed state_dict.
-        """
-        missing_keys = set()
-
-        # Determine missing keys due to sparsity compression
-        if (
-            self.sparsity_compressor
-            and self.sparsity_config.format != CompressionFormat.dense.value
-        ):
-            sparse_targets = match_named_modules(
-                model=model,
-                targets=self.sparsity_config.targets,
-                ignore=self.sparsity_config.ignore,
-            )
-
-            missing_keys.update(
-                merge_names(target_name, "weight")
-                for target_name, _module in sparse_targets
-            )
-
-        # Determine missing keys due to pack quantization
-        if (
-            self.quantization_compressor
-            and self.quantization_config.format
-            == CompressionFormat.pack_quantized.value
-        ):
-            for scheme in self.quantization_config.config_groups.values():
-                quant_targets = match_named_modules(
-                    model=model,
-                    targets=scheme.targets,
-                    ignore=self.quantization_config.ignore,
-                )
-                missing_keys.update(
-                    merge_names(target_name, "weight")
-                    for target_name, _module in quant_targets
-                )
-
-        return list(missing_keys)
-
-    def get_unexpected_file_keys(self, model: Module) -> List[str]:
-        """
-        Identifies extra keys introduced by the compression process in the
-        compressed state_dict that are not expected by the model graph.
-
-        During sparsity or quantization compression, additional metadata or
-        auxiliary parameters may be stored in the checkpoint, which do not
-        correspond to any parameter in the original model. These keys are
-        typically introduced to support the reconstruction of compressed weights.
-
-        For example, Sparse24Bitmask compression may introduce keys such as
-        'compressed', 'bitmask', and 'shape' in the checkpoint, which are
-        not part of the original model parameters.
-
-        :param model: The PyTorch model to check for unexpected keys.
-        :return: A list of extra keys introduced by the compression process
-            that are not expected by the model.
-        """
-
-        unexpected_keys = set()
-
-        # Identify unexpected keys from sparsity compression
-        if (
-            self.sparsity_compressor
-            and self.sparsity_config.format != CompressionFormat.dense.value
-        ):
-            sparse_targets = match_named_modules(
-                model=model,
-                targets=self.sparsity_config.targets,
-                ignore=self.sparsity_config.ignore,
-            )
-            unexpected_keys.update(
-                merge_names(target_name, param)
-                for target_name, _module in sparse_targets
-                for param in self.sparsity_compressor.compression_param_names
-            )
-
-        # Identify unexpected keys from quantization compression
-        if self.quantization_compressor:
-            for scheme in self.quantization_config.config_groups.values():
-                quant_targets = match_named_modules(
-                    model=model,
-                    targets=scheme.targets,
-                    ignore=self.quantization_config.ignore,
-                )
-                for quant_compressor in self.quantization_compressor.values():
-                    unexpected_keys.update(
-                        merge_names(target_name, param)
-                        for target_name, _module in quant_targets
-                        for param in quant_compressor.compression_param_names
-                        if param != "weight"
-                    )
-
-        return list(unexpected_keys)
-
     # ----- model memory compression/decompression pathways ----- #
 
     def compress_model(self, model: Module):
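The two helpers deleted above ("used by hf quantizer") enumerated checkpoint keys by joining module paths with parameter names via merge_names, which is why the first hunk also drops merge_names from the imports. A rough sketch of the keys involved, assuming merge_names joins with a dot (the usual state_dict convention) and using an illustrative module path:

    def merge_names(prefix: str, name: str) -> str:
        # stand-in for compressed_tensors.utils.merge_names (assumed dot-join)
        return f"{prefix}.{name}"

    target = "model.layers.0.self_attn.q_proj"  # hypothetical matched module
    # key expected to be missing from a compressed checkpoint:
    missing = merge_names(target, "weight")
    # extra checkpoint-only keys, e.g. for Sparse24Bitmask compression:
    unexpected = [merge_names(target, p) for p in ("compressed", "bitmask", "shape")]
    print(missing, unexpected)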
@@ -712,17 +606,16 @@ class ModelCompressor:
         # Load activation scales/zp or any other quantization parameters
         # Conditionally load the weight quantization parameters if we have a
         # dense compressor or if a sparsity compressor has already been applied
+        load_weight_qparams = sparse_decompressed or isinstance(
+            quant_compressor, DenseCompressor
+        )
         load_pretrained_quantization_parameters(
             model,
             model_path,
             # TODO: all weight quantization params will be moved to the
             # compressor in a follow-up including initialization
-            load_weight_quantization=(
-                sparse_decompressed
-                or isinstance(quant_compressor, DenseCompressor)
-            ),
+            load_weight_qparams=load_weight_qparams,
         )
-
         model_path_or_state_dict = (
             model.state_dict() if sparse_decompressed else model_path
         )
@@ -732,7 +625,9 @@ class ModelCompressor:
         )
         # TODO: all weight quantization params will be moved to the compressor
         # to prevent duplicate parameter updates in update_parameter_data
-        self._replace_weights(dense_gen, model)
+        self._replace_weights(
+            dense_gen, model, load_weight_qparams=not load_weight_qparams
+        )
 
         def freeze_quantization_status(module):
             module.quantization_status = QuantizationStatus.FROZEN
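The two hunks above thread a single load_weight_qparams flag through decompression: when the flag is set, weight scales and zero points are loaded eagerly by load_pretrained_quantization_parameters; otherwise _replace_weights applies them while swapping in the dense weights. Passing "not load_weight_qparams" keeps the two paths mutually exclusive, avoiding the duplicate update_parameter_data calls the TODO mentions. A tiny truth-table check of that invariant (illustrative only, not library code):

    for sparse_decompressed in (False, True):
        for is_dense_compressor in (False, True):
            load_weight_qparams = sparse_decompressed or is_dense_compressor
            eager = load_weight_qparams         # load_pretrained_quantization_parameters
            deferred = not load_weight_qparams  # _replace_weights
            assert eager != deferred            # exactly one path loads weight qparams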
@@ -819,7 +714,9 @@ class ModelCompressor:
         param = torch.nn.Parameter(data.to(device), requires_grad=requires_grad)
         register_offload_parameter(module, param_name, param)
 
-    def _replace_weights(self, dense_weight_generator, model: Module):
+    def _replace_weights(
+        self, dense_weight_generator, model: Module, load_weight_qparams: bool = True
+    ):
         """
         Replace the weights of the model with the
         provided dense weights.
@@ -847,6 +744,7 @@ class ModelCompressor:
                     # decompression in init to be consistent with loading which happens
                     # later as well however, update_data does a good shape check -
                     # should be moved to the compressor
+
                     if param_name == "weight":
                         delattr(module, param_name)
                         requires_grad = param_data.dtype in (
@@ -858,7 +756,7 @@ class ModelCompressor:
                             param_data.to(device), requires_grad=requires_grad
                         )
                         register_offload_parameter(module, param_name, param)
-                    else:
+                    elif load_weight_qparams:
                         # Should already be registered to the correct device for
                         # for scales/zero-points
                         update_parameter_data(module, param_data, param_name)
@@ -65,19 +65,19 @@ _LOGGER = logging.getLogger(__name__)
 def load_pretrained_quantization_parameters(
     model: Module,
     model_name_or_path: Optional[str] = None,
-    load_weight_quantization: Optional[bool] = False,
+    load_weight_qparams: Optional[bool] = False,
 ):
     """
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
     a model that has already been initialized with a quantization config.
 
     NOTE: Will always load inputs/output parameters. Will conditioanlly load weight
-    parameters, if load_weight_quantization is set to True.
+    parameters, if load_weight_qparams is set to True.
 
     :param model: model to load pretrained quantization parameters to
     :param model_name_or_path: Hugging Face stub or local folder containing a quantized
         model, which is used to load quantization parameters
-    :param load_weight_quantization: whether or not the weight quantization parameters
+    :param load_weight_qparams: whether or not the weight quantization parameters
         should be loaded
     """
     model_path = get_safetensors_folder(model_name_or_path)
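For downstream callers, the rename above is a straightforward keyword swap. A hypothetical call site (the checkpoint path is a placeholder):

    load_pretrained_quantization_parameters(
        model,
        model_name_or_path="/path/to/quantized-checkpoint",  # placeholder
        load_weight_qparams=True,  # formerly load_weight_quantization
    )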
@@ -103,7 +103,7 @@ def load_pretrained_quantization_parameters(
             mapping=mapping,
         )
 
-        if load_weight_quantization and submodule.quantization_scheme.weights:
+        if load_weight_qparams and submodule.quantization_scheme.weights:
             base_name = "weight"
             _load_quant_args_from_mapping(
                 base_name=base_name,
@@ -219,18 +219,9 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
     if status >= QuantizationStatus.INITIALIZED > current_status:
         force_zero_point_init = status != QuantizationStatus.COMPRESSED
 
-        # When decompressing, we set the scale_dtype as the model's dtype
-        # This is because the normal workflow of using the weight's dtype
-        # will be incorrect as the model weight will be compressed
-        # Therfore, use the dtype set by the user using the PretrainedModel
-        scale_dtype = None
-        if status == QuantizationStatus.FROZEN:
-            if hasattr(model, "dtype"):
-                scale_dtype = model.dtype
-
         model.apply(
             lambda module: initialize_module_for_quantization(
-                module, force_zero_point=force_zero_point_init, scale_dtype=scale_dtype
+                module, force_zero_point=force_zero_point_init
             )
         )
 
@@ -59,7 +59,6 @@ def initialize_module_for_quantization(
     module: Module,
     scheme: Optional[QuantizationScheme] = None,
     force_zero_point: bool = True,
-    scale_dtype: Optional[torch.dtype] = None,
 ):
     """
     attaches appropriate scales, zero points, and observers to a layer
@@ -73,8 +72,6 @@
         if not provided, the layer will be skipped
     :param force_zero_point: whether to force initialization of a zero point for
         symmetric quantization
-    :param scale_dtype: dtype to used for the scales, if overriding the
-        weight dtype as the scale dtype
     """
     # TODO: don't initialize parameters when running decompression
     scheme = scheme or getattr(module, "quantization_scheme", None)
@@ -93,7 +90,6 @@
             "input",
             scheme.input_activations,
             force_zero_point=force_zero_point,
-            scale_dtype=scale_dtype,
         )
 
     if scheme.weights is not None:
@@ -107,7 +103,6 @@
             scheme.weights,
             weight_shape=weight_shape,
             force_zero_point=force_zero_point,
-            scale_dtype=scale_dtype,
         )
     else:
         _LOGGER.warning(
@@ -119,7 +114,7 @@
     if scheme.output_activations is not None:
         if not is_kv_cache_quant_scheme(scheme):
             _initialize_scale_zero_point(
-                module, "output", scheme.output_activations, scale_dtype=scale_dtype
+                module, "output", scheme.output_activations
             )
 
     module.quantization_scheme = scheme
@@ -145,7 +140,6 @@ def _initialize_scale_zero_point(
     quantization_args: QuantizationArgs,
     weight_shape: Optional[torch.Size] = None,
     force_zero_point: bool = True,
-    scale_dtype: Optional[torch.dtype] = None,
 ):
     if quantization_args.dynamic is True:
         return
@@ -213,7 +207,7 @@
         expected_shape = 1
 
     # 3. Identify quantization scale and zp dtype
-    scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
+    scale_dtype = module.weight.dtype
 
     if is_fp4(quantization_args=quantization_args):
         scale_dtype = zp_dtype = FP8_E4M3_DATA.dtype
@@ -226,7 +220,7 @@
         torch.float32,
         torch.float64,
     ]:
-        scale_dtype = torch.float16
+        scale_dtype = torch.bfloat16
         zp_dtype = quantization_args.pytorch_dtype()
 
     # 4. Initializes empty scale, zero point, and g_idx parameters for the module
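The fallback change above (float16 to bfloat16, taken when the weight dtype is outside the listed float types) trades mantissa precision for exponent range: bfloat16 keeps float32's 8-bit exponent, so extreme scale magnitudes survive the downcast where float16 would flush or overflow them. A quick illustration (printed values approximate):

    import torch

    tiny = torch.tensor(1e-8)
    print(tiny.to(torch.float16))   # 0.0 -- below fp16's smallest subnormal (~6e-8)
    print(tiny.to(torch.bfloat16))  # ~1e-8 -- still representable

    huge = torch.tensor(1e5)
    print(huge.to(torch.float16))   # inf -- above fp16 max (~65504)
    print(huge.to(torch.bfloat16))  # ~1e5 -- still finite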
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.11.1.a20250909'
+__version__ = version = '0.11.1.a20250910'
 __version_tuple__ = version_tuple = (0, 11, 1)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.11.1a20250909
+Version: 0.11.1a20250910
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -1,11 +1,11 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
-compressed_tensors/version.py,sha256=r7NPIWZc4XFCqdYyi4qPxtVWw1N9RBvLtcldfOSxGIA,523
+compressed_tensors/version.py,sha256=uspJ2GlCAlOy5_cMN5KqjdnqQs72wgmaYeWLk_2EVHU,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=mZqpBS5znPHedlVVkKsUsVCs52zK5bAmEiI8cqMBKnY,37618
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=t_4r2u8PPXMkxKXfqENcmh30q11pG6Xdikj7Pjtf7dw,33444
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=rWvaWDqzi8cctBo982g2n3-y6afRiFl3jfTd90lSMrY,10413
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
@@ -30,11 +30,11 @@ compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSU
 compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
 compressed_tensors/quantization/quant_scheme.py,sha256=EG86Bq5c8q1O4fJL_o3s7gOu1S5SrcLjfNYOPDn414A,9673
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=TuSjKomSk4N0My-UY9PWk2Nyuze6TilEGPsZELgotzk,14716
+compressed_tensors/quantization/lifecycle/apply.py,sha256=Nn0NTtIQ91AWuU05_oYNnVxAXV6C_vW3RW46XcXZwX4,14222
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=xcLTgaff1wYUWzvQqYKmhWYkshWVI-PhLPtBOyyZro0,17576
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=f05UF6NaUGvR9qyxes_AgRcvg3KWgk5JeM_-NL1EQG0,10285
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=GYH79007BPUojETNyvDm5SdHrnwPFVuMGlA8kXCI2Q0,9925
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
 compressed_tensors/quantization/utils/helpers.py,sha256=-pfSmxqHkrB-RnjF0VYz8lMe9CVnB7IJrONf9Y9fjCo,17014
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -63,8 +63,8 @@ compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RK
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.11.1a20250909.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.11.1a20250909.dist-info/METADATA,sha256=ZESMNkRHo7FRcoSr9v_JKDGml5oWDK-Tgcboj-0CnE4,7031
-compressed_tensors-0.11.1a20250909.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.11.1a20250909.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.11.1a20250909.dist-info/RECORD,,
+compressed_tensors-0.11.1a20250910.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.11.1a20250910.dist-info/METADATA,sha256=hoAVyQXgylkzGGRJD4SeIUlVh4FSMWeZLzaeMsKL_RI,7031
+compressed_tensors-0.11.1a20250910.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.11.1a20250910.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.11.1a20250910.dist-info/RECORD,,