compressed-tensors 0.9.5a20250513__py3-none-any.whl → 0.9.5a20250514__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/compressed_tensors/compressors/model_compressors/model_compressor.py
+++ b/compressed_tensors/compressors/model_compressors/model_compressor.py
@@ -47,6 +47,9 @@ from compressed_tensors.quantization.utils import (
     iter_named_leaf_modules,
 )
 from compressed_tensors.utils import (
+    align_module_device,
+    delete_offload_parameter,
+    get_execution_device,
     get_safetensors_folder,
     has_offloaded_params,
     merge_names,
@@ -98,6 +101,9 @@ class ModelCompressor:
     :param quantization_config: config specifying quantization compression parameters
     """
 
+    sparsity_config: Optional[SparsityCompressionConfig] = None
+    quantization_config: Optional[QuantizationConfig] = None
+
     @classmethod
     def from_pretrained(
         cls,
@@ -261,6 +267,8 @@ class ModelCompressor:
             quantization_config.format, config=quantization_config
         )
 
+    # ----- used by hf quantizer ----- #
+
     def get_missing_module_keys(self, model: Module) -> List[str]:
         """
         Identifies the expected missing weight keys in the compressed state_dict.
@@ -270,7 +278,6 @@ class ModelCompressor:
         This function determines which weight keys are missing based on the
         applied compression techniques.
 
-
         :param model: The PyTorch model to check for missing keys.
         :return: A list of missing keys expected in the compressed state_dict.
         """
@@ -362,8 +369,124 @@ class ModelCompressor:
 
         return list(unexpected_keys)
 
+    # ----- model memory compression/decompression pathways ----- #
+
+    def compress_model(self, model: Module):
+        """
+        Compress a model in memory. Because the model structure is modified in place,
+        this method is more memory-efficient than `self.compress`
+
+        :param model: model containing parameters to compress
+        """
+        module_to_scheme = map_module_to_scheme(model)
+        sparse_compression_targets: Set[str] = expand_target_names(
+            model=model,
+            targets=self.sparsity_config.targets if self.sparsity_config else [],
+            ignore=self.sparsity_config.ignore if self.sparsity_config else [],
+        )
+
+        for prefix, module in tqdm(model.named_modules(), desc="Compressing model"):
+            if prefix in module_to_scheme or prefix in sparse_compression_targets:
+                # in the future, support compression on same device
+                with align_module_device(module, execution_device="cpu"):
+                    state_dict = module.state_dict(prefix=f"{prefix}.")
+
+                # quantization first
+                if prefix in module_to_scheme:
+                    state_dict = self.quantization_compressor.compress(
+                        state_dict,
+                        names_to_scheme=module_to_scheme,
+                        show_progress=False,
+                    )
+
+                # sparsity second
+                if prefix in sparse_compression_targets:
+                    state_dict = self.sparsity_compressor.compress(
+                        state_dict,
+                        compression_targets=sparse_compression_targets,
+                        show_progress=False,
+                    )
+
+                # remove any existing parameters
+                device = get_execution_device(module)
+                for name, _ in list(module.named_parameters()):
+                    delattr(module, name)
+
+                # replace with compressed parameters
+                for name, value in state_dict.items():
+                    name = name.removeprefix(f"{prefix}.")
+                    value = value.to(device)
+                    param = torch.nn.Parameter(value, requires_grad=False)
+                    register_offload_parameter(module, name, param)
+
+                module.quantization_status = QuantizationStatus.COMPRESSED
+
+    def decompress_model(self, model: Module):
+        """
+        Decompress a model in memory. Because the model structure is modified in place,
+        this method does not require loading some compression parameters from disk
+
+        :param model: model containing parameters to decompress
+        """
+        module_to_scheme = map_module_to_scheme(model)
+        sparse_compression_targets: Set[str] = expand_target_names(
+            model=model,
+            targets=self.sparsity_config.targets if self.sparsity_config else [],
+            ignore=self.sparsity_config.ignore if self.sparsity_config else [],
+        )
+
+        for prefix, module in tqdm(model.named_modules(), desc="Decompressing model"):
+            if prefix in module_to_scheme or prefix in sparse_compression_targets:
+                # in the future, support decompression on same device
+                with align_module_device(module, execution_device="cpu"):
+                    state_dict = module.state_dict(prefix=f"{prefix}.")
+
+                # sparsity first
+                if prefix in sparse_compression_targets:
+                    # sparse_compression_targets are automatically inferred by this fn
+                    generator = self.sparsity_compressor.decompress_from_state_dict(
+                        state_dict,
+                    )
+                    # generates (param_path, param_val)
+                    # of compressed and unused params
+                    state_dict = {key: value for key, value in generator}
+
+                # quantization second
+                if prefix in module_to_scheme:
+                    generator = self.quantization_compressor.decompress_from_state_dict(
+                        state_dict,
+                        names_to_scheme=module_to_scheme,
+                    )
+                    # generates (mod_path, {param_name: param_val})
+                    # of compressed params and used params, but not unused params
+                    # some used params are removed by get_unexpected_file_keys
+                    state_dict = {
+                        merge_names(module_path, param_name): param_value
+                        for module_path, compressed_data in generator
+                        for param_name, param_value in compressed_data.items()
+                    }
+
+                # remove any existing parameters
+                device = get_execution_device(module)
+                for name, _ in list(module.named_parameters()):
+                    delete_offload_parameter(module, name)
+
+                # replace with decompressed parameters
+                for name, value in state_dict.items():
+                    name = name.removeprefix(f"{prefix}.")
+                    value = value.to(device)
+                    param = torch.nn.Parameter(value, requires_grad=False)
+                    register_offload_parameter(module, name, param)
+
+                module.quantization_status = QuantizationStatus.FROZEN
+
+    # ----- state dict compression pathways ----- #
+
     def compress(
-        self, model: Module, state_dict: Optional[Dict[str, Tensor]] = None
+        self,
+        model: Module,
+        state_dict: Optional[Dict[str, Tensor]] = None,
+        show_progress: bool = False,
     ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict or model with sparsity and/or quantization
@@ -379,7 +502,9 @@ class ModelCompressor:
         if self.quantization_compressor is not None:
             module_to_scheme = map_module_to_scheme(model)
             state_dict = self.quantization_compressor.compress(
-                state_dict, names_to_scheme=module_to_scheme
+                state_dict,
+                names_to_scheme=module_to_scheme,
+                show_progress=show_progress,
             )
 
         # TODO: consider sparse compression to also be compression
@@ -397,6 +522,7 @@ class ModelCompressor:
             state_dict = self.sparsity_compressor.compress(
                 state_dict,
                 compression_targets=sparse_compression_targets,
+                show_progress=show_progress,
             )
 
         # HACK: Override the dtype_byte_size function in transformers to
@@ -406,6 +532,8 @@ class ModelCompressor:
 
         return state_dict
 
+    # ----- disk decompression pathways ----- #
+
     def decompress(self, model_path: str, model: Module):
         """
        Overwrites the weights in model with weights decompressed from model_path
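
Taken together, the hunks above split ModelCompressor into three labeled pathways: in-memory compression/decompression, state-dict compression, and disk decompression. A minimal sketch of how the new in-memory methods might be driven, inferred only from the signatures and docstrings in this diff (the checkpoint path is hypothetical, and `model` stands for the matching torch.nn.Module):

    # Sketch, not the library's documented workflow.
    from compressed_tensors.compressors import ModelCompressor

    compressor = ModelCompressor.from_pretrained("./my-compressed-checkpoint")  # hypothetical path

    compressor.decompress_model(model)  # in place; parameters replaced, status -> FROZEN
    # ... run or edit the dense model ...
    compressor.compress_model(model)    # in place; parameters replaced, status -> COMPRESSED

    # The state-dict pathway is unchanged apart from the new flag:
    compressed_state_dict = compressor.compress(model, show_progress=True)

Because both methods replace parameters module by module in place, no second full copy of the model's state dict has to be materialized, which is the memory saving the compress_model docstring refers to.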
--- a/compressed_tensors/compressors/quantized_compressors/base.py
+++ b/compressed_tensors/compressors/quantized_compressors/base.py
@@ -23,7 +23,6 @@ from compressed_tensors.utils import (
     get_nested_mappings_from_state_dict,
     get_nested_weight_mappings,
     merge_names,
-    remove_suffix,
 )
 from safetensors import safe_open
 from torch import Tensor
@@ -71,6 +70,7 @@ class BaseQuantizationCompressor(BaseCompressor):
         self,
         model_state: Dict[str, Tensor],
         names_to_scheme: Dict[str, QuantizationScheme],
+        show_progress: bool = False,
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -79,18 +79,21 @@ class BaseQuantizationCompressor(BaseCompressor):
         :param model_state: state dict of uncompressed model
         :param names_to_scheme: quantization args for each quantized weight, needed for
             quantize function to calculate bit depth
+        :param show_progress: whether to show tqdm progress
         :return: compressed state dict
         """
+        uncompressed_names = list(model_state.keys())
         compressed_dict = {}
         save_device = "cpu"
 
-        uncompressed_names = list(model_state.keys())
-        for name in tqdm(uncompressed_names, desc="Compressing with quantization"):
+        # compress values
+        desc = "Compressing with quantization"
+        for name in tqdm(uncompressed_names, desc=desc, disable=(not show_progress)):
             value = model_state[name]
 
             # compress weights
             if name.endswith("weight"):
-                prefix = remove_suffix(name, "weight")
+                prefix = name.removesuffix("weight")
 
                 # gather qparams
                scale = model_state.get(prefix + "weight_scale", None)
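
The swap from the package's own remove_suffix helper to the str.removesuffix builtin (available since Python 3.9, exactly as the removed helper's comment anticipated; see the utils/helpers.py hunk below) is behavior-preserving when the suffix is present. One nuance: the old helper asserted the suffix existed, while the builtin returns the string unchanged when it does not:

    # Illustrative name only.
    name = "model.layers.0.mlp.down_proj.weight"
    name.removesuffix("weight")    # "model.layers.0.mlp.down_proj."
    name.removesuffix("weight_")   # unchanged; the old helper would have raised AssertionError

Here the call site only reaches this line after name.endswith("weight"), so the dropped assertion was redundant anyway.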
@@ -182,7 +185,7 @@ class BaseQuantizationCompressor(BaseCompressor):
                 )
 
         else:
-            yield from self._decompress_from_state_dict(
+            yield from self.decompress_from_state_dict(
                 path_to_model_or_tensors, names_to_scheme
             )
 
@@ -209,7 +212,11 @@ class BaseQuantizationCompressor(BaseCompressor):
                 weight_data["weight"] = decompressed
                 yield module_path, weight_data
 
-    def _decompress_from_state_dict(self, state_dict, names_to_scheme):
+    def decompress_from_state_dict(
+        self,
+        state_dict: Dict[str, torch.Tensor],
+        names_to_scheme: Dict[str, QuantizationScheme],
+    ) -> Generator[Tuple[str, Dict[str, torch.Tensor]], None, None]:
         weight_mappings = get_nested_mappings_from_state_dict(
             state_dict, self.compression_param_names
         )
@@ -219,7 +226,7 @@ class BaseQuantizationCompressor(BaseCompressor):
                 weight_data[param_name] = param_value
 
             if "weight_scale" in weight_data:
-                quant_args = names_to_scheme[module_path]
+                quant_args = names_to_scheme[module_path].weights
                 decompressed = self.decompress_weight(
                     compressed_data=weight_data, quantization_args=quant_args
                 )
--- a/compressed_tensors/compressors/sparse_compressors/base.py
+++ b/compressed_tensors/compressors/sparse_compressors/base.py
@@ -16,7 +16,11 @@ import logging
 from typing import Dict, Generator, Optional, Set, Tuple
 
 from compressed_tensors.compressors.base import BaseCompressor
-from compressed_tensors.utils import get_nested_weight_mappings, merge_names
+from compressed_tensors.utils import (
+    get_nested_mappings_from_state_dict,
+    get_nested_weight_mappings,
+    merge_names,
+)
 from safetensors import safe_open
 from torch import Tensor
 from tqdm import tqdm
@@ -63,6 +67,7 @@ class BaseSparseCompressor(BaseCompressor):
         self,
         model_state: Dict[str, Tensor],
         compression_targets: Optional[Set[str]] = None,
+        show_progress: bool = False,
     ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict using bitmask compression
@@ -76,7 +81,11 @@ class BaseSparseCompressor(BaseCompressor):
         _LOGGER.debug(
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
-        for name, value in tqdm(model_state.items(), desc="Compressing model"):
+        for name, value in tqdm(
+            model_state.items(),
+            desc="Compressing with sparsity",
+            disable=(not show_progress),
+        ):
             if not self.should_compress(name, compression_targets):
                 compressed_dict[name] = value
                 continue
@@ -124,15 +133,15 @@ class BaseSparseCompressor(BaseCompressor):
             self.compression_param_names,
             return_unmatched_params=True,
         )
-        for weight_name in weight_mappings.keys():
+        for module_path in weight_mappings.keys():
             weight_data = {}
-            for param_name, safe_path in weight_mappings[weight_name].items():
-                full_name = merge_names(weight_name, param_name)
+            for param_name, safe_path in weight_mappings[module_path].items():
+                full_name = merge_names(module_path, param_name)
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
 
             decompressed = self.decompress_weight(weight_data)
-            yield merge_names(weight_name, "weight"), decompressed
+            yield merge_names(module_path, "weight"), decompressed
 
         for ignored_param_name, safe_path in ignored_params.items():
             should_skip = False
158
+ def decompress_from_state_dict(
159
+ self,
160
+ state_dict: Dict[str, Tensor],
161
+ ) -> Generator[Tuple[str, Dict[str, Tensor]], None, None]:
162
+ """
163
+ Decompress the state dict of a module (or model)
164
+
165
+ Unlike `self.decompress`, this function does not need to explicitly skip params
166
+ via params_to_skip_load because it is more convenient for its only caller
167
+ (ModelCompressor.decompress_model) to retrieve all unused param keys
168
+
169
+ :param state_dict: state dict containing parameters to decompress
170
+ :return: Generator of (param_path, param_val)
171
+ """
172
+ weight_mappings, ignored_params = get_nested_mappings_from_state_dict(
173
+ state_dict, self.compression_param_names, return_unmatched_params=True
174
+ )
175
+
176
+ for module_path in weight_mappings.keys():
177
+ weight_data = {}
178
+ for param_name, param_value in weight_mappings[module_path].items():
179
+ weight_data[param_name] = param_value
180
+
181
+ decompressed = self.decompress_weight(weight_data)
182
+ yield merge_names(module_path, "weight"), decompressed
183
+
184
+ for ignored_param_path, ignored_param_value in ignored_params.items():
185
+ yield ignored_param_path, ignored_param_value
186
+
149
187
  @staticmethod
150
188
  def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
151
189
  """
--- a/compressed_tensors/compressors/sparse_compressors/dense.py
+++ b/compressed_tensors/compressors/sparse_compressors/dense.py
@@ -40,3 +40,10 @@ class DenseCompressor(BaseCompressor):
         self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         return iter([])
+
+    def decompress_from_state_dict(
+        self,
+        state_dict: Dict[str, Tensor],
+    ) -> Generator[Tuple[str, Tensor], None, None]:
+        for key, value in state_dict.items():
+            yield key, value
--- a/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
+++ b/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from dataclasses import dataclass
-from typing import Dict, List, Tuple, Union
+from typing import Dict, Generator, List, Tuple, Union
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
@@ -202,11 +202,7 @@ def sparse24_bitmask_decompress(
     decompressed_tensor = torch.zeros(original_shape, dtype=values.dtype)
     decompressed_tensor = decompressed_tensor.to(values.device)
     values = values.flatten()
-    if decompressed_tensor.dtype == FP8_DTYPE:
-        decompressed_tensor[bytemasks_unpacked] = values
-        decompressed_tensor = decompressed_tensor.cuda()
-    else:
-        decompressed_tensor[bytemasks_unpacked] = values
+    decompressed_tensor[bytemasks_unpacked] = values
     return decompressed_tensor
 
 
--- a/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py
+++ b/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py
@@ -125,6 +125,7 @@ class Marlin24Compressor(BaseCompressor):
         self,
         model_state: Dict[str, Tensor],
         names_to_scheme: Dict[str, QuantizationScheme],
+        show_progress: bool = False,
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -134,6 +135,7 @@ class Marlin24Compressor(BaseCompressor):
         :param model_state: state dict of uncompressed model
         :param names_to_scheme: quantization scheme for each quantized weight, needed
             for quantize function to calculate bit depth
+        :param show_progress: whether to show tqdm progress
         :return: compressed state dict
         """
         self.validate_quant_compatability(names_to_scheme)
@@ -144,7 +146,9 @@ class Marlin24Compressor(BaseCompressor):
             f"Compressing model with {len(model_state)} parameterized layers..."
         )
 
-        for name, value in tqdm(model_state.items(), desc="Compressing model"):
+        for name, value in tqdm(
+            model_state.items(), desc="Compressing model", disable=(not show_progress)
+        ):
             if name.endswith(weight_suffix):
                 prefix = name[: -(len(weight_suffix))]
                 scale = model_state.get(merge_names(prefix, "weight_scale"), None)
--- a/compressed_tensors/linear/compressed_linear.py
+++ b/compressed_tensors/linear/compressed_linear.py
@@ -23,6 +23,7 @@ from compressed_tensors.quantization import (
     initialize_module_for_quantization,
 )
 from compressed_tensors.utils import register_offload_parameter
+from compressed_tensors.utils.offload import get_execution_device
 from torch import Tensor
 from torch.nn import Parameter
 from torch.nn.functional import linear
@@ -60,7 +61,7 @@ class CompressedLinear(Linear):
         """
         module.__class__ = CompressedLinear
         module.compressor = BaseCompressor.load_from_registry(quantization_format)
-        device = next(module.parameters()).device
+        init_device = get_execution_device(module)
 
         # this will initialize all the scales and zero points
         initialize_module_for_quantization(
@@ -79,7 +80,7 @@ class CompressedLinear(Linear):
         # populate compressed weights and quantization parameters
         for name, (shape, dtype) in compression_params.items():
             param = Parameter(
-                torch.empty(shape, device=device, dtype=dtype), requires_grad=False
+                torch.empty(shape, device=init_device, dtype=dtype), requires_grad=False
             )
             register_offload_parameter(module, name, param)
 
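
A hedged reading of this change, based only on the call sites visible in this diff (the semantics of get_execution_device live in compressed_tensors/utils/offload.py, which is not shown here): when a module's parameters are offloaded, next(module.parameters()).device can report the offload location (for example meta or cpu) rather than the device the module will actually run on, so tensors created from it could land on the wrong device:

    # Assumed behavior, not confirmed by this diff:
    device = next(module.parameters()).device   # offload device when hooks are attached
    init_device = get_execution_device(module)  # device the module executes on

register_offload_parameter then presumably folds the freshly created parameter back into the module's offload scheme.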
--- a/compressed_tensors/utils/helpers.py
+++ b/compressed_tensors/utils/helpers.py
@@ -38,7 +38,6 @@ __all__ = [
     "shard_tensor",
     "pack_bitmasks",
     "unpack_bitmasks",
-    "remove_suffix",
 ]
 
 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -329,9 +328,3 @@ def unpack_bitmasks(
     )
 
     return unpacked_bitmasks_torch
-
-
-def remove_suffix(value: str, suffix: str) -> str:
-    # can replace with str.removesuffix in python3.9+
-    assert value.endswith(suffix)
-    return value[: -len(suffix)]
--- a/compressed_tensors/utils/safetensors_load.py
+++ b/compressed_tensors/utils/safetensors_load.py
@@ -35,6 +35,7 @@ __all__ = [
     "is_quantization_param",
 ]
 
+NestedStateDictType = Dict[str, Dict[str, Tensor]]
 WeightMappingType = Dict[str, str]
 NestedWeightMappingType = Dict[str, WeightMappingType]
 
@@ -249,8 +250,10 @@ def get_nested_weight_mappings(
 
 
 def get_nested_mappings_from_state_dict(
-    state_dict, params_to_nest: Iterable[str]
-) -> NestedWeightMappingType:
+    state_dict: Dict[str, Tensor],
+    params_to_nest: Iterable[str],
+    return_unmatched_params: bool = False,
+) -> Union[NestedStateDictType, Tuple[NestedStateDictType, Dict[str, Tensor]]]:
     """
     Takes a state dict and returns a nested mapping from uncompressed
     parameterized layer names to the value of
@@ -266,16 +269,26 @@ def get_nested_mappings_from_state_dict(
     :param state_dict: state dict of the model
     :param params_to_nest: Iterable of parameter names to nest.
     :return: Nested mapping of parameterized layer names to the value of
-        each layer's compression parameters.
+        each layer's compression parameters. If `return_unmatched_params`, then
+        also return a dictionary mapping unused parameter names to their values
     """
     nested_weight_mappings = {}
+    unmatched_params = {}
+
     for key in state_dict.keys():
+        matched = False
         for param_name in params_to_nest:
             module_path = match_param_name(key, param_name)
             if module_path:
                 if module_path not in nested_weight_mappings:
                     nested_weight_mappings[module_path] = {}
                 nested_weight_mappings[module_path][param_name] = state_dict[key]
+                matched = True
+        if return_unmatched_params and not matched:
+            unmatched_params[key] = state_dict[key]
+
+    if return_unmatched_params:
+        return nested_weight_mappings, unmatched_params
     return nested_weight_mappings
 
 
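
A worked example of the new return_unmatched_params flag, using hypothetical tensor names (t0/t1/t2 are placeholder tensors):

    state_dict = {
        "decoder.layers.0.weight_packed": t0,
        "decoder.layers.0.weight_scale": t1,
        "decoder.layers.0.bias": t2,
    }
    nested, unmatched = get_nested_mappings_from_state_dict(
        state_dict,
        params_to_nest=["weight_packed", "weight_scale"],
        return_unmatched_params=True,
    )
    # nested    == {"decoder.layers.0": {"weight_packed": t0, "weight_scale": t1}}
    # unmatched == {"decoder.layers.0.bias": t2}

This is what lets BaseSparseCompressor.decompress_from_state_dict above yield unmatched params through instead of silently dropping them.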
--- a/compressed_tensors/version.py
+++ b/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.9.5.a20250513'
+__version__ = version = '0.9.5.a20250514'
 __version_tuple__ = version_tuple = (0, 9, 5)
--- a/compressed_tensors-0.9.5a20250513.dist-info/METADATA
+++ b/compressed_tensors-0.9.5a20250514.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.5a20250513
+Version: 0.9.5a20250514
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
--- a/compressed_tensors-0.9.5a20250513.dist-info/RECORD
+++ b/compressed_tensors-0.9.5a20250514.dist-info/RECORD
@@ -1,30 +1,30 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=anvjogcrhtUkFpir0cHcNUxTI9qeqiV_ELg-b1mW8Xc,521
+compressed_tensors/version.py,sha256=DipxWeTHSaI4zbOq4qATqt0oCJgWFClVEMZaHMo3xZE,521
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=uh3Rbyqhjvt8o8On6ioOn6utBKv2siRRmAvgM1lDrxU,26555
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=L-xfTooii1ubjMJeNQp2QL9_Q_T2Z6L1oqYdBYotDAc,32393
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
-compressed_tensors/compressors/quantized_compressors/base.py,sha256=n0L2QH2_Y1vWtLeQ0uV78y2lV4bviFEAtUKODl8L_nw,8828
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=4YWT95GIhHETI7glsk_ITrnUzzN1MhEypt-0z9eKqOI,9134
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=fd0KlkSx6bvZ3xwIkK3jEUdPSUPs56Eua4dEDOtzKW0,5150
 compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Gw-lVzk5jrKUlM5UTCiJBmhM5gHzB9mn8r298MVUbDI,6395
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=SPIHlk8ewip2LcjgkCw02K21EkfUSFSd9qQqL0Pt5eM,11162
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
-compressed_tensors/compressors/sparse_compressors/base.py,sha256=PMiWIaW2XSF_esYJlQ12RVW7opeAzavdbkRFtelMFX0,6655
-compressed_tensors/compressors/sparse_compressors/dense.py,sha256=_uW_HISeDNz4yboSZWoh6GwrkUE6HFibzPQSKrHOCkg,1505
-compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=mEKSSgpXookqYSJw3mlyP6cYYKD-eaIvpQMvi4JO6TY,8807
+compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
+compressed_tensors/compressors/sparse_compressors/dense.py,sha256=rPaxbP7P52prWNs4lGaiBbpNvsQLElFMwOrq1oBP2Yg,1733
+compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=4cwkj40SFrXEyE_jyt2xjz3R-gTdU9uMpMFUKo1pRBA,8643
 compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=S8vW0FI9ep_XtUQOxj0P5utJt3vKEYOHjWEPp-Xd9aY,5820
 compressed_tensors/compressors/sparse_quantized_compressors/__init__.py,sha256=4f_cwcKXB1nVVMoiKgTFAc8jAPjPLElo-Df_EDm1_xw,675
-compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=7VRLmtUTg1iJl3mXiOzLPi1RgIOhMISPAwzVi8v2QF0,9951
+compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=7F9J6wgkecitK5hHuqjetZ18HExHIF4QIw1wgm2Y6U8,10099
 compressed_tensors/config/__init__.py,sha256=8sOoZ6xvYSC79mBvEtO8l6xk4PC80d29AnnJiGMrY2M,737
 compressed_tensors/config/base.py,sha256=p3glQHvC2fjodf_SvlelVrTWSIjGXgGC86t8oVOlMng,3529
 compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
 compressed_tensors/config/sparse_24_bitmask.py,sha256=Lhj39zT2V1hxftprvxvneyhv45ShlXOKd75DBbDTyTE,1401
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/linear/compressed_linear.py,sha256=_m6XpNcI53eeSHO8VdiuAM6UBTdpDhn5Ivd8iRMwEKc,3980
+compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=CepGBAURFGxBzTyFXxHwsUs6wYEJ46_jPbEvJYMG0Tw,10491
 compressed_tensors/quantization/quant_config.py,sha256=MxSUcb5dOqMN6LFyD5K2h8X0TvEtcWIAoiUJqD2dHGE,10159
@@ -40,14 +40,14 @@ compressed_tensors/quantization/utils/helpers.py,sha256=-wX0H7zVysJ67jRRCGbx6Bfx
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
-compressed_tensors/utils/helpers.py,sha256=Le3LWskSQRr7pw8fWy5qmfDKYlKiQFy0id83uVi4fUg,10610
+compressed_tensors/utils/helpers.py,sha256=RrNvzD08naEjEiXdU-FdZjQVda1nQywu1hA_GCDj0vg,10415
 compressed_tensors/utils/offload.py,sha256=JNQ66_6vhSsizhlUaMgyEdBuFolYxbgUuT1mAZrCfKY,15436
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
-compressed_tensors/utils/safetensors_load.py,sha256=kkkUDmS1H40MFy6FDP-DFGiAYbtqke6bKE7YrAtORtA,11499
+compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.9.5a20250513.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.9.5a20250513.dist-info/METADATA,sha256=gMyJF3nQLJu7DCEbM0LDyD2GeuKwu6zxewO6qST7_s8,7004
-compressed_tensors-0.9.5a20250513.dist-info/WHEEL,sha256=DnLRTWE75wApRYVsjgc6wsVswC54sMSJhAEd4xhDpBk,91
-compressed_tensors-0.9.5a20250513.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.9.5a20250513.dist-info/RECORD,,
+compressed_tensors-0.9.5a20250514.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.9.5a20250514.dist-info/METADATA,sha256=wEGdTDwjdDlrcGnYaYXPdatEZj_dykpwIcdAwDxqEtk,7004
+compressed_tensors-0.9.5a20250514.dist-info/WHEEL,sha256=QZxptf4Y1BKFRCEDxD4h2V0mBFQOVFLFEpvxHmIs52A,91
+compressed_tensors-0.9.5a20250514.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.9.5a20250514.dist-info/RECORD,,
--- a/compressed_tensors-0.9.5a20250513.dist-info/WHEEL
+++ b/compressed_tensors-0.9.5a20250514.dist-info/WHEEL
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.4.0)
+Generator: setuptools (80.6.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 