compressed-tensors 0.11.1a20250908__py3-none-any.whl → 0.11.1a20250910__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/model_compressors/model_compressor.py +14 -116
- compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +5 -4
- compressed_tensors/quantization/lifecycle/apply.py +5 -14
- compressed_tensors/quantization/lifecycle/initialize.py +3 -9
- compressed_tensors/quantization/quant_scheme.py +11 -1
- compressed_tensors/utils/offload.py +0 -26
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/METADATA +1 -1
- {compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/RECORD +12 -12
- {compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/model_compressors/model_compressor.py
CHANGED
@@ -50,7 +50,6 @@ from compressed_tensors.utils import (
     get_offloaded_device,
     get_safetensors_folder,
     has_offloaded_params,
-    merge_names,
     register_offload_parameter,
     update_parameter_data,
 )
@@ -224,7 +223,8 @@ class ModelCompressor:
             s_config = compression_config.sparsity_config
             return s_config.model_dump() if s_config is not None else None

-        return compression_config.get(SPARSITY_CONFIG_NAME, None)
+        # explicitly return None if {} in config
+        return compression_config.get(SPARSITY_CONFIG_NAME, None) or None

     @staticmethod
     def parse_quantization_config(
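Note: the appended `or None` coalesces an empty sparsity-config dict to an explicit None, since `{}` is falsy in Python. A quick plain-Python illustration (the config dicts here are hypothetical):

    compression_config = {"sparsity_config": {}}
    assert (compression_config.get("sparsity_config", None) or None) is None

    compression_config = {"sparsity_config": {"format": "sparse-24-bitmask"}}
    assert (compression_config.get("sparsity_config", None) or None) is not None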
@@ -320,112 +320,6 @@ class ModelCompressor:
             format, config=quantization_config
         )

-    # ----- used by hf quantizer ----- #
-
-    def get_missing_module_keys(self, model: Module) -> List[str]:
-        """
-        Identifies the expected missing weight keys in the compressed state_dict.
-
-        When a model undergoes sparsity or quantization compression, certain
-        weight tensors may be absent from the checkpoint by virtue of compression.
-        This function determines which weight keys are missing based on the
-        applied compression techniques.
-
-        :param model: The PyTorch model to check for missing keys.
-        :return: A list of missing keys expected in the compressed state_dict.
-        """
-        missing_keys = set()
-
-        # Determine missing keys due to sparsity compression
-        if (
-            self.sparsity_compressor
-            and self.sparsity_config.format != CompressionFormat.dense.value
-        ):
-            sparse_targets = match_named_modules(
-                model=model,
-                targets=self.sparsity_config.targets,
-                ignore=self.sparsity_config.ignore,
-            )
-
-            missing_keys.update(
-                merge_names(target_name, "weight")
-                for target_name, _module in sparse_targets
-            )
-
-        # Determine missing keys due to pack quantization
-        if (
-            self.quantization_compressor
-            and self.quantization_config.format
-            == CompressionFormat.pack_quantized.value
-        ):
-            for scheme in self.quantization_config.config_groups.values():
-                quant_targets = match_named_modules(
-                    model=model,
-                    targets=scheme.targets,
-                    ignore=self.quantization_config.ignore,
-                )
-                missing_keys.update(
-                    merge_names(target_name, "weight")
-                    for target_name, _module in quant_targets
-                )
-
-        return list(missing_keys)
-
-    def get_unexpected_file_keys(self, model: Module) -> List[str]:
-        """
-        Identifies extra keys introduced by the compression process in the
-        compressed state_dict that are not expected by the model graph.
-
-        During sparsity or quantization compression, additional metadata or
-        auxiliary parameters may be stored in the checkpoint, which do not
-        correspond to any parameter in the original model. These keys are
-        typically introduced to support the reconstruction of compressed weights.
-
-        For example, Sparse24Bitmask compression may introduce keys such as
-        'compressed', 'bitmask', and 'shape' in the checkpoint, which are
-        not part of the original model parameters.
-
-        :param model: The PyTorch model to check for unexpected keys.
-        :return: A list of extra keys introduced by the compression process
-            that are not expected by the model.
-        """
-
-        unexpected_keys = set()
-
-        # Identify unexpected keys from sparsity compression
-        if (
-            self.sparsity_compressor
-            and self.sparsity_config.format != CompressionFormat.dense.value
-        ):
-            sparse_targets = match_named_modules(
-                model=model,
-                targets=self.sparsity_config.targets,
-                ignore=self.sparsity_config.ignore,
-            )
-            unexpected_keys.update(
-                merge_names(target_name, param)
-                for target_name, _module in sparse_targets
-                for param in self.sparsity_compressor.compression_param_names
-            )
-
-        # Identify unexpected keys from quantization compression
-        if self.quantization_compressor:
-            for scheme in self.quantization_config.config_groups.values():
-                quant_targets = match_named_modules(
-                    model=model,
-                    targets=scheme.targets,
-                    ignore=self.quantization_config.ignore,
-                )
-                for quant_compressor in self.quantization_compressor.values():
-                    unexpected_keys.update(
-                        merge_names(target_name, param)
-                        for target_name, _module in quant_targets
-                        for param in quant_compressor.compression_param_names
-                        if param != "weight"
-                    )
-
-        return list(unexpected_keys)
-
     # ----- model memory compression/decompression pathways ----- #

     def compress_model(self, model: Module):
@@ -712,17 +606,16 @@ class ModelCompressor:
             # Load activation scales/zp or any other quantization parameters
             # Conditionally load the weight quantization parameters if we have a
             # dense compressor or if a sparsity compressor has already been applied
+            load_weight_qparams = sparse_decompressed or isinstance(
+                quant_compressor, DenseCompressor
+            )
             load_pretrained_quantization_parameters(
                 model,
                 model_path,
                 # TODO: all weight quantization params will be moved to the
                 # compressor in a follow-up including initialization
-                load_weight_quantization_params=(
-                    sparse_decompressed
-                    or isinstance(quant_compressor, DenseCompressor)
-                ),
+                load_weight_qparams=load_weight_qparams,
             )
-
             model_path_or_state_dict = (
                 model.state_dict() if sparse_decompressed else model_path
             )
@@ -732,7 +625,9 @@ class ModelCompressor:
             )
             # TODO: all weight quantization params will be moved to the compressor
             # to prevent duplicate parameter updates in update_parameter_data
-            self._replace_weights(dense_gen, model)
+            self._replace_weights(
+                dense_gen, model, load_weight_qparams=not load_weight_qparams
+            )

             def freeze_quantization_status(module):
                 module.quantization_status = QuantizationStatus.FROZEN
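Taken together, the two hunks above make weight-qparam handling complementary: the scales and zero points are either loaded from the checkpoint up front or written back by `_replace_weights`, never both. A minimal plain-Python sketch of that invariant, with hypothetical stand-in booleans:

    def qparam_update_paths(sparse_decompressed: bool, is_dense_compressor: bool):
        load_weight_qparams = sparse_decompressed or is_dense_compressor
        loaded_up_front = load_weight_qparams          # load_pretrained_quantization_parameters(...)
        written_on_replace = not load_weight_qparams   # _replace_weights(..., load_weight_qparams=not ...)
        return loaded_up_front, written_on_replace

    for sd in (False, True):
        for dense in (False, True):
            up_front, on_replace = qparam_update_paths(sd, dense)
            assert up_front != on_replace  # exactly one path updates weight qparams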
@@ -819,7 +714,9 @@ class ModelCompressor:
         param = torch.nn.Parameter(data.to(device), requires_grad=requires_grad)
         register_offload_parameter(module, param_name, param)

-    def _replace_weights(self, dense_weight_generator, model: Module):
+    def _replace_weights(
+        self, dense_weight_generator, model: Module, load_weight_qparams: bool = True
+    ):
         """
         Replace the weights of the model with the
         provided dense weights.
@@ -847,6 +744,7 @@ class ModelCompressor:
                 # decompression in init to be consistent with loading which happens
                 # later as well however, update_data does a good shape check -
                 # should be moved to the compressor
+
                 if param_name == "weight":
                     delattr(module, param_name)
                     requires_grad = param_data.dtype in (
@@ -858,7 +756,7 @@ class ModelCompressor:
                         param_data.to(device), requires_grad=requires_grad
                     )
                     register_offload_parameter(module, param_name, param)
-                else:
+                elif load_weight_qparams:
                     # Should already be registered to the correct device for
                     # for scales/zero-points
                     update_parameter_data(module, param_data, param_name)
compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py
CHANGED
@@ -140,6 +140,11 @@ def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
     m, n = x.shape
     device = x.device

+    if n % 2 != 0:
+        raise ValueError(
+            "tensor must have an even number of columns for nvfp4 compression"
+        )
+
     # Create lookup table for FP4 values to indices
     # Map the absolute values to 0-7 indices
     kE2M1 = torch.tensor(FLOAT_TO_E2M1, device=device, dtype=x.dtype)
@@ -155,10 +160,6 @@ def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
     # Reshape to prepare for packing pairs of values
     indices = indices.reshape(-1)

-    # Handle odd length by padding if necessary
-    if indices.numel() % 2 != 0:
-        indices = torch.cat([indices, torch.zeros(1, dtype=torch.long, device=device)])
-
     # Reshape to pair consecutive elements
     indices = indices.reshape(-1, 2)
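The change makes the even-column requirement explicit: two 4-bit codes share one uint8, so a row of n FP4 values packs into n // 2 bytes, and the removed zero-padding fallback would silently change the packed shape and break the round-trip. A rough sketch of the pairing step (illustrative only; the nibble order here is an assumption, not necessarily the compressor's):

    import torch

    codes = torch.tensor([1, 7, 3, 0, 5, 2], dtype=torch.uint8)  # FP4 index codes, 0-15
    pairs = codes.reshape(-1, 2)               # requires an even element count
    packed = (pairs[:, 0] << 4) | pairs[:, 1]  # assumed nibble order, for illustration
    assert packed.numel() == codes.numel() // 2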
compressed_tensors/quantization/lifecycle/apply.py
CHANGED
@@ -65,19 +65,19 @@ _LOGGER = logging.getLogger(__name__)
 def load_pretrained_quantization_parameters(
     model: Module,
     model_name_or_path: Optional[str] = None,
-    load_weight_quantization_params: Optional[bool] = False,
+    load_weight_qparams: Optional[bool] = False,
 ):
     """
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
     a model that has already been initialized with a quantization config.

     NOTE: Will always load inputs/output parameters. Will conditioanlly load weight
-    parameters, if load_weight_quantization_params is set to True.
+    parameters, if load_weight_qparams is set to True.

     :param model: model to load pretrained quantization parameters to
     :param model_name_or_path: Hugging Face stub or local folder containing a quantized
         model, which is used to load quantization parameters
-    :param load_weight_quantization_params: whether or not the weight quantization parameters
+    :param load_weight_qparams: whether or not the weight quantization parameters
         should be loaded
     """
     model_path = get_safetensors_folder(model_name_or_path)
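A hedged usage sketch of the renamed keyword; the import path follows this file's location, and the checkpoint path is a placeholder that must point at a quantized safetensors folder:

    from torch.nn import Module
    from compressed_tensors.quantization.lifecycle.apply import (
        load_pretrained_quantization_parameters,
    )

    def reload_qparams(model: Module) -> None:
        # input/output qparams always load; weight scales/zp only when flagged
        load_pretrained_quantization_parameters(
            model,
            "path/to/quantized-model",   # placeholder: HF stub or local folder
            load_weight_qparams=True,    # renamed keyword
        )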
@@ -103,7 +103,7 @@ def load_pretrained_quantization_parameters(
                 mapping=mapping,
             )

-        if load_weight_quantization_params and submodule.quantization_scheme.weights:
+        if load_weight_qparams and submodule.quantization_scheme.weights:
             base_name = "weight"
             _load_quant_args_from_mapping(
                 base_name=base_name,
@@ -219,18 +219,9 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
     if status >= QuantizationStatus.INITIALIZED > current_status:
         force_zero_point_init = status != QuantizationStatus.COMPRESSED

-        # When decompressing, we set the scale_dtype as the model's dtype
-        # This is because the normal workflow of using the weight's dtype
-        # will be incorrect as the model weight will be compressed
-        # Therfore, use the dtype set by the user using the PretrainedModel
-        scale_dtype = None
-        if status == QuantizationStatus.FROZEN:
-            if hasattr(model, "dtype"):
-                scale_dtype = model.dtype
-
         model.apply(
             lambda module: initialize_module_for_quantization(
-                module, force_zero_point=force_zero_point_init, scale_dtype=scale_dtype
+                module, force_zero_point=force_zero_point_init
            )
        )

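The surviving guard relies on Python's chained comparison. The real QuantizationStatus defines its own lifecycle ordering; a hypothetical IntEnum stand-in shows the semantics:

    from enum import IntEnum

    class Status(IntEnum):  # hypothetical stand-in, not the library's enum
        NONE = 0
        INITIALIZED = 1
        FROZEN = 2
        COMPRESSED = 3

    status, current_status = Status.FROZEN, Status.NONE
    # a >= b > c unfolds to (a >= b) and (b > c)
    assert (status >= Status.INITIALIZED > current_status) == (
        status >= Status.INITIALIZED and Status.INITIALIZED > current_status
    )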
compressed_tensors/quantization/lifecycle/initialize.py
CHANGED
@@ -59,7 +59,6 @@ def initialize_module_for_quantization(
     module: Module,
     scheme: Optional[QuantizationScheme] = None,
     force_zero_point: bool = True,
-    scale_dtype: Optional[torch.dtype] = None,
 ):
     """
     attaches appropriate scales, zero points, and observers to a layer
@@ -73,8 +72,6 @@ def initialize_module_for_quantization(
         if not provided, the layer will be skipped
     :param force_zero_point: whether to force initialization of a zero point for
         symmetric quantization
-    :param scale_dtype: dtype to used for the scales, if overriding the
-        weight dtype as the scale dtype
     """
     # TODO: don't initialize parameters when running decompression
     scheme = scheme or getattr(module, "quantization_scheme", None)
@@ -93,7 +90,6 @@ def initialize_module_for_quantization(
             "input",
             scheme.input_activations,
             force_zero_point=force_zero_point,
-            scale_dtype=scale_dtype,
         )

     if scheme.weights is not None:
@@ -107,7 +103,6 @@ def initialize_module_for_quantization(
             scheme.weights,
             weight_shape=weight_shape,
             force_zero_point=force_zero_point,
-            scale_dtype=scale_dtype,
         )
     else:
         _LOGGER.warning(
@@ -119,7 +114,7 @@ def initialize_module_for_quantization(
     if scheme.output_activations is not None:
         if not is_kv_cache_quant_scheme(scheme):
             _initialize_scale_zero_point(
-                module, "output", scheme.output_activations, scale_dtype=scale_dtype
+                module, "output", scheme.output_activations
             )

     module.quantization_scheme = scheme
@@ -145,7 +140,6 @@ def _initialize_scale_zero_point(
     quantization_args: QuantizationArgs,
     weight_shape: Optional[torch.Size] = None,
     force_zero_point: bool = True,
-    scale_dtype: Optional[torch.dtype] = None,
 ):
     if quantization_args.dynamic is True:
         return
@@ -213,7 +207,7 @@ def _initialize_scale_zero_point(
         expected_shape = 1

     # 3. Identify quantization scale and zp dtype
-    scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
+    scale_dtype = module.weight.dtype

     if is_fp4(quantization_args=quantization_args):
         scale_dtype = zp_dtype = FP8_E4M3_DATA.dtype
@@ -226,7 +220,7 @@ def _initialize_scale_zero_point(
             torch.float32,
             torch.float64,
         ]:
-            scale_dtype = torch.float16
+            scale_dtype = torch.bfloat16
         zp_dtype = quantization_args.pytorch_dtype()

     # 4. Initializes empty scale, zero point, and g_idx parameters for the module
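With the override parameter gone, the scale dtype is derived from the weight alone and then coerced into a safe floating type; bfloat16 keeps float32's exponent range at half the width, so it is less prone to overflow than float16. A small sketch of the resulting selection logic (helper name hypothetical; the FP4 branch that forces FP8 scales is omitted; torch.float8_e4m3fn needs a recent torch):

    import torch

    def pick_scale_dtype(weight_dtype: torch.dtype) -> torch.dtype:
        if weight_dtype in (torch.float16, torch.bfloat16, torch.float32, torch.float64):
            return weight_dtype
        return torch.bfloat16  # e.g. fp8 weights fall back to bfloat16

    assert pick_scale_dtype(torch.float32) is torch.float32
    assert pick_scale_dtype(torch.float8_e4m3fn) is torch.bfloat16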
compressed_tensors/quantization/quant_scheme.py
CHANGED
@@ -63,9 +63,19 @@ class QuantizationScheme(BaseModel):
             if inputs.strategy not in (
                 QuantizationStrategy.TOKEN,
                 QuantizationStrategy.TENSOR,
+                QuantizationStrategy.GROUP,
                 QuantizationStrategy.TENSOR_GROUP,
             ):
                 raise NotImplementedError(
                     f"Using {inputs.strategy} strategy is not supported for "
                     "activation quantization"
                 )
+
+            if (
+                inputs.strategy == QuantizationStrategy.GROUP
+                and inputs.dynamic is not True
+            ):
+                raise NotImplementedError(
+                    "Static and local group-wise activation "
+                    "quantization is not supported"
+                )
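Under the new validation, group-wise input activations are accepted only when fully dynamic. A sketch using the package's public models (field defaults assumed):

    from compressed_tensors.quantization import (
        QuantizationArgs,
        QuantizationScheme,
        QuantizationStrategy,
    )

    dynamic_group = QuantizationArgs(
        num_bits=8, strategy=QuantizationStrategy.GROUP, group_size=128, dynamic=True
    )
    QuantizationScheme(targets=["Linear"], input_activations=dynamic_group)  # accepted

    try:
        QuantizationScheme(
            targets=["Linear"],
            input_activations=QuantizationArgs(
                num_bits=8,
                strategy=QuantizationStrategy.GROUP,
                group_size=128,
                dynamic=False,  # static group-wise activations
            ),
        )
    except NotImplementedError:
        pass  # rejected by the new check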
compressed_tensors/utils/offload.py
CHANGED
@@ -68,10 +68,8 @@ except ImportError:


 __all__ = [
-    "is_module_offloaded",
     "get_execution_device",
     "get_offloaded_device",
-    "update_prefix_dict",
     "update_parameter_data",
     "register_offload_parameter",
     "update_offload_parameter",
@@ -117,11 +115,6 @@ def check_accelerate(fallback: Any):
 """ Candidates for Depreciation """


-@check_accelerate(fallback=False)
-def is_module_offloaded(module: torch.nn.Module) -> bool:
-    return has_offloaded_params(module)
-
-
 def get_offloaded_device(module: torch.nn.Module) -> torch.device:
     """
     :param module: module to check
@@ -137,25 +130,6 @@ def get_offloaded_device(module: torch.nn.Module) -> torch.device:
     return get_execution_device(module)


-@check_accelerate(fallback=None)
-def update_prefix_dict(module: torch.nn.Module, key: str, data: torch.Tensor):
-    """
-    Updates the offloaded state dict for a given module. Parameter named key is replaced
-    by data. This is neccesary because parameter updates for offloaded modules do not
-    persist automatically between loads. This function only affects the offloaded
-    state dict and not the current state of the loaded module.
-
-    :param module: module containing the parameter to update
-    :param key: name of parameter to update
-    :param data: tensor to update parameter with in the offloaded state dict
-    """
-    if not has_offloaded_params(module):
-        raise ValueError("Prefix dict is only applicable to offloaded modules")
-
-    weights_map = module._hf_hook.weights_map
-    offload_to_weights_map(weights_map, key, data)
-
-
 def update_parameter_data(
     module: torch.nn.Module, new_param_data: torch.Tensor, param_name: str
 ):
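Both removed helpers sat under the "Candidates for Depreciation" banner; `has_offloaded_params` and `update_offload_parameter` remain exported and cover the same uses. A sketch (no accelerate hook is attached here, so the guard is simply False):

    import torch
    from compressed_tensors.utils import has_offloaded_params, update_offload_parameter

    module = torch.nn.Linear(4, 4)
    if has_offloaded_params(module):  # stands in for the removed is_module_offloaded
        # persists new data to the offloaded state dict as well as the loaded
        # module, covering the removed update_prefix_dict use case
        update_offload_parameter(module, "weight", torch.zeros(4, 4))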
compressed_tensors/version.py
CHANGED

{compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.11.1a20250908
+Version: 0.11.1a20250910
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/RECORD
RENAMED
@@ -1,15 +1,15 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
-compressed_tensors/version.py,sha256=
+compressed_tensors/version.py,sha256=uspJ2GlCAlOy5_cMN5KqjdnqQs72wgmaYeWLk_2EVHU,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=t_4r2u8PPXMkxKXfqENcmh30q11pG6Xdikj7Pjtf7dw,33444
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
 compressed_tensors/compressors/quantized_compressors/base.py,sha256=rWvaWDqzi8cctBo982g2n3-y6afRiFl3jfTd90lSMrY,10413
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
-compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=
+compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=lrF-FVAZ2OGAd1yFvH5tp7d5_yvi37aZEuJui--7RmQ,7148
 compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=D8h9ltxSIYi1XEKYgbYu1ebbXzCibhPi-eZsBUi0NOg,11245
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
@@ -28,13 +28,13 @@ compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSUKvIm-SCQa-Bq_SF0,12916
 compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
-compressed_tensors/quantization/quant_scheme.py,sha256=
+compressed_tensors/quantization/quant_scheme.py,sha256=EG86Bq5c8q1O4fJL_o3s7gOu1S5SrcLjfNYOPDn414A,9673
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
+compressed_tensors/quantization/lifecycle/apply.py,sha256=Nn0NTtIQ91AWuU05_oYNnVxAXV6C_vW3RW46XcXZwX4,14222
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=xcLTgaff1wYUWzvQqYKmhWYkshWVI-PhLPtBOyyZro0,17576
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=GYH79007BPUojETNyvDm5SdHrnwPFVuMGlA8kXCI2Q0,9925
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
 compressed_tensors/quantization/utils/helpers.py,sha256=-pfSmxqHkrB-RnjF0VYz8lMe9CVnB7IJrONf9Y9fjCo,17014
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -57,14 +57,14 @@ compressed_tensors/utils/__init__.py,sha256=spzbjUO4-hZ2jXGST27r3MIt2yzIXsjdbEaY
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
 compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
 compressed_tensors/utils/match.py,sha256=y03xJyWTXV8bjIPN5Z4S0_w797qMnh-Z4aiPEGQ4zNE,11239
-compressed_tensors/utils/offload.py,sha256=
+compressed_tensors/utils/offload.py,sha256=eXqLzl8kUkVDlNtcO5sn_4QoDcbAaxbCAS3tyZ-aGr8,23538
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
 compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
-compressed_tensors-0.11.1a20250908.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.11.1a20250908.dist-info/METADATA,sha256=
-compressed_tensors-0.11.1a20250908.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.11.1a20250908.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.11.1a20250908.dist-info/RECORD,,
+compressed_tensors-0.11.1a20250910.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.11.1a20250910.dist-info/METADATA,sha256=hoAVyQXgylkzGGRJD4SeIUlVh4FSMWeZLzaeMsKL_RI,7031
+compressed_tensors-0.11.1a20250910.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.11.1a20250910.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.11.1a20250910.dist-info/RECORD,,
{compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/WHEEL
RENAMED
File without changes

{compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/licenses/LICENSE
RENAMED
File without changes

{compressed_tensors-0.11.1a20250908.dist-info → compressed_tensors-0.11.1a20250910.dist-info}/top_level.txt
RENAMED
File without changes