compressed-tensors 0.10.3a20250708__py3-none-any.whl → 0.10.3a20250710__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- a/compressed_tensors/compressors/model_compressors/model_compressor.py
+++ b/compressed_tensors/compressors/model_compressors/model_compressor.py
@@ -42,10 +42,7 @@ from compressed_tensors.quantization import (
     load_pretrained_quantization_parameters,
 )
 from compressed_tensors.quantization.lifecycle import expand_target_names
-from compressed_tensors.quantization.utils import (
-    is_module_quantized,
-    iter_named_leaf_modules,
-)
+from compressed_tensors.quantization.utils import is_module_quantized
 from compressed_tensors.utils import (
     align_module_device,
     delete_offload_parameter,
@@ -393,9 +390,16 @@ class ModelCompressor:
         )

         for prefix, module in tqdm(model.named_modules(), desc="Compressing model"):
+
             if prefix in module_to_scheme or prefix in sparse_compression_targets:
+                module_device = get_execution_device(module)
+                is_meta = module_device.type == "meta"
+
+                exec_device = "meta" if is_meta else "cpu"
+                onloading_device = "meta" if is_meta else module_device
+
                 # in the future, support compression on same device
-                with align_module_device(module, execution_device="cpu"):
+                with align_module_device(module, execution_device=exec_device):
                     state_dict = module.state_dict(prefix=f"{prefix}.")

                 # quantization first
@@ -404,6 +408,7 @@ class ModelCompressor:
                         state_dict,
                         names_to_scheme=module_to_scheme,
                         show_progress=False,
+                        compression_device=exec_device,
                     )

                 # sparsity second
@@ -415,7 +420,6 @@ class ModelCompressor:
                     )

                 # remove any existing parameters
-                exec_device = get_execution_device(module)
                 offload_device = get_offloaded_device(module)
                 for name, _ in list(module.named_parameters()):
                     delete_offload_parameter(module, name)
@@ -423,7 +427,7 @@ class ModelCompressor:
                 # replace with compressed parameters
                 for name, value in state_dict.items():
                     name = name.removeprefix(f"{prefix}.")
-                    value = value.to(exec_device)
+                    value = value.to(onloading_device)
                     param = torch.nn.Parameter(value, requires_grad=False)
                     register_offload_parameter(module, name, param, offload_device)

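Taken together, the hunks above let `ModelCompressor` handle modules that live on the meta device: meta modules are "compressed" without materializing data, while everything else is still onloaded to cpu and returned to its original device. A minimal sketch of the dispatch, assuming `get_execution_device` is importable from `compressed_tensors.utils` as the diff suggests (the helper name below is ours, not the library's):

```python
import torch
from compressed_tensors.utils import get_execution_device

def select_compression_devices(module: torch.nn.Module):
    """Illustrative helper: pick the devices used to compress one module."""
    module_device = get_execution_device(module)
    is_meta = module_device.type == "meta"
    exec_device = "meta" if is_meta else "cpu"               # where compression runs
    onloading_device = "meta" if is_meta else module_device  # where results land
    return exec_device, onloading_device
```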
@@ -747,7 +751,7 @@ def map_module_to_scheme(model: Module) -> Dict[str, QuantizationScheme]:
     """
     return {
         fix_fsdp_module_name(name): module.quantization_scheme
-        for name, module in iter_named_leaf_modules(model)
+        for name, module in model.named_modules()
         if is_module_quantized(module)
     }

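The `map_module_to_scheme` change is part of a broader migration in this release: ad-hoc `iter_named_leaf_modules` traversals are replaced by plain `model.named_modules()` plus a predicate. A hedged sketch of the resulting pattern (the function name is ours):

```python
import torch
from compressed_tensors.quantization.utils import is_module_quantized

def quantized_module_names(model: torch.nn.Module) -> list:
    # visit every submodule and keep only those carrying a quantization_scheme
    return [
        name for name, module in model.named_modules()
        if is_module_quantized(module)
    ]
```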
--- a/compressed_tensors/compressors/quantized_compressors/base.py
+++ b/compressed_tensors/compressors/quantized_compressors/base.py
@@ -72,6 +72,7 @@ class BaseQuantizationCompressor(BaseCompressor):
         model_state: Dict[str, Tensor],
         names_to_scheme: Dict[str, QuantizationScheme],
         show_progress: bool = False,
+        compression_device: str = "cpu",
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -85,7 +86,6 @@ class BaseQuantizationCompressor(BaseCompressor):
         """
         uncompressed_names = list(model_state.keys())
         compressed_dict = {}
-        save_device = "cpu"

         # compress values
         desc = "Compressing with quantization"
@@ -104,10 +104,10 @@ class BaseQuantizationCompressor(BaseCompressor):

                 # if scale does not exist, then weight cannot be compressed
                 if scale is None:
-                    compressed_dict[name] = value.to(save_device)
+                    compressed_dict[name] = value.to(compression_device)
                     continue

-                # compress values on cpu (memory movement too expensive)
+                # compress values on meta if loading from meta, otherwise on cpu (memory movement too expensive)
                 module_path = prefix[:-1] if prefix.endswith(".") else prefix
                 quant_args = names_to_scheme[module_path].weights
                 compressed_values = self.compress_weight(
@@ -117,12 +117,12 @@ class BaseQuantizationCompressor(BaseCompressor):
                     global_scale=global_scale,
                     g_idx=g_idx,
                     quantization_args=quant_args,
-                    device="cpu",
+                    device=compression_device,
                 )

                 # update state dict
                 for key, value in compressed_values.items():
-                    compressed_dict[prefix + key] = value.to(save_device)
+                    compressed_dict[prefix + key] = value.to(compression_device)

             else:
                 # omit saving zero points for symmetric or packed quantization
@@ -133,8 +133,7 @@ class BaseQuantizationCompressor(BaseCompressor):
                 # TODO: does this case actually occur?
                 elif name.endswith("g_idx") and torch.any(value <= -1):
                     continue
-
-                compressed_dict[name] = value.to(save_device)
+                compressed_dict[name] = value.to(compression_device)

         return compressed_dict

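With the hardcoded `save_device = "cpu"` gone, callers choose where compressed tensors are produced via the new `compression_device` keyword. A small sketch of how a caller might thread the device through, assuming `compressor` is any `BaseQuantizationCompressor` subclass (the wrapper function is ours):

```python
from typing import Dict
import torch

def compress_state_dict(compressor, state_dict: Dict[str, torch.Tensor],
                        names_to_scheme, is_meta: bool) -> Dict[str, torch.Tensor]:
    # meta tensors carry no storage, so compressing "on meta" only tracks
    # output shapes and dtypes; real data still goes through cpu
    return compressor.compress(
        state_dict,
        names_to_scheme=names_to_scheme,
        show_progress=False,
        compression_device="meta" if is_meta else "cpu",
    )
```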
--- a/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
+++ b/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
@@ -220,30 +220,34 @@ def pack_to_int32(
     if num_bits < 1:
         raise ValueError(f"num_bits must be at least 1, got {num_bits}")

-    # convert to unsigned for packing
+    # Convert to unsigned range for packing, matching quantization offset
     offset = 1 << (num_bits - 1)
     value = (value + offset).to(torch.uint8)
-    value = value.cpu().numpy().astype(np.uint32)
+    device = value.device
+
     pack_factor = 32 // num_bits

-    # pad input tensor and initialize packed output
-    packed_size = math.ceil(value.shape[packed_dim] / pack_factor)
-    padding = packed_size * pack_factor - value.shape[packed_dim]
-    value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
+    if packed_dim == 0:
+        value = value.transpose(0, 1)

-    # pack values
-    if packed_dim == 1:
-        packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
-        for i in range(pack_factor):
-            packed |= value[:, i::pack_factor] << num_bits * i
-    else:
-        packed = np.zeros((packed_size, value.shape[1]), dtype=np.uint32)
-        for i in range(pack_factor):
-            packed |= value[i::pack_factor, :] << num_bits * i
+    rows, cols = value.shape
+    padded_cols = math.ceil(cols / pack_factor) * pack_factor
+    pad_len = padded_cols - cols
+
+    if pad_len > 0:
+        value = torch.nn.functional.pad(value, (0, pad_len))
+
+    num_groups = padded_cols // pack_factor
+
+    # use int32 so the shifted groups can be summed into 32-bit words
+    reshaped = value.view(rows, num_groups, pack_factor).to(torch.int32)
+    bit_shifts = torch.arange(pack_factor, device=device, dtype=torch.int32) * num_bits
+    packed = (reshaped << bit_shifts).sum(dim=2, dtype=torch.int32)
+
+    if packed_dim == 0:
+        packed = packed.transpose(0, 1)

-    # convert back to signed and torch
-    packed = np.ascontiguousarray(packed).view(np.int32)
-    return torch.from_numpy(packed)
+    return packed


 def unpack_from_int32(
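The rewrite drops the numpy round-trip, so packing now stays on the source device and also works for meta tensors. A self-contained illustration of the bit layout (not library code): with `num_bits=4`, eight values occupy one int32 word, least-significant group first, and the round trip recovers the signed inputs.

```python
import torch

num_bits = 4
pack_factor = 32 // num_bits                       # 8 values per int32 word
vals = torch.tensor([[-8, -1, 0, 1, 2, 3, 4, 5]])  # already within [-8, 7]

offset = 1 << (num_bits - 1)                       # shift into unsigned range
unsigned = (vals + offset).to(torch.int32)         # [[0, 7, 8, 9, 10, 11, 12, 13]]
shifts = torch.arange(pack_factor, dtype=torch.int32) * num_bits
packed = (unsigned << shifts).sum(dim=-1, dtype=torch.int32)  # one word per row

# an arithmetic right shift plus a mask recovers each 4-bit group
unpacked = ((packed.unsqueeze(-1) >> shifts) & (2 * offset - 1)) - offset
assert torch.equal(unpacked, vals.to(torch.int32))
```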
--- a/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
+++ b/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py
@@ -56,8 +56,10 @@ class Sparse24BitMaskCompressor(BaseSparseCompressor):
         bitmask_tensor = Sparse24BitMaskTensor.from_dense(
             value, self.config.sparsity_structure
         )
-        bitmask_dict = bitmask_tensor.dict(name_prefix=name, device="cpu")
-        return bitmask_dict
+        return bitmask_tensor.dict(
+            name_prefix=name,
+            device="meta" if value.is_meta else "cpu",
+        )

     def decompress_weight(self, weight_data):
         data = Sparse24BitMaskTensor.from_compressed_data(**weight_data)
@@ -90,9 +92,14 @@ class Sparse24BitMaskTensor:
         :return: instantiated compressed tensor
         """
         shape = list(tensor.shape)
-        compressed, bitmask = sparse24_bitmask_compress(
-            tensor.cpu(), sparsity_structure=sparsity_structure
-        )
+        if tensor.is_meta:
+            compressed, bitmask = sparse24_bitmask_compress(
+                tensor, sparsity_structure=sparsity_structure
+            )
+        else:
+            compressed, bitmask = sparse24_bitmask_compress(
+                tensor.cpu(), sparsity_structure=sparsity_structure
+            )
         return Sparse24BitMaskTensor(
             shape=shape,
             compressed=compressed,
@@ -169,6 +176,17 @@ def sparse24_bitmask_compress(
         SparsityStructure(sparsity_structure) == SparsityStructure.TWO_FOUR
     ), "Only 2:4 sparsity is supported"

+    if tensor.is_meta:
+        num_rows, num_cols = tensor.shape
+        compressed_values = torch.empty(
+            (num_rows, num_cols // 2), dtype=tensor.dtype, device="meta"
+        )
+        packed_cols = (num_cols + 7) // 8
+        bitmasks_packed = torch.empty(
+            (num_rows, packed_cols), dtype=torch.uint8, device="meta"
+        )
+        return compressed_values, bitmasks_packed
+
     bytemasks = get_24_bytemasks(tensor=tensor)

     if tensor.dtype == FP8_DTYPE:
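The meta fast path only needs shape arithmetic: 2:4 sparsity keeps exactly half of each row's values, and the bitmask stores one bit per column, packed eight to a uint8. An illustrative check with assumed dimensions:

```python
import torch

num_rows, num_cols = 4, 16
compressed_values = torch.empty((num_rows, num_cols // 2),
                                dtype=torch.bfloat16, device="meta")
packed_cols = (num_cols + 7) // 8  # ceil-divide column bits into bytes
bitmasks_packed = torch.empty((num_rows, packed_cols),
                              dtype=torch.uint8, device="meta")

assert compressed_values.shape == (4, 8)  # half the columns survive
assert bitmasks_packed.shape == (4, 2)    # 16 mask bits -> 2 bytes per row
```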
--- a/compressed_tensors/quantization/lifecycle/apply.py
+++ b/compressed_tensors/quantization/lifecycle/apply.py
@@ -38,8 +38,6 @@ from compressed_tensors.quantization.utils import (
     KV_CACHE_TARGETS,
     infer_quantization_status,
     is_kv_cache_quant_scheme,
-    iter_named_leaf_modules,
-    iter_named_quantizable_modules,
 )
 from compressed_tensors.utils.helpers import fix_fsdp_module_name, replace_module
 from compressed_tensors.utils.offload import update_parameter_data
@@ -87,7 +85,7 @@ def load_pretrained_quantization_parameters(
     model_path = get_safetensors_folder(model_name_or_path)
     mapping = get_quantization_parameter_to_path_mapping(model_path)

-    for name, submodule in iter_named_leaf_modules(model):
+    for name, submodule in model.named_modules():
         if not is_module_quantized(submodule):
             continue
         if submodule.quantization_scheme.input_activations is not None:
@@ -152,11 +150,7 @@ def apply_quantization_config(
     # list of submodules to ignore
     ignored_submodules = defaultdict(list)
     # mark appropriate layers for quantization by setting their quantization schemes
-    for name, submodule in iter_named_quantizable_modules(
-        model,
-        include_children=True,
-        include_attn=True,
-    ):  # child modules and attention modules
+    for name, submodule in model.named_modules():
         # potentially fix module name to remove FSDP wrapper prefix
         name = fix_fsdp_module_name(name)
         if matches := find_name_or_class_matches(name, submodule, config.ignore):
@@ -287,7 +281,7 @@ def expand_target_names(
     """
     return {
         name
-        for name, module in iter_named_leaf_modules(model)
+        for name, module in model.named_modules()
         if is_target(name, module, targets, ignore)
     }

@@ -328,6 +322,11 @@ def find_name_or_class_matches(
     2. matches on regex patterns
     3. matches on module names
     """
+    from compressed_tensors import InternalModule
+
+    if isinstance(module, InternalModule):
+        return []
+
     targets = sorted(targets, key=lambda x: ("re:" in x, x))
     if isinstance(targets, Iterable):
         matches = _find_matches(name, targets) + _find_matches(
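The early return above is what makes the bare `named_modules()` traversals safe: bookkeeping modules now inherit from `InternalModule` and are excluded from target matching. A hedged sketch of the filtering pattern (the generator is ours):

```python
import torch
from compressed_tensors import InternalModule

def matchable_modules(model: torch.nn.Module):
    # yield only modules that belong to the model definition itself
    for name, module in model.named_modules():
        if isinstance(module, InternalModule):
            continue
        yield name, module
```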
--- a/compressed_tensors/quantization/lifecycle/initialize.py
+++ b/compressed_tensors/quantization/lifecycle/initialize.py
@@ -189,7 +189,7 @@ def _initialize_scale_zero_point(
     else:
         # TODO: consider erroring out in the future, as if the dtype is not one of
         # these, there is likely a bug
-        if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32]:
+        if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32, torch.float64]:
             scale_dtype = torch.float16
         zp_dtype = quantization_args.pytorch_dtype()

--- a/compressed_tensors/quantization/quant_config.py
+++ b/compressed_tensors/quantization/quant_config.py
@@ -22,9 +22,7 @@ from compressed_tensors.quantization.quant_scheme import (
     preset_name_to_scheme,
 )
 from compressed_tensors.quantization.utils import (
-    calculate_compression_ratio,
     is_module_quantized,
-    iter_named_quantizable_modules,
     module_type,
     parse_out_kv_cache_args,
 )
@@ -177,9 +175,7 @@ class QuantizationConfig(BaseModel):
         quantization_status = None
         ignore = {}
         quantization_type_names = set()
-        for name, submodule in iter_named_quantizable_modules(
-            model, include_children=True, include_attn=True
-        ):
+        for name, submodule in model.named_modules():
             layer_type = module_type(submodule)
             if not is_module_quantized(submodule):
                 if layer_type not in ignore:
--- a/compressed_tensors/quantization/utils/helpers.py
+++ b/compressed_tensors/quantization/utils/helpers.py
@@ -26,6 +26,7 @@ from compressed_tensors.quantization.quant_args import (
     QuantizationType,
 )
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
+from compressed_tensors.utils import deprecated
 from torch import FloatTensor, IntTensor, Tensor
 from torch.nn import Module
 from tqdm import tqdm
@@ -36,7 +37,6 @@ __all__ = [
     "is_module_quantized",
     "is_model_quantized",
     "module_type",
-    "calculate_compression_ratio",
     "get_torch_bit_depth",
     "can_quantize",
     "parse_out_kv_cache_args",
@@ -276,12 +276,7 @@ def is_model_quantized(model: Module) -> bool:
     :param model: pytorch model
     :return: True if model is quantized, False otherwise
     """
-
-    for _, submodule in iter_named_leaf_modules(model):
-        if is_module_quantized(submodule):
-            return True
-
-    return False
+    return any(is_module_quantized(submodule) for submodule in model.modules())


 def module_type(module: Module) -> str:
@@ -294,6 +289,11 @@ def module_type(module: Module) -> str:
     return type(module).__name__


+@deprecated(
+    message="This function will be removed in a future release. "
+    "Please use `model.named_modules()` and filter by "
+    "compressed_tensors.InternalModule if necessary"
+)
 def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None, None]:
     """
     Yields modules that do not have any submodules except observers. The observers
@@ -320,6 +320,11 @@ def iter_named_leaf_modules(model: Module) -> Generator[Tuple[str, Module], None, None]:
             yield name, submodule


+@deprecated(
+    message="This function will be removed in a future release. "
+    "Please use `model.named_modules()` and filter by "
+    "compressed_tensors.InternalModule if necessary"
+)
 def iter_named_quantizable_modules(
     model: Module,
     include_children: bool = True,
@@ -330,7 +335,6 @@ def iter_named_quantizable_modules(
     Yield name and submodule of
     - leaf modules, set by include_children
     - attention modules, set by include_attn
-
     :param model: model to get leaf modules of
     :param include_children: flag to get the leaf modules
     :param include_attn: flag to get the attention modules
@@ -397,34 +401,6 @@ def can_quantize(value: torch.Tensor, quant_args: "QuantizationArgs") -> bool:
     return bit_depth > quant_args.num_bits


-def calculate_compression_ratio(model: Module) -> float:
-    """
-    Calculates the quantization compression ratio of a pytorch model, based on the
-    number of bits needed to represent the total weights in compressed form. Does not
-    take into account activation quantizatons.
-
-    :param model: pytorch module to calculate compression ratio for
-    :return: compression ratio of the whole model
-    """
-    total_compressed = 0.0
-    total_uncompressed = 0.0
-    for name, submodule in tqdm(
-        iter_named_leaf_modules(model),
-        desc="Calculating quantization compression ratio",
-    ):
-        for parameter in model.parameters():
-            uncompressed_bits = get_torch_bit_depth(parameter)
-            compressed_bits = uncompressed_bits
-            if is_module_quantized(submodule) and submodule.quantization_scheme.weights:
-                compressed_bits = submodule.quantization_scheme.weights.num_bits
-
-            num_weights = parameter.numel()
-            total_compressed += compressed_bits * num_weights
-            total_uncompressed += uncompressed_bits * num_weights
-
-    return total_uncompressed / total_compressed
-
-
-
 def is_kv_cache_quant_scheme(scheme: QuantizationScheme) -> bool:
     """
     Check whether the QuantizationScheme targets the kv cache.
--- a/compressed_tensors/transform/__init__.py
+++ b/compressed_tensors/transform/__init__.py
@@ -23,3 +23,4 @@ from .factory.base import *
 from .factory.hadamard import *
 from .factory.matrix_multiply import *
 from .factory.random_hadamard import *
+from .apply import *
--- /dev/null
+++ b/compressed_tensors/transform/apply.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from compressed_tensors.transform import TransformConfig, TransformFactory
+
+
+__all__ = ["apply_transform_config"]
+
+
+def apply_transform_config(model: torch.nn.Module, config: TransformConfig):
+    """
+    Apply a transform config to a model. Weight transforms are fused into weights,
+    while activation transforms are attached as submodules and triggered via pytorch hooks
+
+    :param model: model to apply config to
+    :param config: transform config to apply
+    """
+    for name, scheme in config.config_groups.items():
+        factory = TransformFactory.from_scheme(scheme, name=name)
+        factory.apply_to_model(model)
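A hedged usage sketch for the new entrypoint. `TransformScheme` and `TransformArgs` come from the `transform_scheme.py` and `transform_args.py` modules listed in the RECORD below; the exact field names and values here are assumptions for illustration, not a documented preset:

```python
import torch
from compressed_tensors.transform import (
    TransformArgs,
    TransformConfig,
    TransformScheme,
    apply_transform_config,
)

model = torch.nn.Sequential(torch.nn.Linear(64, 64, bias=False))
config = TransformConfig(
    config_groups={
        # assumed scheme: fuse a hadamard rotation into Linear weights
        "rotation": TransformScheme(
            type="hadamard",
            apply=[TransformArgs(targets=["Linear"], location="weight_input")],
        )
    }
)
apply_transform_config(model, config)
```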
--- a/compressed_tensors/transform/factory/base.py
+++ b/compressed_tensors/transform/factory/base.py
@@ -17,6 +17,7 @@ from typing import Optional

 import torch
 import torch.nn.utils.parametrize as P
+from compressed_tensors import InternalModule
 from compressed_tensors.quantization.lifecycle import is_target  # TODO: move to utils
 from compressed_tensors.registry.registry import RegistryMixin, T
 from compressed_tensors.transform import (
@@ -26,6 +27,7 @@ from compressed_tensors.transform import (
 )
 from compressed_tensors.utils import (
     align_module_device,
+    delete_offload_module,
     has_offloaded_params,
     patch_attr,
     register_offload_module,
@@ -99,7 +101,7 @@ class TransformFactory(RegistryMixin, ABC):
         # create transform as submodule
         transform_name = f"{self.name}_{args.location.value}"
         transform = self.create_transform(module, args)
-        register_offload_module(module, transform_name, transform)  # (1)
+        register_offload_module(module, transform_name, transform)

         # register input transformation hook
         if args.location == TransformLocation.INPUT:
@@ -118,6 +120,7 @@ class TransformFactory(RegistryMixin, ABC):
             assert isinstance(module, torch.nn.Linear)
             assert module.bias is None

+            # fuse transform into weight
             with torch.no_grad(), align_module_device(module):
                 update_offload_parameter(module, "weight", transform(module.weight))

@@ -128,6 +131,9 @@ class TransformFactory(RegistryMixin, ABC):
                     raise ValueError("Offloaded training is not supported")
                 P.register_parametrization(module, "weight", transform)

+            # transform is no longer needed (unfusing is not supported)
+            delete_offload_module(module, transform_name)
+
         # register output transformation hook
         elif args.location == TransformLocation.OUTPUT:

@@ -140,11 +146,8 @@ class TransformFactory(RegistryMixin, ABC):
         else:
             raise NotImplementedError()

-        # (1) even in the `weight` cases, this submodule attachment is needed in order
-        # to support saving in the frozen state
-

-class TransformBase(Module, ABC):
+class TransformBase(InternalModule, ABC):
     """
     Represents the application of a transform according to TransformArgs
     """
--- a/compressed_tensors/utils/__init__.py
+++ b/compressed_tensors/utils/__init__.py
@@ -14,6 +14,7 @@
 # flake8: noqa

 from .helpers import *
+from .internal import *
 from .offload import *
 from .permutations_24 import *
 from .permute import *
--- /dev/null
+++ b/compressed_tensors/utils/internal.py
@@ -0,0 +1,29 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

+import torch
+
+
+__all__ = ["InternalModule"]
+
+
+class InternalModule(torch.nn.Module):
+    """
+    Abstract base class for modules which are not a part of the model definition.
+    `torch.nn.Module`s which inherit from this class will not be targeted by configs
+
+    This is typically used to skip applying configs to `Observers` and `Transforms`
+    """
+
+    pass
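A short sketch of the intended pattern: bookkeeping modules inherit from `InternalModule`, and traversals filter on `isinstance`. The toy observer below is ours, not part of the library:

```python
import torch
from compressed_tensors import InternalModule

class RunningAbsMax(InternalModule):
    """Toy observer-style helper; not part of the model definition."""

    def __init__(self):
        super().__init__()
        self.register_buffer("absmax", torch.tensor(0.0))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        self.absmax = torch.maximum(self.absmax, x.abs().max())
        return x

model = torch.nn.Sequential(torch.nn.Linear(8, 8), RunningAbsMax())
# configs and target matching skip the observer automatically
real_layers = [(n, m) for n, m in model.named_modules()
               if not isinstance(m, InternalModule)]
```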
--- a/compressed_tensors/version.py
+++ b/compressed_tensors/version.py
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE

-__version__ = version = '0.10.3.a20250708'
+__version__ = version = '0.10.3.a20250710'
 __version_tuple__ = version_tuple = (0, 10, 3)
--- a/compressed_tensors-0.10.3a20250708.dist-info/METADATA
+++ b/compressed_tensors-0.10.3a20250710.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.10.3a20250708
+Version: 0.10.3a20250710
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
--- a/compressed_tensors-0.10.3a20250708.dist-info/RECORD
+++ b/compressed_tensors-0.10.3a20250710.dist-info/RECORD
@@ -1,20 +1,20 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=rt9C5dMk9h9d8wWYD1c12HvB1Cl7FNRVoxT2irO1UxE,523
+compressed_tensors/version.py,sha256=gqWN6LSE0C4r0hiPn2KpmE4YPwpZnktZcE8p9BTZksQ,523
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
 compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=nt0KxhZakDdlTIebBYcSvqxLCZhA6p6IL_1AYiHLFug,32695
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=Zl33ceJu_KH_Vx2B5EK-bvE7W5OklOL0hb-QqdOVSsE,32906
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
-compressed_tensors/compressors/quantized_compressors/base.py,sha256=ByE3z61boZ5wdz0nhc-2CJH61bSixJQE78pfkS6XRDg,10269
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=YGUMzbxekj_36ChgQnVZN6T8uDjXtGG1zfMIBGBLWco,10354
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
 compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Gw-lVzk5jrKUlM5UTCiJBmhM5gHzB9mn8r298MVUbDI,6395
-compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=_66tQ8bxslDUdas-ULORXblPw9kdNNn1UJJU9-ZOGPY,11380
+compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=47W1hFTi5YHVNKEWptzztsSutwI1kxy2Troh-NW1y14,11244
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
 compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
 compressed_tensors/compressors/sparse_compressors/dense.py,sha256=-OujJ1e0iXBvxYVULrIGvAZ9l-IC0mXczZRnimQdgo4,2314
-compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=4cwkj40SFrXEyE_jyt2xjz3R-gTdU9uMpMFUKo1pRBA,8643
+compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=p8cNV-W4TZKaWDlCwjptQyaIrwqlHizZ1Pn4Vx3-ANk,9262
 compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=S8vW0FI9ep_XtUQOxj0P5utJt3vKEYOHjWEPp-Xd9aY,5820
 compressed_tensors/compressors/sparse_quantized_compressors/__init__.py,sha256=4f_cwcKXB1nVVMoiKgTFAc8jAPjPLElo-Df_EDm1_xw,675
 compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py,sha256=7F9J6wgkecitK5hHuqjetZ18HExHIF4QIw1wgm2Y6U8,10099
@@ -27,24 +27,25 @@ compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajC
 compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=2OpiiSdl4KidzNmjx7J8UlQoAYmt5k5GdXv_73ELw0A,11823
-compressed_tensors/quantization/quant_config.py,sha256=aFi6PKqmEX9iP9O8GVn3mEUjRDEwk_hOCbmmiq-j9oU,10198
+compressed_tensors/quantization/quant_config.py,sha256=w6sEEZGVGIF0Ub2r_cqRfZwbkBT8WzfY3ug52olmjGY,10049
 compressed_tensors/quantization/quant_scheme.py,sha256=IDWa1GWUbUdWCo8j78Jz6svYF5hLz89J2PVYWBBnXRc,7102
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=v7D0TJU_eLT20Odn_J1VCPo2twll2ra-wxlEGBKB2OA,17990
+compressed_tensors/quantization/lifecycle/apply.py,sha256=wM8mVcbKvZjBo18pSXMp28i30YWwUXJPSS7_HCakH9U,17892
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=JWOQ-03bsgh9_nnOLAjmLZ0S8bFQA-GjwDK6YUBwcrU,14883
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=9d5Ee7qt3zxaa5_PFitkvadvRDXeDqBIxYgooBqtrf8,8638
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=D7yxua1zELmsBYlQiJUTiClBOMIe2J0-IrN2d-jLFPk,8653
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
-compressed_tensors/quantization/utils/helpers.py,sha256=bqxNL2NU1XVsSxNzmDVZE3zd65PlLFq1Ir-RHwff8G0,17840
+compressed_tensors/quantization/utils/helpers.py,sha256=24MJ6-Az_LpZQziFOUCroM3povOAtKAfYLrdfiBmLO4,17018
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=0s15BxdGgzBv8RL4kUJCYcuDOFUh_KZYvNvLEeRqWTc,11956
-compressed_tensors/transform/__init__.py,sha256=mtUOzwq-H7fXGi7sMmfe7zj83fjMg_LAu4DjTZ5vaHk,886
+compressed_tensors/transform/__init__.py,sha256=v2wfl4CMfA6KbD7Hxx_MbRev63y_6QLDlccZq-WTtdw,907
+compressed_tensors/transform/apply.py,sha256=Cnc7Q8d8FzpLGtXixvdPzqApfjAXpfShxvVl_7nNJ4E,1259
 compressed_tensors/transform/transform_args.py,sha256=8-Ab5_dFfdObfwVCgrWrEWcoVRzXmMBSDSUxjftI-Ss,3177
 compressed_tensors/transform/transform_config.py,sha256=A3RuLNDqBNEByQNeu40Kg7sItwE6kWgnX18Umg1uONI,2128
 compressed_tensors/transform/transform_scheme.py,sha256=JAFQoCiNLg04diXG5KsynRGcLIB0Y0tC5s8U7HoDM7c,1692
 compressed_tensors/transform/factory/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
-compressed_tensors/transform/factory/base.py,sha256=yVrYWEnrr2RFWE5AjSNeXzO9aXc443dTNMVSxuLztz8,5940
+compressed_tensors/transform/factory/base.py,sha256=fgG97-HqqxekWkVAXF3dCm7E_1h9d-Bplhu0y-QdoqQ,6050
 compressed_tensors/transform/factory/hadamard.py,sha256=oLdDUu1p82lgD7li-sHMSvXZxz1SDjLeYf-EfXqNzvk,3918
 compressed_tensors/transform/factory/matrix_multiply.py,sha256=KYiQRGFSU33TpPWkGTKwNADTmYoU0E3hjQypOMclHbg,3689
 compressed_tensors/transform/factory/random_hadamard.py,sha256=nUhTlFa4ikSpcl4Umme71pnjMPgwYoGlwjKlU27UHZ4,1634
@@ -52,15 +53,16 @@ compressed_tensors/transform/utils/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh
 compressed_tensors/transform/utils/hadamard.py,sha256=U27Kvo-eDebKcVt8oXTSIAaQ5DvPQj9tDv2hdXHCPPQ,5584
 compressed_tensors/transform/utils/hadamards.safetensors,sha256=mFd1GzNodGG-ifA1IoH-0nHYzfraCOvrq_dX2zFI1B4,1436901
 compressed_tensors/transform/utils/utils.py,sha256=PRPTYwPs2nnNaQMq2GEbC4QYKHFKlZwaRyPgdDhl66g,2992
-compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
+compressed_tensors/utils/__init__.py,sha256=QFQzF6MpV3yStajPzYktZkmvZsxvfpKUZq2oGbd1Cvw,832
 compressed_tensors/utils/helpers.py,sha256=Q3iRAa2XSdmmn4vSpUplnvKOmWwn4Clao9ZkPBHXtpI,12604
+compressed_tensors/utils/internal.py,sha256=7SSWgDoNFRnlfadwkoFhLW-T2jOc7Po_WzWv5h32Sa8,982
 compressed_tensors/utils/offload.py,sha256=3XiBuWbUkBAt8v1t5i57qDcbB3VJQs_FDeayi-JzIWg,23896
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
 compressed_tensors/utils/safetensors_load.py,sha256=DMfZBuUbA6qp_BG_zIWT3ckiEE33K9ob34s-OgzReO4,12057
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.10.3a20250708.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.10.3a20250708.dist-info/METADATA,sha256=eY_wXSsGo1nsV1y993HgKBKU9KCbQDoi9VdSFEAkRes,7031
-compressed_tensors-0.10.3a20250708.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-compressed_tensors-0.10.3a20250708.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.10.3a20250708.dist-info/RECORD,,
+compressed_tensors-0.10.3a20250710.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.10.3a20250710.dist-info/METADATA,sha256=TodSeLplEMKQAyDzDHZDT7DPvFFEK5-qUokJdpw2yCg,7031
+compressed_tensors-0.10.3a20250710.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+compressed_tensors-0.10.3a20250710.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.10.3a20250710.dist-info/RECORD,,