compressed-tensors 0.9.4a20250414__py3-none-any.whl → 0.9.5a20250424__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ import torch
 from compressed_tensors.config import SparsityCompressionConfig
 from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
 from compressed_tensors.registry import RegistryMixin
+from compressed_tensors.utils import has_offloaded_params
 from torch import Tensor
 from torch.nn import Module
 
@@ -169,6 +170,10 @@ class BaseCompressor(RegistryMixin, ABC):
         :param module: PyTorch module to decompress
         :return: tensor of the decompressed weight, or None if module is not quantized
         """
+
+        params_device = next(module.parameters()).device
+        device = "cpu" if has_offloaded_params(module) else params_device
+
         if not hasattr(module, "quantization_scheme"):
             return None  # module is not quantized
         quantization_scheme = module.quantization_scheme
@@ -182,7 +187,7 @@ class BaseCompressor(RegistryMixin, ABC):
 
         return self.decompress_weight(
             compressed_data=compressed_data, quantization_args=quantization_args
-        )
+        ).to(device)
 
     def decompress_weight(
         self, compressed_data: Dict[str, Tensor], **kwargs
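The two hunks above route decompressed module weights to an offload-aware device. Below is a minimal sketch of that selection logic (an illustration, not part of the package); it assumes only that has_offloaded_params is importable as shown in the import hunk above.

```python
# Illustrative helper (hypothetical name): decompressed weights for offloaded
# modules stay on CPU so accelerate can onload them later; otherwise they follow
# the module's current parameter device.
import torch
from compressed_tensors.utils import has_offloaded_params


def target_device_for_decompression(module: torch.nn.Module) -> torch.device:
    params_device = next(module.parameters()).device
    return torch.device("cpu") if has_offloaded_params(module) else params_device
```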
@@ -31,13 +31,14 @@ from compressed_tensors.base import (
     SPARSITY_CONFIG_NAME,
 )
 from compressed_tensors.compressors.base import BaseCompressor
+from compressed_tensors.compressors.sparse_compressors import DenseCompressor
 from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
 from compressed_tensors.quantization import (
     DEFAULT_QUANTIZATION_METHOD,
     QuantizationConfig,
     QuantizationStatus,
     apply_quantization_config,
-    load_pretrained_quantization,
+    load_pretrained_quantization_parameters,
 )
 from compressed_tensors.quantization.lifecycle import expand_target_names
 from compressed_tensors.quantization.quant_args import QuantizationArgs
@@ -47,7 +48,9 @@ from compressed_tensors.quantization.utils import (
 )
 from compressed_tensors.utils import (
     get_safetensors_folder,
+    has_offloaded_params,
     merge_names,
+    register_offload_parameter,
     update_parameter_data,
 )
 from compressed_tensors.utils.helpers import (
@@ -382,6 +385,7 @@ class ModelCompressor:
             compressed_state_dict = self.quantization_compressor.compress(
                 state_dict, names_to_scheme=quantized_modules_to_args
             )
+
             if self.quantization_config.format != CompressionFormat.dense.value:
                 self.quantization_config.quantization_status = (
                     QuantizationStatus.COMPRESSED
@@ -411,6 +415,13 @@ class ModelCompressor:
 
         :param model_path: path to compressed weights
         :param model: pytorch model to load decompressed weights into
+
+        Note: decompress makes use of both _replace_sparsity_weights and
+        _replace_weights. The two helpers differ because the compressors return
+        slightly different outputs: quantization compressors yield the quantization
+        parameters (e.g. scales, zero_point) alongside the decompressed weight,
+        whereas sparsity compressors yield only the decompressed weight.
+
         """
         model_path = get_safetensors_folder(model_path)
         sparse_decompressed = False
@@ -419,9 +430,16 @@ class ModelCompressor:
             self.sparsity_compressor is not None
             and self.sparsity_config.format != CompressionFormat.dense.value
         ):
+            params_to_ignore = None
+            if self.quantization_compressor is not None:
+                params_to_ignore = self.quantization_compressor.compression_param_names
             # Sparse decompression is applied on the model_path
-            dense_gen = self.sparsity_compressor.decompress(model_path)
-            self._replace_weights(dense_gen, model)
+            # The sparsity compressor would otherwise try to load quantization
+            # parameters as well; params_to_skip_load keeps them from being loaded here
+            dense_gen = self.sparsity_compressor.decompress(
+                model_path, params_to_skip_load=params_to_ignore
+            )
+            self._replace_sparsity_weights(dense_gen, model)
             setattr(model, SPARSITY_CONFIG_NAME, self.sparsity_compressor.config)
             sparse_decompressed = True
 
@@ -430,13 +448,27 @@ class ModelCompressor:
             # quantization during apply_quantization_config. This ensures
             # that the dtypes of the weights are not unintentionally updated.
             # The status is restored after quantization params are loaded.
+
             with override_quantization_status(
                 self.quantization_config, QuantizationStatus.FROZEN
             ):
+
                 names_to_scheme = apply_quantization_config(
                     model, self.quantization_config
                 )
-                load_pretrained_quantization(model, model_path)
+                # Load activation scales/zp or any other quantization parameters
+                # Conditionally load the weight quantization parameters if we have
+                # a dense compressor or if a sparsity compressor has already been applied
+                load_pretrained_quantization_parameters(
+                    model,
+                    model_path,
+                    # TODO: all weight quantization params will be moved to the
+                    # compressor in a follow-up, including initialization
+                    load_weight_quantization=(
+                        sparse_decompressed
+                        or isinstance(self.quantization_compressor, DenseCompressor)
+                    ),
+                )
 
             model_path_or_state_dict = (
                 model.state_dict() if sparse_decompressed else model_path
@@ -445,6 +477,8 @@ class ModelCompressor:
             dense_gen = self.quantization_compressor.decompress(
                 model_path_or_state_dict, names_to_scheme=names_to_scheme
             )
+            # TODO: all weight quantization params will be moved to the compressor
+            # to prevent duplicate parameter updates in update_parameter_data
             self._replace_weights(dense_gen, model)
 
             def freeze_quantization_status(module):
@@ -500,7 +534,7 @@ class ModelCompressor:
         with open(config_file_path, "w") as config_file:
             json.dump(config_data, config_file, indent=2, sort_keys=True)
 
-    def _replace_weights(self, dense_weight_generator, model: Module):
+    def _replace_sparsity_weights(self, dense_weight_generator, model: Module):
         """
         Replace the weights of the model with the
         provided dense weights.
@@ -515,11 +549,60 @@ class ModelCompressor:
         :param model: The model whose weights are to be updated.
         """
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
+
             split_name = name.split(".")
             prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
             module = operator.attrgetter(prefix)(model)
-            if hasattr(module, param_name):
-                update_parameter_data(module, data, param_name)
+
+            params_device = next(module.parameters()).device
+            device = "cpu" if has_offloaded_params(module) else params_device
+            delattr(module, param_name)
+            requires_grad = data.dtype in (torch.float16, torch.float32, torch.bfloat16)
+            param = torch.nn.Parameter(data.to(device), requires_grad=requires_grad)
+            register_offload_parameter(module, param_name, param)
+
+    def _replace_weights(self, dense_weight_generator, model: Module):
+        """
+        Replace the weights of the model with the
+        provided dense weights.
+
+        This method iterates over the dense_weight_generator and
+        updates the corresponding weights in the model. If a parameter
+        name does not exist in the model, it will be skipped.
+
+        :param dense_weight_generator (generator): A generator that yields
+            tuples of (name, data), where 'name' is the parameter name and
+            'data' is the updated param data
+        :param model: The model whose weights are to be updated.
+        """
+
+        for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
+            module = operator.attrgetter(name)(model)
+
+            params_device = next(module.parameters()).device
+            device = "cpu" if has_offloaded_params(module) else params_device
+
+            for param_name, param_data in data.items():
+                if hasattr(module, param_name):
+                    # If compressed, the weight will have an incorrect dtype for
+                    # transformers >4.49
+                    # TODO: we could also skip initialization of scales/zp during
+                    # decompression in init, to be consistent with loading, which
+                    # happens later as well; however, update_data does a good shape
+                    # check - that should be moved to the compressor
+                    if param_name == "weight":
+                        delattr(module, param_name)
+                        requires_grad = param_data.dtype in (
+                            torch.float16,
+                            torch.float32,
+                            torch.bfloat16,
+                        )
+                        param = torch.nn.Parameter(
+                            param_data.to(device), requires_grad=requires_grad
+                        )
+                        register_offload_parameter(module, param_name, param)
+                    else:
+                        # Scales/zero-points should already be registered to the
+                        # correct device
+                        update_parameter_data(module, param_data, param_name)
 
 
 def map_modules_to_quant_args(
@@ -14,11 +14,11 @@
 
 import logging
 from pathlib import Path
-from typing import Any, Dict, Generator, Tuple, Union
+from typing import Any, Dict, Generator, Optional, Tuple, Union
 
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
-from compressed_tensors.quantization import QuantizationArgs
+from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
 from compressed_tensors.utils import (
     get_nested_mappings_from_state_dict,
     get_nested_weight_mappings,
@@ -132,8 +132,10 @@ class BaseQuantizationCompressor(BaseCompressor):
                         compressed_dict[merge_names(prefix, key)] = value
                 else:
                     compressed_dict[name] = value.to("cpu")
-            # only save if asym
-            elif is_weight_zp and quant_args_zp.symmetric:
+            # only save zp if asym and not packed zp
+            elif is_weight_zp and (
+                quant_args_zp.symmetric or self._check_if_zp_pack_quantized(quant_args)
+            ):
                 continue
             # only save if asym
             elif is_input_zp and input_args_zp.symmetric:
@@ -145,6 +147,17 @@ class BaseQuantizationCompressor(BaseCompressor):
 
         return compressed_dict
 
+    def _check_if_zp_pack_quantized(self, quant_args):
+        from compressed_tensors.compressors import PackedQuantizationCompressor
+
+        if isinstance(self, PackedQuantizationCompressor):
+            if not quant_args.symmetric and quant_args.strategy in [
+                QuantizationStrategy.GROUP.value,
+                QuantizationStrategy.CHANNEL.value,
+            ]:
+                return True
+        return False
+
     def decompress(
         self,
         path_to_model_or_tensors: Union[str, Path, Dict[str, Any]],
@@ -186,7 +199,8 @@ class BaseQuantizationCompressor(BaseCompressor):
                 decompressed = self.decompress_weight(
                     compressed_data=weight_data, quantization_args=quant_args
                 )
-                yield merge_names(weight_name, "weight"), decompressed
+                weight_data["weight"] = decompressed
+                yield weight_name, weight_data
 
     def _decompress_from_state_dict(self, state_dict, names_to_scheme):
         weight_mappings = get_nested_mappings_from_state_dict(
@@ -202,4 +216,5 @@ class BaseQuantizationCompressor(BaseCompressor):
                 decompressed = self.decompress_weight(
                     compressed_data=weight_data, quantization_args=quant_args
                 )
-                yield merge_names(weight_name, "weight"), decompressed
+                weight_data["weight"] = decompressed
+                yield weight_name, weight_data
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import math
-from typing import Dict, Optional, Tuple
+from typing import Dict, Literal, Optional, Tuple, Union
 
 import numpy as np
 import torch
@@ -21,7 +21,7 @@ from compressed_tensors.compressors.quantized_compressors.base import (
     BaseQuantizationCompressor,
 )
 from compressed_tensors.config import CompressionFormat
-from compressed_tensors.quantization import QuantizationArgs
+from compressed_tensors.quantization import QuantizationArgs, QuantizationStrategy
 from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
 from compressed_tensors.quantization.utils import can_quantize
 from torch import Tensor
@@ -65,10 +65,26 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         """
         pack_factor = 32 // quantization_args.num_bits
         packed_size = math.ceil(weight_shape[1] / pack_factor)
-        return {
+        packed_size_zp = math.ceil(weight_shape[0] / pack_factor)
+        output = {
             "weight_packed": (torch.Size((weight_shape[0], packed_size)), torch.int32),
             "weight_shape": (torch.Size((2,)), torch.int32),
         }
+        if not quantization_args.symmetric and quantization_args.strategy in [
+            QuantizationStrategy.GROUP.value,
+            QuantizationStrategy.CHANNEL.value,
+        ]:
+            zp_factor = (
+                quantization_args.group_size
+                if quantization_args.strategy == QuantizationStrategy.GROUP.value
+                else weight_shape[-1]
+            )
+
+            output["weight_zero_point"] = (
+                torch.Size((packed_size_zp, weight_shape[-1] // zp_factor)),
+                torch.int32,
+            )
+        return output
 
     def compress_weight(
         self,
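For intuition, here is a small worked example (not part of the package) of the weight_zero_point shape added above, assuming a hypothetical 4096x4096 weight quantized to 4 bits asymmetrically with group_size=128.

```python
# Shape arithmetic for the packed zero point entry in compression_param_info
import math

num_bits, group_size = 4, 128
rows, cols = 4096, 4096                          # hypothetical weight shape

pack_factor = 32 // num_bits                     # 8 int4 values per int32
packed_size_zp = math.ceil(rows / pack_factor)   # 512 packed rows
num_groups = cols // group_size                  # 32 groups per row

# weight_zero_point is stored packed along dim 0: (512, 32), dtype int32
print((packed_size_zp, num_groups))
```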
@@ -104,6 +120,7 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
             quantized_weight = weight
 
         packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
+
         weight_shape = torch.tensor(weight.shape)
         if device is not None:
             packed_weight = packed_weight.to(device)
@@ -112,6 +129,15 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         compressed_dict["weight_shape"] = weight_shape
         compressed_dict["weight_packed"] = packed_weight
 
+        # Zero points are typically not compressed; the exception is the packed
+        # compressor storing asymmetric group/channel zero points
+        if not quantization_args.symmetric and quantization_args.strategy in [
+            QuantizationStrategy.GROUP.value,
+            QuantizationStrategy.CHANNEL.value,
+        ]:
+            packed_zp = pack_to_int32(
+                zero_point, quantization_args.num_bits, packed_dim=0
+            )
+            compressed_dict["weight_zero_point"] = packed_zp
         return compressed_dict
 
     def decompress_weight(
@@ -133,6 +159,21 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         original_shape = torch.Size(compressed_data["weight_shape"])
         num_bits = quantization_args.num_bits
         unpacked = unpack_from_int32(weight, num_bits, original_shape)
+
+        # NOTE: this will fail decompression as we don't currently handle packed
+        # zp on decompression
+        if not quantization_args.symmetric and quantization_args.strategy in [
+            QuantizationStrategy.GROUP.value,
+            QuantizationStrategy.CHANNEL.value,
+        ]:
+            raise ValueError(
+                "Decompression of packed zero points is currently not supported"
+            )
+            assert zero_point is not None
+            original_zp_shape = (original_shape[0], scale.shape[-1])
+            zero_point = unpack_from_int32(
+                zero_point, num_bits, original_zp_shape, packed_dim=0
+            )
+
         decompressed_weight = dequantize(
             x_q=unpacked, scale=scale, zero_point=zero_point, g_idx=g_idx
         )
@@ -140,7 +181,11 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
         return decompressed_weight
 
 
-def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
+def pack_to_int32(
+    value: torch.Tensor,
+    num_bits: int,
+    packed_dim: Union[Literal[0], Literal[1]] = 1,
+) -> torch.Tensor:
     """
     Packs a tensor of quantized weights stored in int8 into int32s with padding
 
@@ -176,14 +221,19 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
     pack_factor = 32 // num_bits
 
     # pad input tensor and initialize packed output
-    packed_size = math.ceil(value.shape[1] / pack_factor)
-    padding = packed_size * pack_factor - value.shape[1]
+    packed_size = math.ceil(value.shape[packed_dim] / pack_factor)
+    padding = packed_size * pack_factor - value.shape[packed_dim]
     value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
 
     # pack values
-    packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
-    for i in range(pack_factor):
-        packed |= value[:, i::pack_factor] << num_bits * i
+    if packed_dim == 1:
+        packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
+        for i in range(pack_factor):
+            packed |= value[:, i::pack_factor] << num_bits * i
+    else:
+        packed = np.zeros((packed_size, value.shape[1]), dtype=np.uint32)
+        for i in range(pack_factor):
+            packed |= value[i::pack_factor, :] << num_bits * i
 
     # convert back to signed and torch
     packed = np.ascontiguousarray(packed).view(np.int32)
@@ -191,7 +241,10 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
 
 
 def unpack_from_int32(
-    value: torch.Tensor, num_bits: int, shape: torch.Size
+    value: torch.Tensor,
+    num_bits: int,
+    shape: torch.Size,
+    packed_dim: Union[Literal[0], Literal[1]] = 1,
 ) -> torch.Tensor:
     """
     Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
@@ -216,17 +269,31 @@ def unpack_from_int32(
 
     # unpack
     mask = (1 << num_bits) - 1
-    unpacked = torch.zeros(
-        (value.shape[0], value.shape[1] * pack_factor),
-        device=value.device,
-        dtype=torch.int32,
-    )
-    for i in range(pack_factor):
-        unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask
-
-    # remove padding
-    original_row_size = int(shape[1])
-    unpacked = unpacked[:, :original_row_size]
+
+    if packed_dim == 1:
+        unpacked = torch.zeros(
+            (value.shape[0], value.shape[1] * pack_factor),
+            device=value.device,
+            dtype=torch.int32,
+        )
+        for i in range(pack_factor):
+            unpacked[:, i::pack_factor] = (value >> (num_bits * i)) & mask
+
+        # remove padding
+        original_row_size = int(shape[1])
+        unpacked = unpacked[:, :original_row_size]
+    else:
+        unpacked = torch.zeros(
+            (value.shape[0] * pack_factor, value.shape[1]),
+            device=value.device,
+            dtype=torch.int32,
+        )
+        for i in range(pack_factor):
+            unpacked[i::pack_factor, :] = (value >> (num_bits * i)) & mask
+
+        # remove padding
+        original_row_size = int(shape[0])
+        unpacked = unpacked[:original_row_size, :]
 
     # bits are packed in unsigned format, reformat to signed
     # update the value range from unsigned to signed
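A hedged round-trip sketch of the new packed_dim=0 path used for zero points; the import path is assumed from the file shown above and the tensors are illustrative only.

```python
# Pack 4-bit asymmetric zero points along dim 0 and unpack them back
import torch
from compressed_tensors.compressors.quantized_compressors.pack_quantized import (
    pack_to_int32,
    unpack_from_int32,
)

zp = torch.randint(-8, 8, (64, 32), dtype=torch.int8)    # hypothetical zero points
packed = pack_to_int32(zp, num_bits=4, packed_dim=0)      # shape (8, 32), int32
unpacked = unpack_from_int32(packed, 4, zp.shape, packed_dim=0)
assert torch.equal(unpacked.to(torch.int8), zp)
```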
@@ -98,7 +98,11 @@ class BaseSparseCompressor(BaseCompressor):
         return compressed_dict
 
     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
+        self,
+        path_to_model_or_tensors: str,
+        device: str = "cpu",
+        params_to_skip_load: Optional[Tuple] = None,
+        **kwargs,
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a bitmask compressed state dict located
@@ -108,6 +112,11 @@ class BaseSparseCompressor(BaseCompressor):
         :param model_path: path to compressed safetensors model (directory with
             one or more safetensors files) or compressed tensors file
         :param device: device to load decompressed weights onto
+        :param params_to_skip_load: a list of non-sparsity parameters (e.g. quantization
+            parameters) that we want to skip loading. As the sparsity compressor does
+            not handle quantized decompression, this should contain any quantization
+            parameters when decompressing stacked compressors; those parameters are
+            handled by the quantization decompressor instead
         :return: iterator for generating decompressed weights
         """
         weight_mappings, ignored_params = get_nested_weight_mappings(
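A minimal usage sketch (not from the package) of the new params_to_skip_load argument when sparsity and quantization compressors are stacked, mirroring ModelCompressor.decompress above; the compressor objects passed in are assumed to already exist.

```python
from typing import Iterator, Optional, Tuple

import torch


def sparse_decompress_skipping_quant_params(
    sparsity_compressor,       # a BaseSparseCompressor instance (assumed)
    quantization_compressor,   # a quantization compressor or None (assumed)
    model_path: str,
) -> Iterator[Tuple[str, torch.Tensor]]:
    """Yield dense weights, leaving quantization params to the quant compressor."""
    params_to_ignore: Optional[Tuple[str, ...]] = None
    if quantization_compressor is not None:
        params_to_ignore = quantization_compressor.compression_param_names
    yield from sparsity_compressor.decompress(
        model_path, params_to_skip_load=params_to_ignore
    )
```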
@@ -121,13 +130,21 @@ class BaseSparseCompressor(BaseCompressor):
                 full_name = merge_names(weight_name, param_name)
                 with safe_open(safe_path, framework="pt", device=device) as f:
                     weight_data[param_name] = f.get_tensor(full_name)
+
             decompressed = self.decompress_weight(weight_data)
             yield merge_names(weight_name, "weight"), decompressed
 
         for ignored_param_name, safe_path in ignored_params.items():
-            with safe_open(safe_path, framework="pt", device=device) as f:
-                value = f.get_tensor(ignored_param_name)
-            yield ignored_param_name, value
+            should_skip = False
+            if params_to_skip_load is not None:
+                for param_to_skip in params_to_skip_load:
+                    if param_to_skip in ignored_param_name:
+                        should_skip = True
+
+            if not should_skip:
+                with safe_open(safe_path, framework="pt", device=device) as f:
+                    value = f.get_tensor(ignored_param_name)
+                yield ignored_param_name, value
 
     @staticmethod
     def should_compress(name: str, expanded_targets: Optional[Set[str]] = None) -> bool:
@@ -44,11 +44,12 @@ from compressed_tensors.quantization.utils import (
 from compressed_tensors.utils.helpers import fix_fsdp_module_name, replace_module
 from compressed_tensors.utils.offload import update_parameter_data
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
+from safetensors import safe_open
 from torch.nn import Module
 
 
 __all__ = [
-    "load_pretrained_quantization",
+    "load_pretrained_quantization_parameters",
     "apply_quantization_config",
     "apply_quantization_status",
     "find_name_or_class_matches",
@@ -57,50 +58,62 @@ __all__ = [
 ]
 
 from compressed_tensors.quantization.utils.helpers import is_module_quantized
-from compressed_tensors.utils.safetensors_load import get_quantization_state_dict
+from compressed_tensors.utils.safetensors_load import (
+    get_quantization_parameter_to_path_mapping,
+)
 
 
 _LOGGER = logging.getLogger(__name__)
 
 
-def load_pretrained_quantization(model: Module, model_name_or_path: str):
+def load_pretrained_quantization_parameters(
+    model: Module,
+    model_name_or_path: Optional[str] = None,
+    load_weight_quantization: Optional[bool] = False,
+):
     """
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
-    a model that has already been initialized with a quantization config
+    a model that has already been initialized with a quantization config.
+
+    NOTE: Will always load input/output parameters.
+    Will conditionally load weight parameters if load_weight_quantization is set to True.
 
     :param model: model to load pretrained quantization parameters to
     :param model_name_or_path: Hugging Face stub or local folder containing a quantized
-        model, which is used to load quantization parameters
+        model, which is used to load quantization parameters
+    :param load_weight_quantization: whether or not the weight quantization parameters
+        should be loaded
     """
     model_path = get_safetensors_folder(model_name_or_path)
-    state_dict = get_quantization_state_dict(model_path)
+    mapping = get_quantization_parameter_to_path_mapping(model_path)
 
     for name, submodule in iter_named_leaf_modules(model):
         if not is_module_quantized(submodule):
             continue
-        if submodule.quantization_scheme.weights is not None:
-            base_name = "weight"
-            _load_quant_args_from_state_dict(
-                base_name=base_name,
-                module_name=name,
-                module=submodule,
-                state_dict=state_dict,
-            )
         if submodule.quantization_scheme.input_activations is not None:
             base_name = "input"
-            _load_quant_args_from_state_dict(
+            _load_quant_args_from_mapping(
                 base_name=base_name,
                 module_name=name,
                 module=submodule,
-                state_dict=state_dict,
+                mapping=mapping,
             )
         if submodule.quantization_scheme.output_activations is not None:
             base_name = "output"
-            _load_quant_args_from_state_dict(
+            _load_quant_args_from_mapping(
                 base_name=base_name,
                 module_name=name,
                 module=submodule,
-                state_dict=state_dict,
+                mapping=mapping,
+            )
+
+        if load_weight_quantization and submodule.quantization_scheme.weights:
+            base_name = "weight"
+            _load_quant_args_from_mapping(
+                base_name=base_name,
+                module_name=name,
+                module=submodule,
+                mapping=mapping,
             )
 
 
@@ -237,9 +250,19 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
 
     if status >= QuantizationStatus.INITIALIZED > current_status:
         force_zero_point_init = status != QuantizationStatus.COMPRESSED
+
+        # When decompressing, set scale_dtype to the model's dtype.
+        # The normal workflow of using the weight's dtype would be incorrect
+        # here because the model weight is still compressed.
+        # Therefore, use the dtype the user set on the PretrainedModel.
+        scale_dtype = None
+        if status == QuantizationStatus.FROZEN:
+            if hasattr(model, "dtype"):
+                scale_dtype = model.dtype
+
         model.apply(
             lambda module: initialize_module_for_quantization(
-                module, force_zero_point=force_zero_point_init
+                module, force_zero_point=force_zero_point_init, scale_dtype=scale_dtype
             )
         )
 
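Taken together with the initialize.py change further below, the scale dtype is resolved roughly as in this sketch; it is an illustration of the combined logic, not the library's API.

```python
import torch


def resolve_scale_dtype(model_dtype, weight_dtype, decompressing: bool) -> torch.dtype:
    # during decompression (status FROZEN) the model's dtype wins, otherwise the
    # weight dtype is used; non-float dtypes (e.g. a compressed int32 weight)
    # fall back to float16
    scale_dtype = model_dtype if decompressing and model_dtype is not None else weight_dtype
    if scale_dtype not in (torch.float16, torch.bfloat16, torch.float32):
        scale_dtype = torch.float16
    return scale_dtype


assert resolve_scale_dtype(None, torch.int32, decompressing=False) is torch.float16
```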
@@ -344,9 +367,10 @@ def _infer_status(model: Module) -> Optional[QuantizationStatus]:
     return None
 
 
-def _load_quant_args_from_state_dict(
-    base_name: str, module_name: str, module: Module, state_dict: Dict
+def _load_quant_args_from_mapping(
+    base_name: str, module_name: str, module: Module, mapping: Dict
 ):
+    # TODO: skip update and just register here, don't do it in initialize
     """
     Loads scale and zero point from a state_dict into the specified module
 
@@ -354,26 +378,37 @@ def _load_quant_args_from_state_dict(
         output_activations
     :param module_name: pytorch module name to look up in state_dict
     :module: pytorch module associated with module_name
-    :state_dict: state_dict to search for matching quantization parameters
+    :mapping: mapping used to fetch a given parameter's path on disk
     """
     scale_name = f"{base_name}_scale"
     zp_name = f"{base_name}_zero_point"
     g_idx_name = f"{base_name}_g_idx"
 
-    state_dict_scale = state_dict.get(f"{module_name}.{scale_name}", None)
-    state_dict_zp = state_dict.get(f"{module_name}.{zp_name}", None)
-    state_dict_g_idx = state_dict.get(f"{module_name}.{g_idx_name}", None)
+    state_dict_scale_path = mapping.get(f"{module_name}.{scale_name}", None)
+    state_dict_zp_path = mapping.get(f"{module_name}.{zp_name}", None)
+    state_dict_g_idx_path = mapping.get(f"{module_name}.{g_idx_name}", None)
+
+    if state_dict_g_idx_path is not None:
+        with safe_open(state_dict_g_idx_path, framework="pt", device="cpu") as f:
+            state_dict_g_idx = f.get_tensor(f"{module_name}.{g_idx_name}")
+
+        update_parameter_data(module, state_dict_g_idx, g_idx_name)
 
-    if state_dict_scale is not None:
+    if state_dict_scale_path is not None:
         # module is quantized
+        with safe_open(state_dict_scale_path, framework="pt", device="cpu") as f:
+            state_dict_scale = f.get_tensor(f"{module_name}.{scale_name}")
+
         update_parameter_data(module, state_dict_scale, scale_name)
-        if state_dict_zp is None:
+
+        if state_dict_zp_path is None:
             # fill in zero point for symmetric quantization
             state_dict_zp = torch.zeros_like(state_dict_scale, device="cpu")
-            update_parameter_data(module, state_dict_zp, zp_name)
+        else:
+            with safe_open(state_dict_zp_path, framework="pt", device="cpu") as f:
+                state_dict_zp = f.get_tensor(f"{module_name}.{zp_name}")
 
-    if state_dict_g_idx is not None:
-        update_parameter_data(module, state_dict_g_idx, g_idx_name)
+        update_parameter_data(module, state_dict_zp, zp_name)
 
 
 def _scheme_from_targets(
@@ -56,6 +56,7 @@ def initialize_module_for_quantization(
     module: Module,
     scheme: Optional[QuantizationScheme] = None,
     force_zero_point: bool = True,
+    scale_dtype: Optional[torch.dtype] = None,
 ):
     """
     attaches appropriate scales, zero points, and observers to a layer
@@ -69,7 +70,10 @@ def initialize_module_for_quantization(
         if not provided, the layer will be skipped
     :param force_zero_point: whether to force initialization of a zero point for
         symmetric quantization
+    :param scale_dtype: dtype to use for the scales, if overriding the
+        weight dtype as the scale dtype
     """
+    # TODO: don't initialize parameters when running decompression
     scheme = scheme or getattr(module, "quantization_scheme", None)
     if scheme is None:
         # no scheme passed and layer not targeted for quantization - skip
@@ -87,7 +91,9 @@ def initialize_module_for_quantization(
             "input",
             scheme.input_activations,
             force_zero_point=force_zero_point,
+            scale_dtype=scale_dtype,
         )
+
     if scheme.weights is not None:
         if hasattr(module, "weight"):
             weight_shape = None
@@ -99,6 +105,7 @@ def initialize_module_for_quantization(
                 scheme.weights,
                 weight_shape=weight_shape,
                 force_zero_point=force_zero_point,
+                scale_dtype=scale_dtype,
             )
         else:
             _LOGGER.warning(
@@ -110,7 +117,7 @@ def initialize_module_for_quantization(
     if scheme.output_activations is not None:
         if not is_kv_cache_quant_scheme(scheme):
             _initialize_scale_zero_point(
-                module, "output", scheme.output_activations
+                module, "output", scheme.output_activations, scale_dtype=scale_dtype
             )
 
     module.quantization_scheme = scheme
@@ -136,6 +143,7 @@ def _initialize_scale_zero_point(
     quantization_args: QuantizationArgs,
     weight_shape: Optional[torch.Size] = None,
     force_zero_point: bool = True,
+    scale_dtype: Optional[torch.dtype] = None,
 ):
     if quantization_args.dynamic:
         return
@@ -160,7 +168,10 @@ def _initialize_scale_zero_point(
             num_groups = weight_shape[1] // quantization_args.group_size
             expected_shape = (weight_shape[0], max(num_groups, 1))
 
-    scale_dtype = module.weight.dtype
+    scale_dtype = scale_dtype if scale_dtype is not None else module.weight.dtype
+    # TODO: consider erroring out in the future if the dtype is not one of
+    # these, as there is likely a bug
+
     if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32]:
         scale_dtype = torch.float16
 
@@ -94,22 +94,6 @@ def is_module_offloaded(module: torch.nn.Module) -> bool:
     return has_offloaded_params(module)
 
 
-def get_execution_device(module: torch.nn.Module) -> torch.device:
-    """
-    :param module: module to check
-    :return: device module is loaded onto during forward pass
-    """
-    if has_offloaded_params(module):
-        return module._hf_hook.execution_device
-    device = next(module.parameters()).device
-
-    # offload only gets set for leaf modules, fallback to checking for device type
-    if device.type == "meta":
-        return module._hf_hook.execution_device
-
-    return device
-
-
 def get_offloaded_device(module: torch.nn.Module) -> torch.device:
     """
     :param module: module to check
@@ -158,6 +142,26 @@ def update_parameter_data(
 """ Candidates for Upstreaming """
 
 
+def get_execution_device(module: torch.nn.Module) -> torch.device:
+    """
+    Get the device which inputs should be moved to before module execution
+
+    :param module: module to check, may be offloaded
+    :return: onload device of module
+    """
+    if has_offloaded_params(module):
+        return module._hf_hook.execution_device
+
+    first_param = next(module.parameters(), None)
+    if first_param is None:
+        warnings.warn(
+            f"Unable to infer execution device of {module}, falling back to CPU"
+        )
+        return torch.device("cpu")
+
+    return first_param.device
+
+
 def register_offload_parameter(
     module: torch.nn.Module,
     name: str,
@@ -200,7 +204,6 @@ def update_offload_parameter(
         provided, then infer device from parameters on module
     """
     param = getattr(module, name)
-    data = data.to(param.dtype)
     if param.data.shape != data.shape:
         warnings.warn(
             f"Shape of parameter being updated {param.data.shape} does not match shape "
@@ -31,7 +31,7 @@ __all__ = [
     "get_weight_mappings",
     "get_nested_weight_mappings",
     "get_nested_mappings_from_state_dict",
-    "get_quantization_state_dict",
+    "get_quantization_parameter_to_path_mapping",
     "is_quantization_param",
 ]
 
@@ -279,16 +279,18 @@ def get_nested_mappings_from_state_dict(
     return nested_weight_mappings
 
 
-def get_quantization_state_dict(model_path: str) -> Dict[str, Tensor]:
+def get_quantization_parameter_to_path_mapping(model_path: str) -> Dict[str, str]:
+    """
+    Given a model path, return a mapping between a parameter and its path
+    on disk
+    """
     weight_mappings = get_weight_mappings(model_path)
-    state_dict = {}
+    mapping = {}
     for weight_name, safe_path in weight_mappings.items():
-        if not is_quantization_param(weight_name):
+        if is_quantization_param(weight_name):
+            mapping[weight_name] = safe_path
             continue
-        with safe_open(safe_path, framework="pt", device="cpu") as f:
-            state_dict[weight_name] = f.get_tensor(weight_name)
-
-    return state_dict
+    return mapping
 
 
 def is_quantization_param(name: str) -> bool:
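A minimal sketch of how the new path mapping enables lazy loading, mirroring _load_quant_args_from_mapping in lifecycle/apply.py above; the model path and module name are hypothetical.

```python
from safetensors import safe_open
from compressed_tensors.utils.safetensors_load import (
    get_quantization_parameter_to_path_mapping,
)

mapping = get_quantization_parameter_to_path_mapping("/path/to/compressed/model")
scale_path = mapping.get("model.layers.0.self_attn.q_proj.weight_scale")
if scale_path is not None:
    # only the requested tensor is read from disk, not the whole state dict
    with safe_open(scale_path, framework="pt", device="cpu") as f:
        weight_scale = f.get_tensor("model.layers.0.self_attn.q_proj.weight_scale")
```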
@@ -17,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.9.4a20250414'
-__version_tuple__ = version_tuple = (0, 9, 4)
+__version__ = version = '0.9.5.a20250424'
+__version_tuple__ = version_tuple = (0, 9, 5)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: compressed-tensors
-Version: 0.9.4a20250414
+Version: 0.9.5a20250424
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -1,17 +1,17 @@
 compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
 compressed_tensors/base.py,sha256=73HYH7HY7O2roC89yG_piPFnZwrBfn_i7HmKl90SKc0,875
-compressed_tensors/version.py,sha256=JCDPCnyAovJOVFzV3xFxk3_fp65oLlCW-p8xggQzveU,520
+compressed_tensors/version.py,sha256=fMpLfUNedNFTmTmQeHxGZnMaXAKOKiqpI9xyx46F2gI,521
 compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
-compressed_tensors/compressors/base.py,sha256=x8dQrWVEurynXw03yHJZTaAmrRTOsdZJoHjmvs0IKwk,7002
+compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
 compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
 compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
-compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=n0gcrKwefJuO6b4LNjCynJQf7NNqNHDcoLlzZgTCPGc,23080
+compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=gZvhGSMYIWvLiH0Xl2dmh7PxfyLHAX5nFBvIUUDE6Qc,27451
 compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
-compressed_tensors/compressors/quantized_compressors/base.py,sha256=GXTSWgFAhksbno94Ulpth9-YM4a7NsDlx4oQGGB0swQ,8567
+compressed_tensors/compressors/quantized_compressors/base.py,sha256=PWSPLQ7zBBjHfQyHUqr9D-mGYLe5WczJHMSRZWCOxOI,9189
 compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=fd0KlkSx6bvZ3xwIkK3jEUdPSUPs56Eua4dEDOtzKW0,5150
-compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=zH2PocRe_T5yt1-3kLdZH9AUQWQyaVOi4U9nEJiYaWA,8509
+compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=SPIHlk8ewip2LcjgkCw02K21EkfUSFSd9qQqL0Pt5eM,11162
 compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
-compressed_tensors/compressors/sparse_compressors/base.py,sha256=CVWbs3sd7GKJEoWOIKImABQ01VOTX8dlF2AQaEVPotw,5883
+compressed_tensors/compressors/sparse_compressors/base.py,sha256=PMiWIaW2XSF_esYJlQ12RVW7opeAzavdbkRFtelMFX0,6655
 compressed_tensors/compressors/sparse_compressors/dense.py,sha256=_uW_HISeDNz4yboSZWoh6GwrkUE6HFibzPQSKrHOCkg,1505
 compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py,sha256=mEKSSgpXookqYSJw3mlyP6cYYKD-eaIvpQMvi4JO6TY,8807
 compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py,sha256=S8vW0FI9ep_XtUQOxj0P5utJt3vKEYOHjWEPp-Xd9aY,5820
@@ -29,24 +29,24 @@ compressed_tensors/quantization/quant_args.py,sha256=sKpb8DcNObidjXjNol1Tn_Iih3Z
 compressed_tensors/quantization/quant_config.py,sha256=MxSUcb5dOqMN6LFyD5K2h8X0TvEtcWIAoiUJqD2dHGE,10159
 compressed_tensors/quantization/quant_scheme.py,sha256=yz0oMbbwp7QZXXd2k5KIJu-Q6aTqg2929VdUzZ7vysM,6324
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
-compressed_tensors/quantization/lifecycle/apply.py,sha256=lZmCCSm1_o79iUAy460w6Bv9FaOvntVisMdS-dN9fnk,16594
+compressed_tensors/quantization/lifecycle/apply.py,sha256=OR-6QmN9pFRGteYMBAatu2T5qHutQt7Iw3jH4DILvEk,18071
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
 compressed_tensors/quantization/lifecycle/forward.py,sha256=DOWouUqfaLA4Qhg-ojVVBdhhSAlgZqFC26vZARxE0ko,12961
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=sK3PLm69N91QepBuq-83Qd2Br6XcOmRDpD5qo_WWNJo,7469
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=SY4-FJWpVSupQjuvy7rrIc0pFYU9cRL5Lo1KyfUSvoU,8010
 compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
 compressed_tensors/quantization/utils/helpers.py,sha256=-wX0H7zVysJ67jRRCGbx6BfxbMU_1sqffTf5YUIpPiU,14391
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=vRcjVB1ITfSbfYUaGndBBmqhip_5vsS62weorVg0iXo,11896
 compressed_tensors/utils/__init__.py,sha256=gS4gSU2pwcAbsKj-6YMaqhm25udFy6ISYaWBf-myRSM,808
 compressed_tensors/utils/helpers.py,sha256=RrNvzD08naEjEiXdU-FdZjQVda1nQywu1hA_GCDj0vg,10415
-compressed_tensors/utils/offload.py,sha256=H4aAg21zUvJM2uwE6QCNYazX_p_o41yQUAgLLWBqR0w,14079
+compressed_tensors/utils/offload.py,sha256=Fmb4jBJhH5OdSQFaecFSHK_UreSyZdynEkadZ_oKcvM,14153
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
-compressed_tensors/utils/safetensors_load.py,sha256=5SeM2hzLh77Ne8Vk7qR6-km7cf8bhov41ExpWITqX3A,11470
+compressed_tensors/utils/safetensors_load.py,sha256=rwj0ufU5561ScWDoCG7tzLBRDtiykNno2Iq4PM_JA7E,11499
 compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
-compressed_tensors-0.9.4a20250414.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors-0.9.4a20250414.dist-info/METADATA,sha256=3Qp5i-5uU9vMdZ5vsWLRrU68Htb1YbDx4lcn3I8r8Ts,7004
-compressed_tensors-0.9.4a20250414.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
-compressed_tensors-0.9.4a20250414.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors-0.9.4a20250414.dist-info/RECORD,,
+compressed_tensors-0.9.5a20250424.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors-0.9.5a20250424.dist-info/METADATA,sha256=P0oAhrS28ZU90nUEi9yjIu3CE-968yZTsTLTx1Uj1nM,7004
+compressed_tensors-0.9.5a20250424.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
+compressed_tensors-0.9.5a20250424.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors-0.9.5a20250424.dist-info/RECORD,,
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (78.1.0)
+Generator: setuptools (79.0.1)
 Root-Is-Purelib: true
 Tag: py3-none-any
 