compressed-tensors-nightly 0.3.3.20240514__py3-none-any.whl → 0.3.3.20240516__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,4 +19,5 @@ from .dense import DenseCompressor
  from .helpers import load_compressed, save_compressed, save_compressed_model
  from .int_quantized import IntQuantizationCompressor
  from .model_compressor import ModelCompressor
+ from .pack_quantized import PackedQuantizationCompressor
  from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
@@ -54,6 +54,7 @@ class Compressor(RegistryMixin):
 
          :param model_path: path to compressed safetensors model (directory with
              one or more safetensors files) or compressed tensors file
+         :param device: optional device to load intermediate weights into
          :return: compressed state dict
          """
          raise NotImplementedError()
@@ -18,7 +18,9 @@ from typing import Dict, Generator, Tuple
  import torch
  from compressed_tensors.compressors import Compressor
  from compressed_tensors.config import CompressionFormat
+ from compressed_tensors.quantization import QuantizationArgs
  from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
+ from compressed_tensors.quantization.utils import can_quantize
  from compressed_tensors.utils import get_nested_weight_mappings, merge_names
  from safetensors import safe_open
  from torch import Tensor
@@ -40,8 +42,20 @@ class IntQuantizationCompressor(Compressor):
 
      COMPRESSION_PARAM_NAMES = ["weight", "weight_scale", "weight_zero_point"]
 
-     def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
-         model_quant_args = kwargs["model_quant_args"]
+     def compress(
+         self,
+         model_state: Dict[str, Tensor],
+         model_quant_args: Dict[str, QuantizationArgs],
+         **kwargs,
+     ) -> Dict[str, Tensor]:
+         """
+         Compresses a dense state dict
+
+         :param model_state: state dict of uncompressed model
+         :param model_quant_args: quantization args for each quantized weight, needed for
+             quantize function to calculate bit depth
+         :return: compressed state dict
+         """
          compressed_dict = {}
          _LOGGER.debug(
              f"Compressing model with {len(model_state)} parameterized layers..."
@@ -55,11 +69,7 @@ class IntQuantizationCompressor(Compressor):
                  if scale is not None and zp is not None:
                      # weight is quantized, compress it
                      quant_args = model_quant_args[prefix]
-                     try:
-                         bit_depth = torch.finfo(value.dtype).bits
-                     except TypeError:
-                         bit_depth = torch.iinfo(value.dtype).bits
-                     if bit_depth > quant_args.num_bits:
+                     if can_quantize(value, quant_args):
                          # only quantize if not already quantized
                          value = quantize(
                              x=value,
@@ -76,6 +86,16 @@ class IntQuantizationCompressor(Compressor):
      def decompress(
          self, path_to_model_or_tensors: str, device: str = "cpu"
      ) -> Generator[Tuple[str, Tensor], None, None]:
+         """
+         Reads a compressed state dict located at path_to_model_or_tensors
+         and returns a generator for sequentially decompressing back to a
+         dense state dict
+
+         :param model_path: path to compressed safetensors model (directory with
+             one or more safetensors files) or compressed tensors file
+         :param device: optional device to load intermediate weights into
+         :return: compressed state dict
+         """
          weight_mappings = get_nested_weight_mappings(
              path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
          )
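For orientation, a minimal sketch of how the reworked `compress` signature might be exercised; the layer name, tensor values, and the no-argument construction of the compressor are illustrative assumptions, not taken from the diff:

```python
import torch
from compressed_tensors.compressors import IntQuantizationCompressor
from compressed_tensors.quantization import QuantizationArgs

# hypothetical single-layer state dict; names and values are made up for illustration
state_dict = {
    "decoder.fc1.weight": torch.randn(16, 16),
    "decoder.fc1.weight_scale": torch.tensor(0.02),
    "decoder.fc1.weight_zero_point": torch.tensor(0, dtype=torch.int8),
}
# quantization args are now passed explicitly, keyed by layer prefix
quant_args = {"decoder.fc1": QuantizationArgs(num_bits=8)}

compressor = IntQuantizationCompressor()  # assumes the base class default of config=None
compressed = compressor.compress(state_dict, model_quant_args=quant_args)
```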
@@ -0,0 +1,198 @@
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #    http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing,
+ # software distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import logging
+ import math
+ from typing import Dict, Generator, Tuple
+
+ import numpy as np
+ import torch
+ from compressed_tensors.compressors import Compressor
+ from compressed_tensors.config import CompressionFormat
+ from compressed_tensors.quantization import QuantizationArgs
+ from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
+ from compressed_tensors.quantization.utils import can_quantize
+ from compressed_tensors.utils import get_nested_weight_mappings, merge_names
+ from safetensors import safe_open
+ from torch import Tensor
+ from tqdm import tqdm
+
+
+ __all__ = ["PackedQuantizationCompressor", "pack_4bit_ints", "unpack_4bit_ints"]
+
+ _LOGGER: logging.Logger = logging.getLogger(__name__)
+
+
+ @Compressor.register(name=CompressionFormat.pack_quantized.value)
+ class PackedQuantizationCompressor(Compressor):
+     """
+     Compresses a quantized model by packing every eight 4-bit weights into an int32
+     """
+
+     COMPRESSION_PARAM_NAMES = [
+         "weight",
+         "weight_scale",
+         "weight_zero_point",
+         "weight_shape",
+     ]
+
+     def compress(
+         self,
+         model_state: Dict[str, Tensor],
+         model_quant_args: Dict[str, QuantizationArgs],
+         **kwargs,
+     ) -> Dict[str, Tensor]:
+         """
+         Compresses a dense state dict
+
+         :param model_state: state dict of uncompressed model
+         :param model_quant_args: quantization args for each quantized weight, needed for
+             quantize function to calculate bit depth
+         :return: compressed state dict
+         """
+         compressed_dict = {}
+         _LOGGER.debug(
+             f"Compressing model with {len(model_state)} parameterized layers..."
+         )
+
+         for name, value in tqdm(model_state.items(), desc="Compressing model"):
+             if name.endswith(".weight"):
+                 prefix = name.removesuffix(".weight")
+                 scale = model_state.get(merge_names(prefix, "weight_scale"), None)
+                 zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
+                 shape = torch.tensor(value.shape)
+                 if scale is not None and zp is not None:
+                     # weight is quantized, compress it
+                     # weight is quantized, compress it
+                     quant_args = model_quant_args[prefix]
+                     if can_quantize(value, quant_args):
+                         # convert weight to an int if not already compressed
+                         value = quantize(
+                             x=value,
+                             scale=scale,
+                             zero_point=zp,
+                             args=quant_args,
+                             dtype=torch.int8,
+                         )
+                     value = pack_4bit_ints(value.cpu())
+                     compressed_dict[merge_names(prefix, "weight_shape")] = shape
+
+             compressed_dict[name] = value.to("cpu")
+
+         return compressed_dict
+
+     def decompress(
+         self, path_to_model_or_tensors: str, device: str = "cpu"
+     ) -> Generator[Tuple[str, Tensor], None, None]:
+         """
+         Reads a compressed state dict located at path_to_model_or_tensors
+         and returns a generator for sequentially decompressing back to a
+         dense state dict
+
+         :param model_path: path to compressed safetensors model (directory with
+             one or more safetensors files) or compressed tensors file
+         :param device: optional device to load intermediate weights into
+         :return: compressed state dict
+         """
+         weight_mappings = get_nested_weight_mappings(
+             path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+         )
+         for weight_name in weight_mappings.keys():
+             weight_data = {}
+             for param_name, safe_path in weight_mappings[weight_name].items():
+                 full_name = merge_names(weight_name, param_name)
+                 with safe_open(safe_path, framework="pt", device=device) as f:
+                     weight_data[param_name] = f.get_tensor(full_name)
+
+             if len(weight_data) == len(self.COMPRESSION_PARAM_NAMES):
+                 weight = weight_data["weight"]
+                 original_shape = torch.Size(weight_data["weight_shape"])
+                 unpacked = unpack_4bit_ints(weight, original_shape)
+                 decompressed = dequantize(
+                     x_q=unpacked,
+                     scale=weight_data["weight_scale"],
+                     zero_point=weight_data["weight_zero_point"],
+                 )
+                 yield merge_names(weight_name, "weight"), decompressed
+
+
+ def pack_4bit_ints(value: torch.Tensor) -> torch.Tensor:
+     """
+     Packs a tensor of int4 weights stored in int8 into int32s with padding
+
+     :param value: tensor to pack
+     :returns: packed int32 tensor
+     """
+     if value.dtype is not torch.int8:
+         raise ValueError("Tensor must be quantized to torch.int8 before packing")
+
+     # need to convert to unsigned 8bit to use numpy's pack/unpack
+     temp = (value - 8).to(torch.uint8)
+     bits = np.unpackbits(temp.numpy(), axis=-1, bitorder="little")
+     ranges = np.array([range(x, x + 4) for x in range(0, bits.shape[1], 8)]).flatten()
+     only_4_bits = bits[:, ranges]  # top 4 bits are 0 because we're really uint4
+
+     # pad each row to fill a full 32bit int
+     pack_depth = 32
+     padding = (
+         math.ceil(only_4_bits.shape[1] / pack_depth) * pack_depth - only_4_bits.shape[1]
+     )
+     padded_bits = np.pad(
+         only_4_bits, pad_width=[(0, 0), (0, padding)], constant_values=0
+     )
+
+     # after packbits each uint8 is two packed uint4s
+     # then we keep the bit pattern the same but convert to int32
+     compressed = np.packbits(padded_bits, axis=-1, bitorder="little")
+     compressed = np.ascontiguousarray(compressed).view(np.int32)
+
+     return torch.from_numpy(compressed)
+
+
+ def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
+     """
+     Unpacks a tensor of packed int4 weights into individual int8s, maintaining their
+     original int4 range
+
+     :param value: tensor to unpack
+     :param shape: shape to unpack into, used to remove padding
+     :returns: unpacked int8 tensor
+     """
+     if value.dtype is not torch.int32:
+         raise ValueError(
+             f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
+         )
+
+     # unpack bits and undo padding to nearest int32 bits
+     individual_depth = 4
+     as_uint8 = value.numpy().view(np.uint8)
+     bits = np.unpackbits(as_uint8, axis=-1, bitorder="little")
+     original_row_size = int(shape[1] * individual_depth)
+     bits = bits[:, :original_row_size]
+
+     # reformat each packed uint4 to a uint8 by filling the top 4 bits with zeros
+     # (uint8 format is required by np.packbits)
+     shape_8bit = (bits.shape[0], bits.shape[1] * 2)
+     bits_as_8bit = np.zeros(shape_8bit, dtype=np.uint8)
+     ranges = np.array([range(x, x + 4) for x in range(0, shape_8bit[1], 8)]).flatten()
+     bits_as_8bit[:, ranges] = bits
+
+     # repack the bits to uint8
+     repacked = np.packbits(bits_as_8bit, axis=-1, bitorder="little")
+
+     # bits are packed in unsigned format, reformat to signed
+     # update the value range from uint4 to int4
+     final = repacked.astype(np.int8) - 8
+
+     return torch.from_numpy(final)
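A quick round-trip sanity check of the two packing helpers; the values below are arbitrary examples within the int4 range, not taken from the package's tests:

```python
import torch
from compressed_tensors.compressors.pack_quantized import pack_4bit_ints, unpack_4bit_ints

# int4 values held in an int8 tensor, as quantize(..., dtype=torch.int8) would produce
weights = torch.tensor([[-8, -1, 0, 3, 7, 2, -4, 5]], dtype=torch.int8)

packed = pack_4bit_ints(weights)            # eight 4-bit values pack into one int32
assert packed.dtype == torch.int32

restored = unpack_4bit_ints(packed, weights.shape)
assert torch.equal(restored, weights)       # lossless within the int4 range [-8, 7]
```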
@@ -26,6 +26,7 @@ class CompressionFormat(Enum):
      dense = "dense"
      sparse_bitmask = "sparse-bitmask"
      int_quantized = "int-quantized"
+     pack_quantized = "pack-quantized"
 
 
  class SparsityCompressionConfig(RegistryMixin, BaseModel):
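The new enum value is what ties a config's `format` string to the compressor registered above; a small sketch, assuming the usual `RegistryMixin.load_from_registry` lookup applies here as it does for the other registered types:

```python
from compressed_tensors.compressors import Compressor
from compressed_tensors.config import CompressionFormat

# "pack-quantized" was registered by @Compressor.register on PackedQuantizationCompressor,
# so resolving the enum value through the registry should yield that compressor
compressor = Compressor.load_from_registry(CompressionFormat.pack_quantized.value)
print(type(compressor).__name__)  # expected: PackedQuantizationCompressor
```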
@@ -30,7 +30,10 @@ from compressed_tensors.quantization.quant_config import (
      QuantizationConfig,
      QuantizationStatus,
  )
- from compressed_tensors.quantization.utils import iter_named_leaf_modules
+ from compressed_tensors.quantization.utils import (
+     infer_quantization_status,
+     iter_named_leaf_modules,
+ )
  from compressed_tensors.utils.safetensors_load import get_safetensors_folder
  from torch.nn import Module
 
@@ -121,7 +124,7 @@ def apply_quantization_status(model: Module, status: QuantizationStatus):
      :param model: model to apply quantization to
      :param status: status to update the module to
      """
-     current_status = _infer_status(model)
+     current_status = infer_quantization_status(model)
 
      if status >= QuantizationStatus.INITIALIZED > current_status:
          model.apply(initialize_module_for_quantization)
@@ -229,7 +229,10 @@ def _process_quantization(
  def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
      # expects a module already initialized and injected with the parameters in
      # initialize_module_for_quantization
-     forward_func_orig = module.forward.__func__
+     if hasattr(module.forward, "__func__"):
+         forward_func_orig = module.forward.__func__
+     else:
+         forward_func_orig = module.forward.func
 
      @wraps(forward_func_orig)  # ensures docstring, names, etc are propagated
      def wrapped_forward(self, *args, **kwargs):
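The branch added here distinguishes an ordinary bound method, which exposes the underlying function as `__func__`, from a `functools.partial`-wrapped forward, which only exposes it as `.func`. A standalone illustration (not library code):

```python
import functools
from torch.nn import Linear

module = Linear(4, 4)
print(hasattr(module.forward, "__func__"))  # True: plain bound method

# simulate a wrapped forward, e.g. as some hook frameworks install
module.forward = functools.partial(module.forward.__func__, module)
print(hasattr(module.forward, "__func__"))  # False: partials have no __func__
print(module.forward.func)                  # the original Linear.forward function
```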
@@ -12,7 +12,8 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
- from typing import Tuple
+ import logging
+ from typing import Optional, Tuple
 
  import torch
  from compressed_tensors.quantization.observers.base import Observer
@@ -21,13 +22,33 @@ from tqdm import tqdm
 
 
  __all__ = [
+     "infer_quantization_status",
      "is_module_quantized",
      "is_model_quantized",
      "iter_named_leaf_modules",
      "module_type",
      "calculate_compression_ratio",
+     "get_torch_bit_depth",
+     "can_quantize",
  ]
 
+ _LOGGER: logging.Logger = logging.getLogger(__name__)
+
+
+ def infer_quantization_status(model: Module) -> Optional["QuantizationStatus"]:  # noqa
+     """
+     Checks the quantization status of a model. Assumes all modules in the model have
+     the same status, so only the first quantized module is checked.
+
+     :param model: model to check quantization status for
+     :return: quantization status if the model is quantized, otherwise None
+     """
+     for module in model.modules():
+         status = getattr(module, "quantization_status", None)
+         if status is not None:
+             return status
+     return None
+
 
  def is_module_quantized(module: Module) -> bool:
      """
@@ -100,6 +121,41 @@ def iter_named_leaf_modules(model: Module) -> Tuple[str, Module]:
              yield name, submodule
 
 
+ def get_torch_bit_depth(value: torch.Tensor) -> int:
+     """
+     Determine the number of bits used to represent the dtype of a tensor
+
+     :param value: tensor to check bit depth of
+     :return: bit depth of each element in the value tensor
+     """
+     try:
+         bit_depth = torch.finfo(value.dtype).bits
+     except TypeError:
+         bit_depth = torch.iinfo(value.dtype).bits
+
+     return bit_depth
+
+
+ def can_quantize(value: torch.Tensor, quant_args: "QuantizationArgs") -> bool:  # noqa
+     """
+     Checks if value can be quantized by quant_args.
+
+     :param value: tensor to check for quantization
+     :param quant_args: QuantizationArgs to use for quantization
+     :return: False if value is already quantized to quant_args or value is incompatible
+         with quant_args, True if value can be quantized with quant_args
+     """
+     bit_depth = get_torch_bit_depth(value)
+     requested_depth = quant_args.num_bits
+     if bit_depth < quant_args.num_bits:
+         _LOGGER.warn(
+             f"Can't quantize tensor with bit depth {bit_depth} to {requested_depth}."
+             "The QuantizationArgs provided are not compatible with the input tensor."
+         )
+
+     return bit_depth > quant_args.num_bits
+
+
  def calculate_compression_ratio(model: Module) -> float:
      """
      Calculates the quantization compression ratio of a pytorch model, based on the
@@ -116,10 +172,7 @@ def calculate_compression_ratio(model: Module) -> float:
          desc="Calculating quantization compression ratio",
      ):
          for parameter in model.parameters():
-             try:
-                 uncompressed_bits = torch.finfo(parameter.dtype).bits
-             except TypeError:
-                 uncompressed_bits = torch.iinfo(parameter.dtype).bits
+             uncompressed_bits = get_torch_bit_depth(parameter)
              compressed_bits = uncompressed_bits
              if is_module_quantized(submodule):
                  compressed_bits = submodule.quantization_scheme.weights.num_bits
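Taken together, `get_torch_bit_depth` and `can_quantize` behave roughly as sketched below; the dtype examples are mine, not from the package's tests:

```python
import torch
from compressed_tensors.quantization import QuantizationArgs
from compressed_tensors.quantization.utils import can_quantize, get_torch_bit_depth

w = torch.randn(4, 4, dtype=torch.float16)
print(get_torch_bit_depth(w))                 # 16, via torch.finfo
print(get_torch_bit_depth(w.to(torch.int8)))  # 8, via torch.iinfo after the TypeError

args4 = QuantizationArgs(num_bits=4)
print(can_quantize(w, args4))                                        # True: 16 > 4
print(can_quantize(w.to(torch.int8), args4))                         # True: 8 > 4
print(can_quantize(w.to(torch.int8), QuantizationArgs(num_bits=8)))  # False: already 8 bits
```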
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: compressed-tensors-nightly
- Version: 0.3.3.20240514
+ Version: 0.3.3.20240516
  Summary: Library for utilization of compressed safetensors of neural network models
  Home-page: https://github.com/neuralmagic/compressed-tensors
  Author: Neuralmagic, Inc.
@@ -103,3 +103,44 @@ state_dict = dict(load_compressed("compressed_model.safetensors", compression_co
  ```
 
  For more in-depth tutorial on bitmask compression, refer to the [notebook](https://github.com/neuralmagic/compressed-tensors/blob/d707c5b84bc3fef164aebdcd97cb6eaa571982f8/examples/bitmask_compression.ipynb).
+
+
+ ## Saving a Compressed Model with PTQ
+
+ We can use compressed-tensors to run basic post training quantization (PTQ) and save the quantized model compressed on disk
+
+ ```python
+ model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0")
+
+ config = QuantizationConfig.parse_file("./examples/bit_packing/int4_config.json")
+ config.quantization_status = QuantizationStatus.CALIBRATION
+ apply_quantization_config(model, config)
+
+ dataset = load_dataset("ptb_text_only")["train"]
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ def tokenize_function(examples):
+     return tokenizer(examples["sentence"], padding=False, truncation=True, max_length=1024)
+
+ tokenized_dataset = dataset.map(tokenize_function, batched=True)
+ data_loader = DataLoader(tokenized_dataset, batch_size=1, collate_fn=DefaultDataCollator())
+
+ with torch.no_grad():
+     for idx, sample in tqdm(enumerate(data_loader), desc="Running calibration"):
+         sample = {key: value.to(device) for key,value in sample.items()}
+         _ = model(**sample)
+
+         if idx >= 512:
+             break
+
+ model.apply(freeze_module_quantization)
+ model.apply(compress_quantized_weights)
+
+ output_dir = "./ex_llama1.1b_w4a16_packed_quantize"
+ compressor = ModelCompressor(quantization_config=config)
+ compressed_state_dict = compressor.compress(model)
+ model.save_pretrained(output_dir, state_dict=compressed_state_dict)
+ ```
+
+ For more in-depth tutorial on quantization compression, refer to the [notebook](./examples/quantize_and_pack_int4.ipynb).
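The PTQ snippet above does not show its imports and uses a `device` variable it never defines; one plausible preamble, assuming the standard Hugging Face and compressed-tensors entry points, would be:

```python
import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer, DefaultDataCollator

from compressed_tensors.compressors import ModelCompressor
from compressed_tensors.quantization import (
    QuantizationConfig,
    QuantizationStatus,
    apply_quantization_config,
    compress_quantized_weights,
    freeze_module_quantization,
)

device = "cuda:0"  # the calibration loop moves each batch to the model's device
```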
@@ -1,15 +1,16 @@
  compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6hY,789
  compressed_tensors/base.py,sha256=OA2TOLP1gP3LSH7gp508eqr2ZtDQ-pqRHElCp-aB0vs,755
  compressed_tensors/version.py,sha256=V8krJZctm43D4AGQhJY6dB0MvP1-T9TJ8BcGa8kESrI,1512
- compressed_tensors/compressors/__init__.py,sha256=xUiZjKof5nxsmd_whbuTlmJNx54aJQeyTQhYjKsU6oo,935
- compressed_tensors/compressors/base.py,sha256=ictzKWJgE80MSzu5ZG4FY6EdEHgP9LaTYn73NvLDRRM,2061
+ compressed_tensors/compressors/__init__.py,sha256=3yyoNICHll3F4HS6Yu-cgNZpDhfuobFNWCs6DrPcUyQ,992
+ compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCiV1Y8nlBGw,2134
  compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
  compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
- compressed_tensors/compressors/int_quantized.py,sha256=2NhcmkryvFA7pW-4gid06d7HYIWET5CmTVVK9bBxXQM,4023
+ compressed_tensors/compressors/int_quantized.py,sha256=I0FqnjtwCiJvQxi9YyfA8aBeaR5csqtq1bOrVvRqJ1I,4744
  compressed_tensors/compressors/model_compressor.py,sha256=teohd0xTbcIDIuEfZrH-bZyAzHn2UZH2KJXT-7Gk3sw,10426
+ compressed_tensors/compressors/pack_quantized.py,sha256=K03l8kFqejpapgcMU5hMm1-JIX1cUVvU-VybGSN6RWA,7885
  compressed_tensors/compressors/sparse_bitmask.py,sha256=TH77NDFJwvQeySY75YV6w1zskZC-JcUGpua4zCFOgTY,8632
  compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
- compressed_tensors/config/base.py,sha256=0DTHzK-MOtGSC7vGLRCjeFnzjYnm3ebCCLS4CrgDSlo,1416
+ compressed_tensors/config/base.py,sha256=grf5tDaLep8i2-W_p7H-fW9DOGXDi4Zz7su7zjs1Qqc,1454
  compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
  compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
  compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
@@ -17,10 +18,10 @@ compressed_tensors/quantization/quant_args.py,sha256=A6b2V8lhsM8Ho8RjlPBQdxRUDNW
  compressed_tensors/quantization/quant_config.py,sha256=U6oEzheNK1d-0kHARzwepasnmS7HHqU_zGwoDBJ-lxU,8042
  compressed_tensors/quantization/quant_scheme.py,sha256=X3oqmZPiIKtX5tEKKUj-0N6hB68NeiU2b1GcQEQPadQ,1480
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
- compressed_tensors/quantization/lifecycle/apply.py,sha256=hidWZiq1FDddAWhivIVTplamyZiVb-rSnbPluNK2YKA,7573
+ compressed_tensors/quantization/lifecycle/apply.py,sha256=whKfNGC_EZm0BC23AP7qWfjRe5OJVWmcZOpX7lryZZc,7625
  compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
- compressed_tensors/quantization/lifecycle/forward.py,sha256=THoaTfl1GeiixqzGqI8ISt03qxBd-wNc4Fsf1V3nZ54,11267
+ compressed_tensors/quantization/lifecycle/forward.py,sha256=sXo7ReS2ehHFwbtwUbhPnsnnj-CZ3iyAZKmUzHxjTKc,11373
  compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
  compressed_tensors/quantization/lifecycle/initialize.py,sha256=U6g9qifSF6pagQZQZEwd-rwWC6uQ_dZXn1wg6nr1Abg,3697
  compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
@@ -29,14 +30,14 @@ compressed_tensors/quantization/observers/helpers.py,sha256=JwALNfBYY9Eyl8Q180t0
  compressed_tensors/quantization/observers/memoryless.py,sha256=ZHTPh4aURE8LvHBFaP--HIC2JanMX5-VRdIkE2JHthw,1859
  compressed_tensors/quantization/observers/min_max.py,sha256=s2I40pzTXrVAjIsavNt6TLAl7-qDUmdc43Xd5rb4XAY,3071
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
- compressed_tensors/quantization/utils/helpers.py,sha256=dDfGR9PRNeKoqD50qoFPRXwsNckFRc38Ao2UrE_69Tk,4214
+ compressed_tensors/quantization/utils/helpers.py,sha256=NzAH18Cn_-mTAR87y6IlcQU5gC393XSjgNKC9CRkr78,6017
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
  compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
  compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
  compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
  compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
- compressed_tensors_nightly-0.3.3.20240514.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- compressed_tensors_nightly-0.3.3.20240514.dist-info/METADATA,sha256=kQhsyTU2oXTX41J5rPQLSIMTVcXskMxS9Cj1EI4QuUI,4056
- compressed_tensors_nightly-0.3.3.20240514.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
- compressed_tensors_nightly-0.3.3.20240514.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
- compressed_tensors_nightly-0.3.3.20240514.dist-info/RECORD,,
+ compressed_tensors_nightly-0.3.3.20240516.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ compressed_tensors_nightly-0.3.3.20240516.dist-info/METADATA,sha256=C_J0XMNZOkW4H6xwMbRX8ryXtoxksTo9CoMk25yoO-0,5633
+ compressed_tensors_nightly-0.3.3.20240516.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ compressed_tensors_nightly-0.3.3.20240516.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+ compressed_tensors_nightly-0.3.3.20240516.dist-info/RECORD,,