compressed-tensors-nightly 0.4.0.20240623__py3-none-any.whl → 0.4.0.20240626__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to their respective registries, and is provided for informational purposes only.
- compressed_tensors/compressors/base.py +1 -1
- compressed_tensors/compressors/dense.py +1 -1
- compressed_tensors/compressors/marlin_24.py +5 -5
- compressed_tensors/compressors/model_compressor.py +5 -3
- compressed_tensors/compressors/naive_quantized.py +4 -4
- compressed_tensors/compressors/pack_quantized.py +84 -20
- compressed_tensors/compressors/sparse_bitmask.py +1 -1
- compressed_tensors/quantization/lifecycle/apply.py +5 -1
- compressed_tensors/quantization/quant_scheme.py +0 -1
- compressed_tensors/utils/helpers.py +0 -1
- {compressed_tensors_nightly-0.4.0.20240623.dist-info → compressed_tensors_nightly-0.4.0.20240626.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.4.0.20240623.dist-info → compressed_tensors_nightly-0.4.0.20240626.dist-info}/RECORD +15 -15
- {compressed_tensors_nightly-0.4.0.20240623.dist-info → compressed_tensors_nightly-0.4.0.20240626.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240623.dist-info → compressed_tensors_nightly-0.4.0.20240626.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240623.dist-info → compressed_tensors_nightly-0.4.0.20240626.dist-info}/top_level.txt +0 -0
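Taken together, the substantive changes: apply_quantization_config now builds and returns a names_to_scheme mapping (quantized module name to its weight QuantizationArgs); ModelCompressor threads that mapping through compress and decompress as a keyword argument; pack_quantized.py gains an 8-bit pack/unpack path next to the existing 4-bit one, with the shared padding logic factored into a new _pack_bits helper; and the remaining compressors' decompress signatures grow a **kwargs catch-all so the new keyword can be forwarded uniformly.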
compressed_tensors/compressors/base.py
@@ -45,7 +45,7 @@ class Compressor(RegistryMixin):
         raise NotImplementedError()

     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
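The **kwargs added to the base Compressor.decompress signature (and mirrored in the dense, Marlin24, naive quantized, and bitmask compressors below) lets ModelCompressor forward compressor-specific arguments such as names_to_scheme without changing the shared interface; a stub sketch follows the model_compressor.py hunks.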
compressed_tensors/compressors/dense.py
@@ -29,6 +29,6 @@ class DenseCompressor(Compressor):
         return model_state

     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
     ) -> Generator[Tuple[str, Tensor], None, None]:
         return iter([])
compressed_tensors/compressors/marlin_24.py
@@ -107,7 +107,7 @@ class Marlin24Compressor(Compressor):
     def compress(
         self,
         model_state: Dict[str, Tensor],
-
+        names_to_scheme: Dict[str, QuantizationArgs],
         **kwargs,
     ) -> Dict[str, Tensor]:
         """
@@ -115,11 +115,11 @@ class Marlin24Compressor(Compressor):
         with the Marlin24 kernel

         :param model_state: state dict of uncompressed model
-        :param
+        :param names_to_scheme: quantization args for each quantized weight, needed for
             quantize function to calculate bit depth
         :return: compressed state dict
         """
-        self.validate_quant_compatability(
+        self.validate_quant_compatability(names_to_scheme)

        compressed_dict = {}
        weight_suffix = ".weight"
@@ -139,7 +139,7 @@ class Marlin24Compressor(Compressor):
            value = value.to(torch.float16)

            # quantize weight, keeping it as a float16 for now
-            quant_args =
+            quant_args = names_to_scheme[prefix]
            value = quantize(
                x=value, scale=scale, zero_point=zp, args=quant_args
            )
@@ -175,7 +175,7 @@ class Marlin24Compressor(Compressor):
        return compressed_dict

    def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        raise NotImplementedError(
            "Decompression is not implemented for the Marlin24 Compressor."
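For orientation, names_to_scheme maps each quantized module prefix (the weight tensor name minus its ".weight" suffix) to that module's weight QuantizationArgs. A hypothetical example of the shape of this mapping; the module names are illustrative, and the top-level re-export of QuantizationArgs is assumed:

from compressed_tensors.quantization import QuantizationArgs

# Illustrative only: real keys come from the model's module names.
names_to_scheme = {
    "model.layers.0.self_attn.q_proj": QuantizationArgs(num_bits=4),
    "model.layers.0.mlp.down_proj": QuantizationArgs(num_bits=8),
}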
compressed_tensors/compressors/model_compressor.py
@@ -231,7 +231,7 @@ class ModelCompressor:
        quantized_modules_to_args = map_modules_to_quant_args(model)
        if self.quantization_compressor is not None:
            compressed_state_dict = self.quantization_compressor.compress(
-                state_dict,
+                state_dict, names_to_scheme=quantized_modules_to_args
            )

        if self.sparsity_compressor is not None:
@@ -260,9 +260,11 @@ class ModelCompressor:
            setattr(model, SPARSITY_CONFIG_NAME, self.sparsity_compressor.config)

        if self.quantization_compressor is not None:
-            apply_quantization_config(model, self.quantization_config)
+            names_to_scheme = apply_quantization_config(model, self.quantization_config)
            load_pretrained_quantization(model, model_path)
-            dense_gen = self.quantization_compressor.decompress(
+            dense_gen = self.quantization_compressor.decompress(
+                model_path, names_to_scheme=names_to_scheme
+            )
            self._replace_weights(dense_gen, model)

        def update_status(module):
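Why the catch-all matters: ModelCompressor now forwards names_to_scheme unconditionally, whatever compressor the config selects. A minimal sketch with hypothetical stub classes (not the real registry) showing that only compressors declaring the parameter consume it:

class DenseLike:
    def decompress(self, path: str, device: str = "cpu", **kwargs):
        # extra kwargs such as names_to_scheme are accepted and ignored
        return iter([])

class PackedLike:
    def decompress(self, path: str, names_to_scheme: dict, device: str = "cpu"):
        # a packed compressor reads per-weight bit widths from the mapping
        return iter(names_to_scheme.items())

for compressor in (DenseLike(), PackedLike()):
    list(compressor.decompress("model_path", names_to_scheme={}))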
compressed_tensors/compressors/naive_quantized.py
@@ -49,14 +49,14 @@ class QuantizationCompressor(Compressor):
    def compress(
        self,
        model_state: Dict[str, Tensor],
-
+        names_to_scheme: Dict[str, QuantizationArgs],
        **kwargs,
    ) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict

        :param model_state: state dict of uncompressed model
-        :param
+        :param names_to_scheme: quantization args for each quantized weight, needed for
            quantize function to calculate bit depth
        :return: compressed state dict
        """
@@ -73,7 +73,7 @@ class QuantizationCompressor(Compressor):
            zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
            if scale is not None and zp is not None:
                # weight is quantized, compress it
-                quant_args =
+                quant_args = names_to_scheme[prefix]
                if can_quantize(value, quant_args):
                    # only quantize if not already quantized
                    value = quantize(
@@ -93,7 +93,7 @@ class QuantizationCompressor(Compressor):
        return compressed_dict

    def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a compressed state dict located at path_to_model_or_tensors
compressed_tensors/compressors/pack_quantized.py
@@ -29,7 +29,13 @@ from torch import Tensor
 from tqdm import tqdm


-__all__ = [
+__all__ = [
+    "PackedQuantizationCompressor",
+    "pack_4bit_ints",
+    "pack_8bit_ints",
+    "unpack_4bit_ints",
+    "unpack_8bit_ints",
+]

 _LOGGER: logging.Logger = logging.getLogger(__name__)

@@ -50,14 +56,14 @@ class PackedQuantizationCompressor(Compressor):
    def compress(
        self,
        model_state: Dict[str, Tensor],
-
+        names_to_scheme: Dict[str, QuantizationArgs],
        **kwargs,
    ) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict

        :param model_state: state dict of uncompressed model
-        :param
+        :param names_to_scheme: quantization args for each quantized weight, needed for
            quantize function to calculate bit depth
        :return: compressed state dict
        """
@@ -75,7 +81,7 @@ class PackedQuantizationCompressor(Compressor):
            shape = torch.tensor(value.shape)
            if scale is not None and zp is not None:
                # weight is quantized, compress it
-                quant_args =
+                quant_args = names_to_scheme[prefix]
                if can_quantize(value, quant_args):
                    # convert weight to an int if not already compressed
                    value = quantize(
@@ -85,7 +91,11 @@ class PackedQuantizationCompressor(Compressor):
                        args=quant_args,
                        dtype=torch.int8,
                    )
-
+
+                if quant_args.num_bits == 8:
+                    value = pack_8bit_ints(value.cpu())
+                else:
+                    value = pack_4bit_ints(value.cpu())
                compressed_dict[merge_names(prefix, "weight_shape")] = shape
                compressed_dict[merge_names(prefix, "weight_packed")] = value
                continue
@@ -101,7 +111,10 @@ class PackedQuantizationCompressor(Compressor):
        return compressed_dict

    def decompress(
-        self,
+        self,
+        path_to_model_or_tensors: str,
+        names_to_scheme: Dict[str, QuantizationArgs],
+        device: str = "cpu",
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a compressed state dict located at path_to_model_or_tensors
@@ -119,6 +132,7 @@ class PackedQuantizationCompressor(Compressor):
        for weight_name in weight_mappings.keys():
            weight_data = {}
            for param_name, safe_path in weight_mappings[weight_name].items():
+                weight_data["num_bits"] = names_to_scheme.get(weight_name).num_bits
                full_name = merge_names(weight_name, param_name)
                with safe_open(safe_path, framework="pt", device=device) as f:
                    weight_data[param_name] = f.get_tensor(full_name)
@@ -127,8 +141,12 @@ class PackedQuantizationCompressor(Compressor):
                zero_point = weight_data.get("weight_zero_point", None)
                scale = weight_data["weight_scale"]
                weight = weight_data["weight_packed"]
+                num_bits = weight_data["num_bits"]
                original_shape = torch.Size(weight_data["weight_shape"])
-
+                if num_bits == 4:
+                    unpacked = unpack_4bit_ints(weight, original_shape)
+                else:
+                    unpacked = unpack_8bit_ints(weight, original_shape)
                decompressed = dequantize(
                    x_q=unpacked,
                    scale=scale,
@@ -137,6 +155,19 @@ class PackedQuantizationCompressor(Compressor):
                yield merge_names(weight_name, "weight"), decompressed


+def pack_8bit_ints(value: torch.Tensor) -> torch.Tensor:
+    """
+    Packs a tensor of int8 into int32s with padding
+
+    :param value: tensor to pack
+    :returns: packed int32 tensor
+    """
+    # need to convert to unsigned 8bit to use numpy's pack/unpack
+    value_uint = (value - 128).to(torch.uint8)
+    bits = np.unpackbits(value_uint, axis=-1, bitorder="little")
+    return _pack_bits(bits_to_pack=bits)
+
+
 def pack_4bit_ints(value: torch.Tensor) -> torch.Tensor:
     """
     Packs a tensor of int4 weights stored in int8 into int32s with padding
@@ -152,22 +183,31 @@ def pack_4bit_ints(value: torch.Tensor) -> torch.Tensor:
     bits = np.unpackbits(temp.numpy(), axis=-1, bitorder="little")
     ranges = np.array([range(x, x + 4) for x in range(0, bits.shape[1], 8)]).flatten()
     only_4_bits = bits[:, ranges]  # top 4 bits are 0 because we're really uint4
+    return _pack_bits(bits_to_pack=only_4_bits)

-    # pad each row to fill a full 32bit int
-    pack_depth = 32
-    padding = (
-        math.ceil(only_4_bits.shape[1] / pack_depth) * pack_depth - only_4_bits.shape[1]
-    )
-    padded_bits = np.pad(
-        only_4_bits, pad_width=[(0, 0), (0, padding)], constant_values=0
-    )

-
-
-
-    compressed = np.ascontiguousarray(compressed).view(np.int32)
+def unpack_8bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
+    """
+    Unpacks a tensor packed int8 weights in int32

-
+    :param value: tensor to upack
+    :param shape: shape to unpack into, used to remove padding
+    :returns: unpacked int8 tensor
+    """
+    if value.dtype is not torch.int32:
+        raise ValueError(
+            f"Expected {torch.int32} but got {value.dtype}, Aborting unpack."
+        )
+
+    # unpack bits and undo padding to nearest int32 bits
+    individual_depth = 8
+    as_uint8 = value.numpy().view(np.uint8)
+    bits = np.unpackbits(as_uint8, axis=-1, bitorder="little")
+    original_row_size = int(shape[1] * individual_depth)
+    bits = bits[:, :original_row_size]
+    bits = np.packbits(bits, axis=-1, bitorder="little")
+    final = (bits - 128).astype(np.int8)
+    return torch.from_numpy(final)


 def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
@@ -206,3 +246,27 @@ def unpack_4bit_ints(value: torch.Tensor, shape: torch.Size) -> torch.Tensor:
     final = repacked.astype(np.int8) - 8

     return torch.from_numpy(final)
+
+
+def _pack_bits(bits_to_pack: torch.Tensor) -> torch.Tensor:
+    """
+    Pack a tensor of bits to int32.
+
+    :param bits_to_pack: tensor of bits to pack
+    """
+    # pad each row to fill a full 32bit int
+    pack_depth = 32
+    padding = (
+        math.ceil(bits_to_pack.shape[1] / pack_depth) * pack_depth
+        - bits_to_pack.shape[1]
+    )
+    padded_bits = np.pad(
+        bits_to_pack, pad_width=[(0, 0), (0, padding)], constant_values=0
+    )
+
+    # after packbits each uint8 is two packed uint4s
+    # then we keep the bit pattern the same but convert to int32
+    compressed = np.packbits(padded_bits, axis=-1, bitorder="little")
+    compressed = np.ascontiguousarray(compressed).view(np.int32)
+
+    return torch.from_numpy(compressed)
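Since the four helpers are now exported via __all__, the packing logic can be sanity-checked with a round trip. A minimal sketch, assuming the 0.4.0.20240626 wheel is installed: inputs are CPU int8 tensors, and 4-bit values must already lie in the int4 range [-8, 7] (unpack_4bit_ints subtracts 8 and unpack_8bit_ints shifts by 128 on the way out, so the pack side is assumed to apply the matching offsets):

import torch
from compressed_tensors.compressors.pack_quantized import (
    pack_4bit_ints,
    pack_8bit_ints,
    unpack_4bit_ints,
    unpack_8bit_ints,
)

int4_vals = torch.randint(-8, 8, (4, 16), dtype=torch.int8)  # int4 range, stored as int8
packed4 = pack_4bit_ints(int4_vals)  # int32 rows, padded to whole 32-bit words
assert torch.equal(unpack_4bit_ints(packed4, int4_vals.shape), int4_vals)

int8_vals = torch.randint(-128, 128, (4, 16), dtype=torch.int8)
packed8 = pack_8bit_ints(int8_vals)
assert torch.equal(unpack_8bit_ints(packed8, int8_vals.shape), int8_vals)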
compressed_tensors/compressors/sparse_bitmask.py
@@ -72,7 +72,7 @@ class BitmaskCompressor(Compressor):
        return compressed_dict

    def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu"
+        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a bitmask compressed state dict located
compressed_tensors/quantization/lifecycle/apply.py
@@ -96,7 +96,7 @@ def load_pretrained_quantization(model: Module, model_name_or_path: str):
        )


-def apply_quantization_config(model: Module, config: QuantizationConfig):
+def apply_quantization_config(model: Module, config: QuantizationConfig) -> Dict:
    """
    Initializes the model for quantization in-place based on the given config

@@ -106,6 +106,7 @@ def apply_quantization_config(model: Module, config: QuantizationConfig):
    # build mapping of targets to schemes for easier matching
    # use ordered dict to preserve target ordering in config
    target_to_scheme = OrderedDict()
+    names_to_scheme = OrderedDict()
    for scheme in config.config_groups.values():
        for target in scheme.targets:
            target_to_scheme[target] = scheme
@@ -123,6 +124,7 @@ def apply_quantization_config(model: Module, config: QuantizationConfig):
        if target is not None:
            # target matched - add layer and scheme to target list
            submodule.quantization_scheme = target_to_scheme[target]
+            names_to_scheme[name] = submodule.quantization_scheme.weights

    if config.ignore is not None and ignored_submodules is not None:
        if set(config.ignore) - set(ignored_submodules):
@@ -132,7 +134,9 @@ def apply_quantization_config(model: Module, config: QuantizationConfig):
            f"{set(config.ignore) - set(ignored_submodules)}"
        )
    # apply current quantization status across all targeted layers
+
    apply_quantization_status(model, config.quantization_status)
+    return names_to_scheme


 def apply_quantization_status(model: Module, status: QuantizationStatus):
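Downstream, the new return value gives callers a ready-made lookup of per-module weight quantization args. A hypothetical usage sketch; model and config are assumed to already exist (any torch Module plus a QuantizationConfig):

from compressed_tensors.quantization.lifecycle.apply import apply_quantization_config

names_to_scheme = apply_quantization_config(model, config)  # model/config defined elsewhere
for name, weight_args in names_to_scheme.items():
    # e.g. the packed compressor keys its 4- vs 8-bit choice off num_bits
    print(name, weight_args.num_bits)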
compressed_tensors_nightly-0.4.0.20240626.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240623
+Version: 0.4.0.20240626
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
compressed_tensors_nightly-0.4.0.20240626.dist-info/RECORD
@@ -2,14 +2,14 @@ compressed_tensors/__init__.py,sha256=SV1csvHUVCd8kHXz6UDZim1HZ_fAVG3vfk-j_4Bb6h
 compressed_tensors/base.py,sha256=OA2TOLP1gP3LSH7gp508eqr2ZtDQ-pqRHElCp-aB0vs,755
 compressed_tensors/version.py,sha256=cJJf0y0NnXErTtQtVQjOvrq9hMIkhXIfBwuu4Tuxl24,1586
 compressed_tensors/compressors/__init__.py,sha256=wmX4VnkUTS63xBwK5-6w8FP78bNZpcdcqvf2KOEC5E4,1133
-compressed_tensors/compressors/base.py,sha256
-compressed_tensors/compressors/dense.py,sha256=
+compressed_tensors/compressors/base.py,sha256=-rqT2h9G2iwDkwrVj0d0jxxn9h0dccJA1mqOzVEkwGM,2144
+compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
-compressed_tensors/compressors/marlin_24.py,sha256=
-compressed_tensors/compressors/model_compressor.py,sha256=
-compressed_tensors/compressors/naive_quantized.py,sha256=
-compressed_tensors/compressors/pack_quantized.py,sha256=
-compressed_tensors/compressors/sparse_bitmask.py,sha256=
+compressed_tensors/compressors/marlin_24.py,sha256=PULMP1fp1sNWz-xOxvM0JXhOrUbq6sPwOTscYSifgDw,9450
+compressed_tensors/compressors/model_compressor.py,sha256=t4dH7Yh637JV53VPyys-gkoMPJHGf_tlWWufLRyIdUM,13418
+compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
+compressed_tensors/compressors/pack_quantized.py,sha256=ZRqqBVPB6B-nZQOSdu7WhKrKWIm2-ZVrUQHATxO2Boc,10297
+compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
 compressed_tensors/compressors/utils/__init__.py,sha256=-mbGDZh1hd9T6u62Ht_iBIK255UmMg0f5bLkSs1f9Cc,731
 compressed_tensors/compressors/utils/helpers.py,sha256=4fq7KclSIK__jemCG9pwYlgWLrQjsaAMxhIrhjdw0BQ,1506
 compressed_tensors/compressors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
@@ -21,9 +21,9 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
 compressed_tensors/quantization/quant_config.py,sha256=hL42sXp1wAZxyrkHarw7tAMRcwSVEr0MT3wmrmL3NhE,8285
-compressed_tensors/quantization/quant_scheme.py,sha256=
+compressed_tensors/quantization/quant_scheme.py,sha256=TU9W3bOWCY2l5Vrha0ufRtW1ac4gew1uwW8N3JGbZvg,3785
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
+compressed_tensors/quantization/lifecycle/apply.py,sha256=Cly9ptSnKoGm_Up4RFtMpR2VH_dATzqc3upiEamiW14,8902
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
 compressed_tensors/quantization/lifecycle/forward.py,sha256=tcjL_qyE3ODourNprt2bndF7_ALlUEGY2_Yag4exLoE,11908
@@ -39,10 +39,10 @@ compressed_tensors/quantization/utils/helpers.py,sha256=NzAH18Cn_-mTAR87y6IlcQU5
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
-compressed_tensors/utils/helpers.py,sha256=
+compressed_tensors/utils/helpers.py,sha256=dt4uxSIeqvqDmeJBJ6UUVHEOnMI7EtMSzEDv6PRUu14,2266
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
-compressed_tensors_nightly-0.4.0.20240623.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.4.0.20240623.dist-info/METADATA,sha256=
-compressed_tensors_nightly-0.4.0.20240623.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors_nightly-0.4.0.20240623.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.4.0.20240623.dist-info/RECORD,,
+compressed_tensors_nightly-0.4.0.20240626.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240626.dist-info/METADATA,sha256=UczZs7zG7XJKQ_8BGjHjE-qHClrlr7IgztIx_SlTcw0,5668
+compressed_tensors_nightly-0.4.0.20240626.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240626.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240626.dist-info/RECORD,,