compressed-tensors-nightly 0.4.0.20240721__py3-none-any.whl → 0.4.0.20240731__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/model_compressor.py +6 -8
- compressed_tensors/quantization/lifecycle/apply.py +12 -13
- compressed_tensors/quantization/lifecycle/calibration.py +17 -0
- compressed_tensors/quantization/lifecycle/forward.py +8 -4
- compressed_tensors/quantization/lifecycle/initialize.py +28 -0
- compressed_tensors/quantization/quant_config.py +11 -0
- compressed_tensors/quantization/quant_scheme.py +20 -1
- compressed_tensors/utils/__init__.py +1 -0
- compressed_tensors/utils/offload.py +104 -0
- {compressed_tensors_nightly-0.4.0.20240721.dist-info → compressed_tensors_nightly-0.4.0.20240731.dist-info}/METADATA +2 -1
- {compressed_tensors_nightly-0.4.0.20240721.dist-info → compressed_tensors_nightly-0.4.0.20240731.dist-info}/RECORD +14 -13
- {compressed_tensors_nightly-0.4.0.20240721.dist-info → compressed_tensors_nightly-0.4.0.20240731.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.4.0.20240721.dist-info → compressed_tensors_nightly-0.4.0.20240731.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.4.0.20240721.dist-info → compressed_tensors_nightly-0.4.0.20240731.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/model_compressor.py

@@ -39,10 +39,10 @@ from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
 )
-from compressed_tensors.utils import get_safetensors_folder
+from compressed_tensors.utils import get_safetensors_folder, update_parameter_data
 from compressed_tensors.utils.helpers import fix_fsdp_module_name
 from torch import Tensor
-from torch.nn import Module
+from torch.nn import Module
 from tqdm import tqdm
 from transformers import AutoConfig
 from transformers.file_utils import CONFIG_NAME
@@ -307,12 +307,10 @@ class ModelCompressor:

     def _replace_weights(self, dense_weight_generator, model):
         for name, data in tqdm(dense_weight_generator, desc="Decompressing model"):
-
-
-
-
-            data_new = Parameter(data.to(model_device).to(data_dtype))
-            data_old.data = data_new.data
+            split_name = name.split(".")
+            prefix, param_name = ".".join(split_name[:-1]), split_name[-1]
+            module = operator.attrgetter(prefix)(model)
+            update_parameter_data(module, data, param_name)


 def map_modules_to_quant_args(model: Module) -> Dict:
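In the rewritten `_replace_weights`, each decompressed tensor arrives under a dotted parameter name; the method splits that name into the owning module path and the parameter name, resolves the module with `operator.attrgetter`, and writes the data back through `update_parameter_data` so offloaded modules stay in sync. A small sketch of the path-splitting step, using a made-up two-layer model (the `model` and `name` values here are illustrative, not from the package):

```python
import operator

import torch.nn as nn

# hypothetical model and dotted parameter name, for illustration only
model = nn.Sequential(nn.Linear(4, 4), nn.Linear(4, 2))
name = "1.weight"

split_name = name.split(".")
prefix, param_name = ".".join(split_name[:-1]), split_name[-1]

# attrgetter walks the dotted path to the owning submodule; nn.Module exposes
# registered children as attributes, so numeric names like "1" resolve as well
module = operator.attrgetter(prefix)(model)
assert isinstance(module, nn.Linear) and param_name == "weight"
```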
compressed_tensors/quantization/lifecycle/apply.py

@@ -43,6 +43,7 @@ from compressed_tensors.quantization.utils import (
     iter_named_leaf_modules,
 )
 from compressed_tensors.utils.helpers import fix_fsdp_module_name
+from compressed_tensors.utils.offload import update_parameter_data
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
 from torch.nn import Module

@@ -265,19 +266,17 @@ def _load_quant_args_from_state_dict(
     """
     scale_name = f"{base_name}_scale"
     zp_name = f"{base_name}_zero_point"
-
-
-
-
-    if
-
-
-
-
-
-
-    else:  # fill with zeros matching scale shape
-        zp.data = torch.zeros_like(scale, dtype=zp.dtype).to(device)
+
+    state_dict_scale = state_dict.get(f"{module_name}.{scale_name}", None)
+    state_dict_zp = state_dict.get(f"{module_name}.{zp_name}", None)
+
+    if state_dict_scale is not None:
+        # module is quantized
+        update_parameter_data(module, state_dict_scale, scale_name)
+        if state_dict_zp is None:
+            # fill in zero point for symmetric quantization
+            state_dict_zp = torch.zeros_like(state_dict_scale, device="cpu")
+        update_parameter_data(module, state_dict_zp, zp_name)


 def _scheme_from_targets(
compressed_tensors/quantization/lifecycle/calibration.py

@@ -16,6 +16,7 @@
 import logging

 from compressed_tensors.quantization.quant_config import QuantizationStatus
+from compressed_tensors.utils import is_module_offloaded, update_parameter_data
 from torch.nn import Module

@@ -48,4 +49,20 @@ def set_module_for_calibration(module: Module):
             "to re-calibrate a frozen module"
         )

+    if module.quantization_scheme.weights is not None:
+        # set weight scale and zero_point up front, calibration data doesn't affect it
+        observer = module.weight_observer
+
+        offloaded = False
+        if is_module_offloaded(module):
+            module._hf_hook.pre_forward(module)
+            offloaded = True
+
+        scale, zero_point = observer(module.weight)
+        update_parameter_data(module, scale, "weight_scale")
+        update_parameter_data(module, zero_point, "weight_zero_point")
+
+        if offloaded:
+            module._hf_hook.post_forward(module, None)
+
     module.quantization_status = QuantizationStatus.CALIBRATION
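`set_module_for_calibration` now derives the weight scale and zero point once, before any calibration batches run, briefly on-loading the weights when the module is managed by an accelerate offload hook. A hedged sketch of that on-load/off-load bracket factored into a reusable helper; the helper itself is illustrative and not part of the package:

```python
from compressed_tensors.utils import is_module_offloaded


def with_onloaded_weights(module, fn):
    """Run fn(module) with the module's weights materialized on its execution device.

    Illustrative only: mirrors the pre_forward/post_forward bracket used by
    set_module_for_calibration for accelerate-offloaded modules.
    """
    offloaded = is_module_offloaded(module)
    if offloaded:
        module._hf_hook.pre_forward(module)  # move weights onto the execution device
    try:
        return fn(module)
    finally:
        if offloaded:
            module._hf_hook.post_forward(module, None)  # return weights to the offload store
```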
compressed_tensors/quantization/lifecycle/forward.py

@@ -25,6 +25,7 @@ from compressed_tensors.quantization.quant_args import (
 )
 from compressed_tensors.quantization.quant_config import QuantizationStatus
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
+from compressed_tensors.utils import update_parameter_data
 from torch.nn import Module

@@ -312,16 +313,19 @@ def maybe_calibrate_or_quantize(
     scale = getattr(module, f"{base_name}_scale")
     zero_point = getattr(module, f"{base_name}_zero_point")

-    if
+    if (
+        module.quantization_status == QuantizationStatus.CALIBRATION
+        and base_name != "weight"
+    ):
         # calibration mode - get new quant params from observer
         observer = getattr(module, f"{base_name}_observer")

         updated_scale, updated_zero_point = observer(value)

         # update scale and zero point
-
-
-
+        update_parameter_data(module, updated_scale, f"{base_name}_scale")
+        update_parameter_data(module, updated_zero_point, f"{base_name}_zero_point")
+
     return fake_quantize(value, scale, zero_point, args)

compressed_tensors/quantization/lifecycle/initialize.py

@@ -17,6 +17,8 @@ import logging
 from typing import Optional

 import torch
+from accelerate.hooks import add_hook_to_module, remove_hook_from_module
+from accelerate.utils import PrefixedDataset
 from compressed_tensors.quantization.lifecycle.forward import (
     wrap_module_forward_quantized,
 )
@@ -26,6 +28,7 @@ from compressed_tensors.quantization.quant_args import (
 )
 from compressed_tensors.quantization.quant_config import QuantizationStatus
 from compressed_tensors.quantization.quant_scheme import QuantizationScheme
+from compressed_tensors.utils import get_execution_device, is_module_offloaded
 from torch.nn import Module, Parameter

@@ -81,9 +84,32 @@ def initialize_module_for_quantization(
     module.quantization_scheme = scheme
     module.quantization_status = QuantizationStatus.INITIALIZED

+    offloaded = False
+    if is_module_offloaded(module):
+        offloaded = True
+        hook = module._hf_hook
+        prefix_dict = module._hf_hook.weights_map
+        new_prefix = {}
+
+        # recreate the prefix dict (since it is immutable)
+        # and add quantization parameters
+        for key, data in module.named_parameters():
+            if key not in prefix_dict:
+                new_prefix[f"{prefix_dict.prefix}{key}"] = data
+            else:
+                new_prefix[f"{prefix_dict.prefix}{key}"] = prefix_dict[key]
+        new_prefix_dict = PrefixedDataset(new_prefix, prefix_dict.prefix)
+        remove_hook_from_module(module)
+
     # wrap forward call of module to perform quantized actions based on calltime status
     wrap_module_forward_quantized(module, scheme)

+    if offloaded:
+        # we need to re-add the hook for offloading now that we've wrapped forward
+        add_hook_to_module(module, hook)
+        if prefix_dict is not None:
+            module._hf_hook.weights_map = new_prefix_dict
+

 def _initialize_scale_zero_point_observer(
     module: Module,
@@ -99,6 +125,8 @@ def _initialize_scale_zero_point_observer(
         return  # no need to register a scale and zero point for a dynamic observer

     device = next(module.parameters()).device
+    if is_module_offloaded(module):
+        device = get_execution_device(module)

     # infer expected scale/zero point shape
     expected_shape = 1  # per tensor
compressed_tensors/quantization/quant_config.py

@@ -239,3 +239,14 @@ class QuantizationConfig(BaseModel):
             format=format,
             ignore=consolidated_ignore,
         )
+
+    def requires_calibration_data(self):
+        for _, scheme in self.config_groups.items():
+            if scheme.input_activations is not None:
+                if not scheme.input_activations.dynamic:
+                    return True
+            if scheme.output_activations is not None:
+                if not scheme.output_activations.dynamic:
+                    return True
+
+        return False
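The new `requires_calibration_data` method reports whether any configured activation scheme is static, i.e. whether forward passes over a calibration set are needed before scales can be frozen. A minimal sketch under the assumption that a config is built programmatically; the scheme values below are illustrative, not taken from the package:

```python
from compressed_tensors.quantization.quant_args import QuantizationArgs
from compressed_tensors.quantization.quant_config import QuantizationConfig
from compressed_tensors.quantization.quant_scheme import QuantizationScheme

# static (non-dynamic) input activations: scales must be observed on real data
static_w8a8 = QuantizationScheme(
    targets=["Linear"],
    weights=QuantizationArgs(num_bits=8, symmetric=True),
    input_activations=QuantizationArgs(num_bits=8, dynamic=False),
)
config = QuantizationConfig(config_groups={"group_0": static_w8a8})

assert config.requires_calibration_data()  # static activation scales need calibration batches
```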
compressed_tensors/quantization/quant_scheme.py

@@ -165,7 +165,7 @@ W4A8 = dict(
     input_activations=QuantizationArgs(
         num_bits=8,
         type=QuantizationType.INT,
-        strategy=QuantizationStrategy.
+        strategy=QuantizationStrategy.TOKEN,
         symmetric=True,
         dynamic=True,
     ),
@@ -189,6 +189,24 @@ FP8 = dict(
     ),
 )

+# FP8 weights and FP8 dynamic activations quantization
+FP8_DYNAMIC = dict(
+    weights=QuantizationArgs(
+        num_bits=8,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.CHANNEL,
+        symmetric=True,
+        dynamic=False,
+    ),
+    input_activations=QuantizationArgs(
+        num_bits=8,
+        type=QuantizationType.FLOAT,
+        strategy=QuantizationStrategy.TOKEN,
+        symmetric=True,
+        dynamic=True,
+    ),
+)
+
 PRESET_SCHEMES = {
     # Integer weight only schemes
     "W8A16": W8A16,
@@ -198,4 +216,5 @@ PRESET_SCHEMES = {
     "W4A8": W4A8,
     # Float weight and activation schemes
     "FP8": FP8,
+    "FP8_DYNAMIC": FP8_DYNAMIC,
 }
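FP8_DYNAMIC pairs static per-channel FP8 weights with dynamic per-token FP8 input activations, and is now reachable by name through PRESET_SCHEMES. A minimal sketch of expanding the preset dict into a scheme; the targets list is an assumed example:

```python
from compressed_tensors.quantization.quant_scheme import PRESET_SCHEMES, QuantizationScheme

# look up the preset by name and expand it into a full scheme for Linear layers
scheme = QuantizationScheme(targets=["Linear"], **PRESET_SCHEMES["FP8_DYNAMIC"])

# dynamic activations compute their scales at runtime, so only the weights
# carry pre-computed quantization parameters
assert scheme.input_activations.dynamic
assert not scheme.weights.dynamic
```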
compressed_tensors/utils/offload.py (new file)

@@ -0,0 +1,104 @@
+# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+from torch.nn import Module
+
+
+__all__ = [
+    "is_module_offloaded",
+    "get_execution_device",
+    "get_offloaded_device",
+    "update_prefix_dict",
+    "update_parameter_data",
+]
+
+
+def is_module_offloaded(module: Module) -> bool:
+    """
+    :param module: layer to check
+    :return: True if layer is offloaded from GPU, False otherwise
+    """
+    return hasattr(module, "_hf_hook") and module._hf_hook.offload
+
+
+def get_execution_device(module: Module) -> torch.device:
+    """
+    :param module: layer to check
+    :return: device layer is loaded onto during forward pass
+    """
+    if is_module_offloaded(module):
+        return module._hf_hook.execution_device
+    return next(module.parameters()).device
+
+
+def get_offloaded_device(module: Module) -> torch.device:
+    """
+    :param module: layer to check
+    :return: device the layer is offloaded to after the forward pass
+    """
+    if is_module_offloaded(module):
+        first_key = list(module._hf_hook.weights_map.keys())[0]
+        prefix_dataset = module._hf_hook.weights_map.dataset
+        return prefix_dataset[first_key].device
+    return next(module.parameters()).device
+
+
+def update_prefix_dict(module: Module, key: str, data: torch.Tensor):
+    """
+    Updates the offloaded state dict for a given module. Parameter named key is replaced
+    by data. This is necessary because parameter updates for offloaded modules do not
+    persist automatically between loads. This function only affects the offloaded
+    state dict and not the current state of the loaded module.
+
+    :param module: layer containing the parameter to update
+    :param key: name of parameter to update
+    :param data: tensor to update parameter with in the offloaded state dict
+    """
+    if not is_module_offloaded(module):
+        raise ValueError("Prefix dict is only applicable to offloaded modules")
+    prefix_dict = module._hf_hook.weights_map
+    prefix_dict.dataset[f"{prefix_dict.prefix}{key}"] = data
+
+
+def update_parameter_data(
+    module: Module, new_param_data: torch.Tensor, param_name: str
+):
+    """
+    Updates the parameter value named param_name for a given module. This function
+    updates both the current loaded module state and the offloaded state dict if
+    the module is offloaded. This is necessary because parameter updates for offloaded
+    modules do not persist automatically between loads.
+
+    :param module: layer containing the parameter to update
+    :param new_param_data: tensor to update parameter with
+    :param param_name:
+    """
+    device = next(module.parameters()).device
+
+    offloaded = False
+    if is_module_offloaded(module):
+        offload_device = get_offloaded_device(module)
+        offloaded = True
+
+    parameter = getattr(module, param_name, None)
+    dtype = parameter.dtype
+    parameter.data = new_param_data.to(device).to(dtype)
+
+    if offloaded:
+        prefix_dict = module._hf_hook.weights_map.dataset
+        prefix = module._hf_hook.weights_map.prefix
+        prefix_dict[f"{prefix}{param_name}"] = new_param_data.to(offload_device).to(
+            dtype
+        )
{compressed_tensors_nightly-0.4.0.20240721.dist-info → compressed_tensors_nightly-0.4.0.20240731.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.4.0.20240721
+Version: 0.4.0.20240731
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -10,6 +10,7 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: torch >=1.7.0
 Requires-Dist: transformers
+Requires-Dist: accelerate
 Requires-Dist: pydantic >=2.0
 Provides-Extra: dev
 Requires-Dist: black ==22.12.0 ; extra == 'dev'
{compressed_tensors_nightly-0.4.0.20240721.dist-info → compressed_tensors_nightly-0.4.0.20240731.dist-info}/RECORD

@@ -6,7 +6,7 @@ compressed_tensors/compressors/base.py,sha256=-rqT2h9G2iwDkwrVj0d0jxxn9h0dccJA1m
 compressed_tensors/compressors/dense.py,sha256=xcWECjcRY4INN6jC7vHx5wvUX3NmnKlxA9SVE1A6m2Q,1267
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
 compressed_tensors/compressors/marlin_24.py,sha256=e7fGUyZbjUpA5VUMCPxqcYPGNiwoDKupHJaXWCoVKRw,9410
-compressed_tensors/compressors/model_compressor.py,sha256=
+compressed_tensors/compressors/model_compressor.py,sha256=b7jPE4czwP9uulIZML5qUQAvQaQzElwzUGwat7jlpgI,13352
 compressed_tensors/compressors/naive_quantized.py,sha256=6_1wuTF96-lw-UzzrsiEX_ipciKiQQJoZ8uotVwtbyQ,5569
 compressed_tensors/compressors/pack_quantized.py,sha256=tnhqvkko6fIaTywI2JNvh5lE2xXWKJ_hYShv_s6C9Vk,8506
 compressed_tensors/compressors/sparse_bitmask.py,sha256=kiDwBlFV0sJGLcIdDYxIiuF64ccgwDfqq1hWRQThYDc,8647
@@ -16,16 +16,16 @@ compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74j
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=Vc_tWSTcbZZsMJlACpLq4JEPvGx87izc8VEx-mcXjoM,5621
-compressed_tensors/quantization/quant_config.py,sha256=
-compressed_tensors/quantization/quant_scheme.py,sha256=
+compressed_tensors/quantization/quant_config.py,sha256=NpVu8YJ4Xw2pIQW_PGaNaml8kx1bUnxkvb0jBYWbKdE,9971
+compressed_tensors/quantization/quant_scheme.py,sha256=_RKOFJI0T5xJVBLX63UeYkSY4EFAecsBnqzUIVBjeU0,6014
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=MXE2E7GfIfRRfhrdGy2Og3AZOz5N59B0ZGFcsD89y6c,821
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
-compressed_tensors/quantization/lifecycle/calibration.py,sha256=
+compressed_tensors/quantization/lifecycle/apply.py,sha256=aamouy1IWCSGl5_lfC7rZrUNbMEfhyHQrZFGEm2VH4w,13242
+compressed_tensors/quantization/lifecycle/calibration.py,sha256=n-m4xwa9Ds2xrltp6r0rhuVLJhE8bQ1LnifrcrbA-ig,2448
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
+compressed_tensors/quantization/lifecycle/forward.py,sha256=6PSXYcf-R1dOY8zsuIWnBaoyARNymYc3-qvV6-L7SlI,12397
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
 compressed_tensors/quantization/lifecycle/helpers.py,sha256=xDkM3yVpGVnwAdg2aUOmrlDPaOksi-bavSQ5mMeOQlk,1651
-compressed_tensors/quantization/lifecycle/initialize.py,sha256=
+compressed_tensors/quantization/lifecycle/initialize.py,sha256=oCD8pgmHT3lW5J7zdsSN3YzEQIhTfE7M01R5Wb0wpck,5801
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
 compressed_tensors/quantization/observers/base.py,sha256=2WO7N2eyXf1r1gxVidos1bUS5o7pcrpug4gQgHIazrQ,6794
 compressed_tensors/quantization/observers/helpers.py,sha256=s_A23Qa_BLfOdHJCN5bm-qPWkhjjj_RIVrhSp1Y9Dtk,4211
@@ -35,13 +35,14 @@ compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5
 compressed_tensors/quantization/utils/helpers.py,sha256=YjXABJQUnelof-z7qcwck6fnrFLh4uMSrOmPiqNp_RY,8591
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
-compressed_tensors/utils/__init__.py,sha256=
+compressed_tensors/utils/__init__.py,sha256=rvbIJlvdKYn4iX7r3KP6peCbU5uyMzgxwhsQstLoMxQ,785
 compressed_tensors/utils/helpers.py,sha256=d3yP9ViQ8R3GzMHfohxNlaokzyrRuj2PyjxWAJZmSws,3156
+compressed_tensors/utils/offload.py,sha256=BL7_cNAHTKbSta179R5R4ASk6oXuZhTJDY4D_8Lv2OE,3717
 compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVyah6BUUir_StT28,2530
 compressed_tensors/utils/safetensors_load.py,sha256=0MheXwx1jeY12PeISppiSIZHs6rmN2YddwPpFb9V67I,8527
 compressed_tensors/utils/semi_structured_conversions.py,sha256=g1EZHzdv-ko7ufPX430dp7wE33o6FWJXuSP4zZydCu0,13488
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
-compressed_tensors_nightly-0.4.0.
+compressed_tensors_nightly-0.4.0.20240731.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.4.0.20240731.dist-info/METADATA,sha256=2uwF7iMEXcX3ZvX-mCcklcr-qdqhlap0Ld5WDxL41pQ,5694
+compressed_tensors_nightly-0.4.0.20240731.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.4.0.20240731.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.4.0.20240731.dist-info/RECORD,,
File without changes
|
File without changes
|