compressed-tensors-nightly 0.3.3.20240601__py3-none-any.whl → 0.3.3.20240603__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/model_compressor.py +21 -6
- compressed_tensors/quantization/lifecycle/apply.py +16 -1
- compressed_tensors/quantization/lifecycle/forward.py +8 -0
- compressed_tensors/quantization/quant_config.py +7 -19
- compressed_tensors/utils/helpers.py +24 -6
- {compressed_tensors_nightly-0.3.3.20240601.dist-info → compressed_tensors_nightly-0.3.3.20240603.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.3.3.20240601.dist-info → compressed_tensors_nightly-0.3.3.20240603.dist-info}/RECORD +10 -10
- {compressed_tensors_nightly-0.3.3.20240601.dist-info → compressed_tensors_nightly-0.3.3.20240603.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.3.3.20240601.dist-info → compressed_tensors_nightly-0.3.3.20240603.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.3.3.20240601.dist-info → compressed_tensors_nightly-0.3.3.20240603.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/model_compressor.py
@@ -16,6 +16,7 @@ import json
 import logging
 import operator
 import os
+from copy import deepcopy
 from typing import Dict, Optional, Union
 
 from compressed_tensors.base import (
@@ -36,6 +37,7 @@ from compressed_tensors.quantization.utils import (
     iter_named_leaf_modules,
 )
 from compressed_tensors.utils import get_safetensors_folder
+from compressed_tensors.utils.helpers import fix_fsdp_module_name
 from torch import Tensor
 from torch.nn import Module, Parameter
 from tqdm import tqdm
@@ -89,9 +91,8 @@ class ModelCompressor:
         if compression_config is None:
             return None
 
-        sparsity_config =
-        quantization_config =
-
+        sparsity_config = cls.parse_sparsity_config(compression_config)
+        quantization_config = cls.parse_quantization_config(compression_config)
         if sparsity_config is None and quantization_config is None:
             return None
 
@@ -141,6 +142,21 @@ class ModelCompressor:
             sparsity_config=sparsity_config, quantization_config=quantization_config
         )
 
+    @staticmethod
+    def parse_sparsity_config(compression_config: Dict) -> Union[Dict, None]:
+        if compression_config is None:
+            return None
+        return compression_config.get(SPARSITY_CONFIG_NAME, None)
+
+    @staticmethod
+    def parse_quantization_config(compression_config: Dict) -> Union[Dict, None]:
+        quantization_config = deepcopy(compression_config)
+        quantization_config.pop(SPARSITY_CONFIG_NAME, None)
+        if len(quantization_config) == 0:
+            quantization_config = None
+
+        return quantization_config
+
     def __init__(
         self,
         sparsity_config: Optional[SparsityCompressionConfig] = None,
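The two new static helpers split a combined compression config dict into its sparsity and quantization parts. A minimal usage sketch, assuming ModelCompressor and SPARSITY_CONFIG_NAME are importable as shown elsewhere in this diff; the input values are hypothetical:

from compressed_tensors.base import SPARSITY_CONFIG_NAME
from compressed_tensors.compressors import ModelCompressor

# hypothetical combined config, as it might appear in a model's config.json
compression_config = {
    SPARSITY_CONFIG_NAME: {"format": "sparse-bitmask"},
    "config_groups": {},
    "quant_method": "compressed-tensors",
}

sparsity = ModelCompressor.parse_sparsity_config(compression_config)
# -> {"format": "sparse-bitmask"}
quantization = ModelCompressor.parse_quantization_config(compression_config)
# -> {"config_groups": {}, "quant_method": "compressed-tensors"}
# parse_quantization_config deep-copies the input, so compression_config is untouched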
@@ -233,9 +249,7 @@ class ModelCompressor:
         config_data[COMPRESSION_CONFIG_NAME] = {}
         if self.quantization_config is not None:
             quant_config_data = self.quantization_config.model_dump()
-            config_data[COMPRESSION_CONFIG_NAME][
-                QUANTIZATION_CONFIG_NAME
-            ] = quant_config_data
+            config_data[COMPRESSION_CONFIG_NAME] = quant_config_data
         if self.sparsity_config is not None:
             sparsity_config_data = self.sparsity_config.model_dump()
             config_data[COMPRESSION_CONFIG_NAME][
@@ -260,6 +274,7 @@ def _get_weight_arg_mappings(model: Module) -> Dict:
     for name, submodule in iter_named_leaf_modules(model):
         if is_module_quantized(submodule):
             if submodule.quantization_scheme.weights is not None:
+                name = fix_fsdp_module_name(name)
                 quantized_modules_to_args[name] = submodule.quantization_scheme.weights
 
     return quantized_modules_to_args
compressed_tensors/quantization/lifecycle/apply.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import re
 from collections import OrderedDict
 from typing import Dict, Iterable, Optional
@@ -35,6 +36,7 @@ from compressed_tensors.quantization.utils import (
     infer_quantization_status,
     iter_named_leaf_modules,
 )
+from compressed_tensors.utils.helpers import fix_fsdp_module_name
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
 from torch.nn import Module
 
@@ -50,6 +52,9 @@ from compressed_tensors.quantization.utils.helpers import is_module_quantized
 from compressed_tensors.utils.safetensors_load import get_quantization_state_dict
 
 
+_LOGGER = logging.getLogger(__name__)
+
+
 def load_pretrained_quantization(model: Module, model_name_or_path: str):
     """
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
@@ -105,15 +110,24 @@ def apply_quantization_config(model: Module, config: QuantizationConfig):
         for target in scheme.targets:
             target_to_scheme[target] = scheme
 
+    # list of submodules to ignore
+    ignored_submodules = []
     # mark appropriate layers for quantization by setting their quantization schemes
     for name, submodule in iter_named_leaf_modules(model):
+        # potentially fix module name to remove FSDP wrapper prefix
+        name = fix_fsdp_module_name(name)
         if find_first_name_or_class_match(name, submodule, config.ignore):
+            ignored_submodules.append(name)
             continue  # layer matches ignore list, continue
         target = find_first_name_or_class_match(name, submodule, target_to_scheme)
         if target is not None:
             # target matched - add layer and scheme to target list
             submodule.quantization_scheme = target_to_scheme[target]
-
+    if set(config.ignore) - set(ignored_submodules):
+        _LOGGER.warning(
+            "Some layers that were to be ignored were "
+            f"not found in the model: {set(config.ignore) - set(ignored_submodules)}"
+        )
     # apply current quantization status across all targeted layers
     apply_quantization_status(model, config.quantization_status)
 
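The new ignored_submodules bookkeeping exists so that apply_quantization_config can warn about ignore entries that never matched a module in the model. A standalone sketch of the same set arithmetic, with made-up module names:

import logging

_LOGGER = logging.getLogger(__name__)

config_ignore = ["lm_head", "model.layers.99.mlp"]  # hypothetical config.ignore list
ignored_submodules = ["lm_head"]                     # names that actually matched

unmatched = set(config_ignore) - set(ignored_submodules)
if unmatched:
    _LOGGER.warning(
        "Some layers that were to be ignored were "
        f"not found in the model: {unmatched}"
    )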
@@ -157,6 +171,7 @@ def _find_first_match(
     # returns first element of target that matches value either
     # exactly or as a regex after 're:'. if check_contains is set to True,
     # additionally checks if the target string is contained with value.
+
     for target in targets:
         if target.startswith("re:"):
             pattern = target[3:]
compressed_tensors/quantization/lifecycle/forward.py
@@ -57,6 +57,14 @@ def quantize(
     :param dtype: optional dtype to cast the quantized output to
     :return: fake quantized tensor
     """
+    # ensure all tensors are on the same device
+    # assumes that the target device is the input
+    # tensor's device
+    if x.device != scale.device:
+        scale = scale.to(x.device)
+    if x.device != zero_point.device:
+        zero_point = zero_point.to(x.device)
+
     return _process_quantization(
         x=x,
         scale=scale,
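The added guard moves scale and zero_point onto the input tensor's device before fake quantization runs. The same pattern in plain PyTorch, independent of compressed-tensors; tensor values and devices are arbitrary examples:

import torch

x = torch.randn(4, 8)              # e.g. on "cuda:0" in practice
scale = torch.tensor(0.05)         # may have been loaded onto a different device
zero_point = torch.tensor(0)

if x.device != scale.device:
    scale = scale.to(x.device)
if x.device != zero_point.device:
    zero_point = zero_point.to(x.device)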
compressed_tensors/quantization/quant_config.py
@@ -15,7 +15,6 @@
 from enum import Enum
 from typing import Dict, List, Optional, Union
 
-from compressed_tensors.base import QUANTIZATION_CONFIG_NAME
 from compressed_tensors.config import CompressionFormat
 from compressed_tensors.quantization.quant_scheme import (
     QuantizationScheme,
@@ -29,13 +28,14 @@ from compressed_tensors.quantization.utils import (
 )
 from pydantic import BaseModel, Field
 from torch.nn import Module
-from transformers import AutoConfig
 
 
 __all__ = [
     "QuantizationStatus",
     "QuantizationConfig",
     "LIFECYCLE_ORDER",
+    "DEFAULT_QUANTIZATION_METHOD",
+    "DEFAULT_QUANTIZATION_FORMAT",
 ]
 
 
@@ -101,6 +101,9 @@ LIFECYCLE_ORDER = [
     QuantizationStatus.COMPRESSED,
 ]
 
+DEFAULT_QUANTIZATION_METHOD = "compressed-tensors"
+DEFAULT_QUANTIZATION_FORMAT = "fakequant"
+
 
 class QuantizationConfig(BaseModel):
     """
@@ -122,8 +125,8 @@ class QuantizationConfig(BaseModel):
     """
 
     config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
-    quant_method: str =
-    format: str =
+    quant_method: str = DEFAULT_QUANTIZATION_METHOD
+    format: str = DEFAULT_QUANTIZATION_FORMAT
     quantization_status: QuantizationStatus = QuantizationStatus.INITIALIZED
     global_compression_ratio: Optional[float] = None
     ignore: Optional[List[str]] = Field(default_factory=list)
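With the defaults now exported as module-level constants, a QuantizationConfig built without explicit quant_method/format picks them up. A small sketch; it assumes an empty config_groups dict passes validation, which this diff does not confirm:

from compressed_tensors.quantization.quant_config import (
    DEFAULT_QUANTIZATION_FORMAT,
    DEFAULT_QUANTIZATION_METHOD,
    QuantizationConfig,
)

config = QuantizationConfig(config_groups={})
assert config.quant_method == DEFAULT_QUANTIZATION_METHOD  # "compressed-tensors"
assert config.format == DEFAULT_QUANTIZATION_FORMAT        # "fakequant"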
@@ -141,21 +144,6 @@ class QuantizationConfig(BaseModel):
             targets=targets_or_scheme,
         )
 
-    @staticmethod
-    def from_model_config(model_name_or_path) -> "QuantizationConfig":
-        """
-        Given a path to a model config, extract a quantization config if it exists
-
-        :param pretrained_model_name_or_path: path to model config on disk or HF hub
-        :return: instantiated QuantizationConfig if config contains a quant config
-        """
-        config = AutoConfig.from_pretrained(model_name_or_path)
-        quantization_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
-        if quantization_config is None:
-            return None
-
-        return QuantizationConfig.parse_obj(quantization_config)
-
     @staticmethod
     def from_pretrained(
         model: Module, format: Optional[str] = None
compressed_tensors/utils/helpers.py
@@ -15,18 +15,17 @@
 
 from typing import Optional
 
-from compressed_tensors.base import SPARSITY_CONFIG_NAME
-from compressed_tensors.compressors import ModelCompressor
-from compressed_tensors.config import CompressionConfig
 from transformers import AutoConfig
 
 
-__all__ = ["infer_compressor_from_model_config"]
+__all__ = ["infer_compressor_from_model_config", "fix_fsdp_module_name"]
+
+FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
 
 
 def infer_compressor_from_model_config(
     pretrained_model_name_or_path: str,
-) -> Optional[ModelCompressor]:
+) -> Optional["ModelCompressor"]:  # noqa: F821
     """
     Given a path to a model config, extract a sparsity config if it exists and return
     the associated ModelCompressor
@@ -34,8 +33,11 @@ def infer_compressor_from_model_config(
     :param pretrained_model_name_or_path: path to model config on disk or HF hub
     :return: matching compressor if config contains a sparsity config
     """
+    from compressed_tensors.compressors import ModelCompressor
+    from compressed_tensors.config import CompressionConfig
+
     config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-    sparsity_config =
+    sparsity_config = ModelCompressor.parse_sparsity_config(config)
     if sparsity_config is None:
         return None
 
@@ -43,3 +45,19 @@ def infer_compressor_from_model_config(
     sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
     compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
     return compressor
+
+
+# TODO: There is already the same function in
+# SparseML, should be moved to a shared location
+# in the future
+def fix_fsdp_module_name(name: str) -> str:
+    """
+    Remove FSDP wrapper prefixes from a module name
+    Accounts for scenario where FSDP_WRAPPER_NAME is
+    at the end of the name, as well as in the middle.
+    :param name: name to strip
+    :return: stripped name
+    """
+    return name.replace(FSDP_WRAPPER_NAME + ".", "").replace(
+        "." + FSDP_WRAPPER_NAME, ""
+    )
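fix_fsdp_module_name strips the FSDP wrapper segment whether it appears as a leading prefix or in the middle of a dotted module path. A short sketch with hypothetical module names:

from compressed_tensors.utils.helpers import fix_fsdp_module_name

print(fix_fsdp_module_name("_fsdp_wrapped_module.model.layers.0.self_attn.q_proj"))
# -> "model.layers.0.self_attn.q_proj"

print(fix_fsdp_module_name("model.layers.0._fsdp_wrapped_module.mlp.down_proj"))
# -> "model.layers.0.mlp.down_proj"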
{compressed_tensors_nightly-0.3.3.20240601.dist-info → compressed_tensors_nightly-0.3.3.20240603.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.3.3.20240601
+Version: 0.3.3.20240603
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
{compressed_tensors_nightly-0.3.3.20240601.dist-info → compressed_tensors_nightly-0.3.3.20240603.dist-info}/RECORD
@@ -6,7 +6,7 @@ compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCi
 compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
 compressed_tensors/compressors/int_quantized.py,sha256=Ct2vCK0yoPm6vkIFlzDMGQ7m14xT1GyURsSwH9DP770,5242
-compressed_tensors/compressors/model_compressor.py,sha256=
+compressed_tensors/compressors/model_compressor.py,sha256=ymn4xzAstcutXxkY3Z3V_1MuJv383-lkZHzp37mA9z0,11119
 compressed_tensors/compressors/pack_quantized.py,sha256=VPiLlgJlDgARrn7YmiQoLqUfxErKBfj54epMYWRsF8k,8451
 compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
@@ -15,13 +15,13 @@ compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74j
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=A6b2V8lhsM8Ho8RjlPBQdxRUDNWhqq-ie5E3RR2_GNg,4360
-compressed_tensors/quantization/quant_config.py,sha256=
+compressed_tensors/quantization/quant_config.py,sha256=Nv9rvWNrlbeJgNZhQf-cPAEWJ9NU75ATWHCacWaiQ_s,8189
 compressed_tensors/quantization/quant_scheme.py,sha256=-hAK1-C67_wJl10eaVLUvbslPBTV04WyzL_J-u9f1ck,3571
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
-compressed_tensors/quantization/lifecycle/apply.py,sha256=
+compressed_tensors/quantization/lifecycle/apply.py,sha256=disclMUDaz2MLPvcTwGQ1oo1clhTTBkAeNz5J9NRxVw,8552
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
-compressed_tensors/quantization/lifecycle/forward.py,sha256=
+compressed_tensors/quantization/lifecycle/forward.py,sha256=_1TwffkyaaXL5QpFgXH1gvueUivOLpuRkoXY7vRXktY,11094
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=pFfcu-pxdQKzlnn-18-RlkEktt2yDi6woNXJsiv1A2c,3732
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
@@ -34,10 +34,10 @@ compressed_tensors/quantization/utils/helpers.py,sha256=NzAH18Cn_-mTAR87y6IlcQU5
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
-compressed_tensors/utils/helpers.py,sha256=
+compressed_tensors/utils/helpers.py,sha256=5ull5yFT31M2zVxKeFvpvvlvX5f1Sk1LGuj_wrfZWCY,2267
 compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
-compressed_tensors_nightly-0.3.3.
-compressed_tensors_nightly-0.3.3.
-compressed_tensors_nightly-0.3.3.
-compressed_tensors_nightly-0.3.3.
-compressed_tensors_nightly-0.3.3.
+compressed_tensors_nightly-0.3.3.20240603.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.3.3.20240603.dist-info/METADATA,sha256=VSYJpZfZihQ_Y7H8jkyTdsTeNn2a9g4C9l04Tal0LmY,5673
+compressed_tensors_nightly-0.3.3.20240603.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.3.3.20240603.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.3.3.20240603.dist-info/RECORD,,