compressed-tensors-nightly 0.8.0.20241122__py3-none-any.whl → 0.8.0.20241123__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/model_compressors/model_compressor.py +17 -5
- compressed_tensors/linear/compressed_linear.py +3 -1
- compressed_tensors/quantization/lifecycle/apply.py +2 -1
- compressed_tensors/quantization/quant_config.py +2 -2
- compressed_tensors/quantization/quant_scheme.py +1 -23
- {compressed_tensors_nightly-0.8.0.20241122.dist-info → compressed_tensors_nightly-0.8.0.20241123.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.8.0.20241122.dist-info → compressed_tensors_nightly-0.8.0.20241123.dist-info}/RECORD +10 -10
- {compressed_tensors_nightly-0.8.0.20241122.dist-info → compressed_tensors_nightly-0.8.0.20241123.dist-info}/WHEEL +1 -1
- {compressed_tensors_nightly-0.8.0.20241122.dist-info → compressed_tensors_nightly-0.8.0.20241123.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.8.0.20241122.dist-info → compressed_tensors_nightly-0.8.0.20241123.dist-info}/top_level.txt +0 -0
@@ -24,7 +24,6 @@ import compressed_tensors
|
|
24
24
|
import torch
|
25
25
|
import transformers
|
26
26
|
from compressed_tensors.base import (
|
27
|
-
COMPRESSION_CONFIG_NAME,
|
28
27
|
COMPRESSION_VERSION_NAME,
|
29
28
|
QUANTIZATION_CONFIG_NAME,
|
30
29
|
QUANTIZATION_METHOD_NAME,
|
@@ -39,6 +38,7 @@ from compressed_tensors.quantization import (
|
|
39
38
|
apply_quantization_config,
|
40
39
|
load_pretrained_quantization,
|
41
40
|
)
|
41
|
+
from compressed_tensors.quantization.quant_args import QuantizationArgs
|
42
42
|
from compressed_tensors.quantization.utils import (
|
43
43
|
is_module_quantized,
|
44
44
|
iter_named_leaf_modules,
|
@@ -103,12 +103,14 @@ class ModelCompressor:
|
|
103
103
|
:return: compressor for the configs, or None if model is not compressed
|
104
104
|
"""
|
105
105
|
config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
|
106
|
-
compression_config = getattr(config, COMPRESSION_CONFIG_NAME, None)
|
106
|
+
compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
|
107
|
+
|
107
108
|
return cls.from_compression_config(compression_config)
|
108
109
|
|
109
110
|
@classmethod
|
110
111
|
def from_compression_config(
|
111
|
-
cls,
|
112
|
+
cls,
|
113
|
+
compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
|
112
114
|
):
|
113
115
|
"""
|
114
116
|
:param compression_config:
|
@@ -265,7 +267,11 @@ class ModelCompressor:
|
|
265
267
|
state_dict = model.state_dict()
|
266
268
|
|
267
269
|
compressed_state_dict = state_dict
|
268
|
-
|
270
|
+
|
271
|
+
quantized_modules_to_args: Dict[
|
272
|
+
str, QuantizationArgs
|
273
|
+
] = map_modules_to_quant_args(model)
|
274
|
+
|
269
275
|
if self.quantization_compressor is not None:
|
270
276
|
compressed_state_dict = self.quantization_compressor.compress(
|
271
277
|
state_dict, names_to_scheme=quantized_modules_to_args
|
@@ -369,7 +375,13 @@ class ModelCompressor:
|
|
369
375
|
update_parameter_data(module, data, param_name)
|
370
376
|
|
371
377
|
|
372
|
-
def map_modules_to_quant_args(model: Module) -> Dict:
|
378
|
+
def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
|
379
|
+
"""
|
380
|
+
Given a pytorch model, map out the submodule name (usually linear layers)
|
381
|
+
to the QuantizationArgs
|
382
|
+
|
383
|
+
:param model: pytorch model
|
384
|
+
"""
|
373
385
|
quantized_modules_to_args = {}
|
374
386
|
for name, submodule in iter_named_leaf_modules(model):
|
375
387
|
if is_module_quantized(submodule):
|
@@ -12,6 +12,8 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
+
from typing import Dict, Tuple
|
16
|
+
|
15
17
|
import torch
|
16
18
|
from compressed_tensors.compressors.base import BaseCompressor
|
17
19
|
from compressed_tensors.quantization import (
|
@@ -53,7 +55,7 @@ class CompressedLinear(Linear):
|
|
53
55
|
)
|
54
56
|
|
55
57
|
# get the shape and dtype of compressed parameters
|
56
|
-
compression_params = module.compressor.compression_param_info(
|
58
|
+
compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
|
57
59
|
module.weight.shape, quantization_scheme.weights
|
58
60
|
)
|
59
61
|
|
@@ -106,7 +106,8 @@ def apply_quantization_config(
|
|
106
106
|
model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
|
107
107
|
) -> OrderedDict:
|
108
108
|
"""
|
109
|
-
Initializes the model for quantization in-place based on the given config
|
109
|
+
Initializes the model for quantization in-place based on the given config.
|
110
|
+
Optionally converts quantizable modules to compressed_linear modules
|
110
111
|
|
111
112
|
:param model: model to apply quantization config to
|
112
113
|
:param config: quantization config
|
@@ -132,9 +132,9 @@ class QuantizationConfig(BaseModel):
|
|
132
132
|
`k_proj` and `v_proj` in their names. If this is not the case
|
133
133
|
and kv_cache_scheme != None, the quantization of kv cache will fail
|
134
134
|
:global_compression_ratio: optional informational config to report the model
|
135
|
-
|
135
|
+
compression ratio achieved by the quantization config
|
136
136
|
:ignore: optional list of layers to ignore from config_groups. Layers in this list
|
137
|
-
|
137
|
+
are not quantized even if they match up with a target in config_groups
|
138
138
|
"""
|
139
139
|
|
140
140
|
config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
|
@@ -36,7 +36,7 @@ class QuantizationScheme(BaseModel):
|
|
36
36
|
of modules should be quantized
|
37
37
|
|
38
38
|
:param targets: list of modules to apply the QuantizationArgs to, can be layer
|
39
|
-
names, layer types or a regular expression
|
39
|
+
names, layer types or a regular expression, typically ["Linear"]
|
40
40
|
:param weights: quantization config for layer weights
|
41
41
|
:param input_activations: quantization config for layer inputs
|
42
42
|
:param output_activations: quantization config for layer outputs
|
@@ -47,28 +47,6 @@ class QuantizationScheme(BaseModel):
|
|
47
47
|
input_activations: Optional[QuantizationArgs] = None
|
48
48
|
output_activations: Optional[QuantizationArgs] = None
|
49
49
|
|
50
|
-
@classmethod
|
51
|
-
def default_scheme(
|
52
|
-
cls,
|
53
|
-
targets: Optional[List[str]] = None,
|
54
|
-
):
|
55
|
-
|
56
|
-
if targets is None:
|
57
|
-
# default to quantizing all Linear layers
|
58
|
-
targets = ["Linear"]
|
59
|
-
|
60
|
-
# by default, activations and weights are left unquantized
|
61
|
-
weights = None
|
62
|
-
input_activations = None
|
63
|
-
output_activations = None
|
64
|
-
|
65
|
-
return cls(
|
66
|
-
targets=targets,
|
67
|
-
weights=weights,
|
68
|
-
input_activations=input_activations,
|
69
|
-
output_activations=output_activations,
|
70
|
-
)
|
71
|
-
|
72
50
|
|
73
51
|
"""
|
74
52
|
Pre-Set Quantization Scheme Args
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.8.0.20241122
|
3
|
+
Version: 0.8.0.20241123
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -5,7 +5,7 @@ compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1
|
|
5
5
|
compressed_tensors/compressors/base.py,sha256=D9TNwQcjanDiAHODPbg8JUqc66e3j50rctY7A708NEs,6743
|
6
6
|
compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
|
7
7
|
compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
|
8
|
-
compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=
|
8
|
+
compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=sxh1TvW1Bp9YJE41hW0XZfd0kYYB85nhJvBLVRTDcV0,15886
|
9
9
|
compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=09UJq68Pht6Bf-4iP9xYl3tetKsncNPHD8IAGbePsr4,714
|
10
10
|
compressed_tensors/compressors/quantized_compressors/base.py,sha256=K1KOnS6Y8nUA1-HN7VhyfsDc01nilW0WfXMUhuD-l8w,5954
|
11
11
|
compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=Mmfr-hap-4zw7CzE1mXi0UirknqGidNxw38GGWVgTqM,4916
|
@@ -21,13 +21,13 @@ compressed_tensors/config/base.py,sha256=3bFAdwDZjOt-U3fneOeL8dRci-PS8DqstnXuQVt
|
|
21
21
|
compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74jNbjks,1317
|
22
22
|
compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
|
23
23
|
compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
|
24
|
-
compressed_tensors/linear/compressed_linear.py,sha256=
|
24
|
+
compressed_tensors/linear/compressed_linear.py,sha256=MJa-UfoKhIkdUWRD1shrXXri2cOwR5GK0a4t4bNYosM,3268
|
25
25
|
compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
|
26
26
|
compressed_tensors/quantization/quant_args.py,sha256=osjNwCSB6tcyH9Qeg5sHEiB-bHyi3XJ8TzkGVJuGTc4,8711
|
27
|
-
compressed_tensors/quantization/quant_config.py,sha256=
|
28
|
-
compressed_tensors/quantization/quant_scheme.py,sha256=
|
27
|
+
compressed_tensors/quantization/quant_config.py,sha256=K6kOZ6LDXpFlqsVzR4NEATV6y6Ea83rJWnNyVlvw-pI,10379
|
28
|
+
compressed_tensors/quantization/quant_scheme.py,sha256=o3SaeNKWFVqbOtk0cLlwHh_YenU4hnteXFtH5ey9zLk,5452
|
29
29
|
compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
|
30
|
-
compressed_tensors/quantization/lifecycle/apply.py,sha256=
|
30
|
+
compressed_tensors/quantization/lifecycle/apply.py,sha256=jCUSgeOBtagE5IhgIbyYMZ4kv8Rm20VGJ4IxXZ5HAnw,15066
|
31
31
|
compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
|
32
32
|
compressed_tensors/quantization/lifecycle/forward.py,sha256=QPL6-vKOFuKdKIEsVqMhsw4x552Jpm2sqO0oeChbnrM,12941
|
33
33
|
compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
|
@@ -43,8 +43,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
|
|
43
43
|
compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RKI_kBwI,2355
|
44
44
|
compressed_tensors/utils/safetensors_load.py,sha256=m08ANVuTBxQdoa6LufDgcNJ7wCLDJolyZljB8VEybAU,8578
|
45
45
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
46
|
-
compressed_tensors_nightly-0.8.0.
|
47
|
-
compressed_tensors_nightly-0.8.0.
|
48
|
-
compressed_tensors_nightly-0.8.0.
|
49
|
-
compressed_tensors_nightly-0.8.0.
|
50
|
-
compressed_tensors_nightly-0.8.0.
|
46
|
+
compressed_tensors_nightly-0.8.0.20241123.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
47
|
+
compressed_tensors_nightly-0.8.0.20241123.dist-info/METADATA,sha256=ZbKZisWw2SQTdEZR49B4POeeBOIu58NbrDiCAjgbQGY,6799
|
48
|
+
compressed_tensors_nightly-0.8.0.20241123.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
49
|
+
compressed_tensors_nightly-0.8.0.20241123.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
50
|
+
compressed_tensors_nightly-0.8.0.20241123.dist-info/RECORD,,
|
File without changes
|