compressed-tensors 0.8.0__tar.gz → 0.8.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/PKG-INFO +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +17 -5
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +4 -2
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +2 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/linear/compressed_linear.py +3 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/apply.py +2 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_args.py +16 -3
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_config.py +2 -2
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_scheme.py +17 -24
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/helpers.py +33 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/version.py +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/PKG-INFO +1 -1
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/LICENSE +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/README.md +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/pyproject.toml +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/setup.cfg +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/setup.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/config/base.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors.egg-info/top_level.txt +0 -0
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/model_compressors/model_compressor.py
RENAMED
@@ -24,7 +24,6 @@ import compressed_tensors
 import torch
 import transformers
 from compressed_tensors.base import (
-    COMPRESSION_CONFIG_NAME,
     COMPRESSION_VERSION_NAME,
     QUANTIZATION_CONFIG_NAME,
     QUANTIZATION_METHOD_NAME,
@@ -39,6 +38,7 @@ from compressed_tensors.quantization import (
     apply_quantization_config,
     load_pretrained_quantization,
 )
+from compressed_tensors.quantization.quant_args import QuantizationArgs
 from compressed_tensors.quantization.utils import (
     is_module_quantized,
     iter_named_leaf_modules,
@@ -103,12 +103,14 @@ class ModelCompressor:
         :return: compressor for the configs, or None if model is not compressed
         """
         config = AutoConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
-        compression_config = getattr(config,
+        compression_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
+
         return cls.from_compression_config(compression_config)
 
     @classmethod
     def from_compression_config(
-        cls,
+        cls,
+        compression_config: Union[Dict[str, Any], "CompressedTensorsConfig"],
     ):
         """
         :param compression_config:
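For context, a minimal usage sketch of the updated loading path. The checkpoint directory below is hypothetical; the sketch assumes a compressed-tensors checkpoint whose config.json carries the quantization config under the "quantization_config" key read above.

from compressed_tensors import ModelCompressor

# Hypothetical checkpoint directory; from_pretrained returns None when the
# model's config carries no quantization_config entry.
compressor = ModelCompressor.from_pretrained("path/to/compressed-model")
if compressor is not None:
    print(compressor.quantization_config)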
@@ -265,7 +267,11 @@ class ModelCompressor:
         state_dict = model.state_dict()
 
         compressed_state_dict = state_dict
-
+
+        quantized_modules_to_args: Dict[
+            str, QuantizationArgs
+        ] = map_modules_to_quant_args(model)
+
         if self.quantization_compressor is not None:
             compressed_state_dict = self.quantization_compressor.compress(
                 state_dict, names_to_scheme=quantized_modules_to_args
@@ -369,7 +375,13 @@ class ModelCompressor:
             update_parameter_data(module, data, param_name)
 
 
-def map_modules_to_quant_args(model: Module) -> Dict:
+def map_modules_to_quant_args(model: Module) -> Dict[str, QuantizationArgs]:
+    """
+    Given a pytorch model, map out the submodule name (usually linear layers)
+    to the QuantizationArgs
+
+    :param model: pytorch model
+    """
     quantized_modules_to_args = {}
     for name, submodule in iter_named_leaf_modules(model):
         if is_module_quantized(submodule):
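As an illustration of the new annotation, the mapping produced here keys submodule names (usually linear layers) to their weight QuantizationArgs before being handed to the quantization compressor. A sketch of the structure, with illustrative module names that are not taken from any real model:

from compressed_tensors.quantization import QuantizationArgs

# Sketch of what map_modules_to_quant_args(model) returns; the keys are
# hypothetical layer names, the values are each layer's weight args.
quantized_modules_to_args = {
    "model.layers.0.self_attn.q_proj": QuantizationArgs(num_bits=4, type="int"),
    "model.layers.0.mlp.gate_proj": QuantizationArgs(num_bits=4, type="int"),
}
# ModelCompressor.compress passes this mapping to the quantization
# compressor as names_to_scheme (see the hunk above).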
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py
RENAMED
@@ -93,9 +93,11 @@ class NaiveQuantizationCompressor(BaseQuantizationCompressor):
                 args=quantization_args,
                 dtype=quantization_args.pytorch_dtype(),
             )
+        else:
+            quantized_weight = weight
 
-
-
+        if device is not None:
+            quantized_weight = quantized_weight.to(device)
 
         return {"weight": quantized_weight}
 
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py
RENAMED
@@ -94,6 +94,8 @@ class PackedQuantizationCompressor(BaseQuantizationCompressor):
                 args=quantization_args,
                 dtype=torch.int8,
             )
+        else:
+            quantized_weight = weight
 
         packed_weight = pack_to_int32(quantized_weight, quantization_args.num_bits)
         weight_shape = torch.tensor(weight.shape)
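Both compressor changes close the same gap: when the incoming weight is already quantized and the quantize call is skipped, the weight is now passed through instead of leaving quantized_weight unbound. A self-contained sketch of the fixed control flow (not the library implementation; the already_quantized flag stands in for the library's own check):

import torch

def compress_weight_sketch(weight: torch.Tensor, already_quantized: bool,
                           device: torch.device = None) -> dict:
    """Sketch of the fixed control flow in compress_weight (illustrative only)."""
    if not already_quantized:
        # stand-in for the real quantize(...) call
        quantized_weight = torch.round(weight).to(torch.int8)
    else:
        # new in 0.8.1: pass the already-quantized tensor through unchanged,
        # so quantized_weight is always bound before use
        quantized_weight = weight

    if device is not None:
        quantized_weight = quantized_weight.to(device)

    return {"weight": quantized_weight}

# usage: a weight that is already int8 is returned as-is
out = compress_weight_sketch(torch.zeros(4, 4, dtype=torch.int8), already_quantized=True)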
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/linear/compressed_linear.py
RENAMED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from typing import Dict, Tuple
+
 import torch
 from compressed_tensors.compressors.base import BaseCompressor
 from compressed_tensors.quantization import (
@@ -53,7 +55,7 @@ class CompressedLinear(Linear):
         )
 
         # get the shape and dtype of compressed parameters
-        compression_params = module.compressor.compression_param_info(
+        compression_params: Dict[str, Tuple] = module.compressor.compression_param_info(
             module.weight.shape, quantization_scheme.weights
         )
 
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/lifecycle/apply.py
RENAMED
@@ -106,7 +106,8 @@ def apply_quantization_config(
     model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
 ) -> OrderedDict:
     """
-    Initializes the model for quantization in-place based on the given config
+    Initializes the model for quantization in-place based on the given config.
+    Optionally coverts quantizable modules to compressed_linear modules
 
     :param model: model to apply quantization config to
     :param config: quantization config
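A minimal sketch of calling this entry point, assuming a hand-written config (in practice the dict comes from a checkpoint's quantization_config entry; the group name and values below are hypothetical):

import torch
from compressed_tensors.quantization import QuantizationConfig, apply_quantization_config

# Hypothetical config describing 4-bit symmetric per-channel weight quantization
config = QuantizationConfig.model_validate(
    {
        "config_groups": {
            "group_0": {
                "targets": ["Linear"],
                "weights": {"num_bits": 4, "type": "int", "symmetric": True, "strategy": "channel"},
            }
        },
        "ignore": ["lm_head"],
    }
)

model = torch.nn.Sequential(torch.nn.Linear(16, 16))
# Attaches quantization schemes and parameters to matching modules in-place.
# Per the updated docstring above, run_compressed=True additionally opts in to
# converting quantizable modules to CompressedLinear for compressed checkpoints.
apply_quantization_config(model, config)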
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_args.py
RENAMED
@@ -17,6 +17,7 @@ from enum import Enum
 from typing import Any, Dict, Optional, Union
 
 import torch
+from compressed_tensors.utils import Aliasable
 from pydantic import BaseModel, Field, field_validator, model_validator
 
 
@@ -53,17 +54,29 @@ class QuantizationStrategy(str, Enum):
     TOKEN = "token"
 
 
-class ActivationOrdering(str, Enum):
+class ActivationOrdering(Aliasable, str, Enum):
     """
     Enum storing strategies for activation ordering
 
     Group: reorder groups and weight\n
-    Weight: only reorder weight, not groups. Slightly lower
-
+    Weight: only reorder weight, not groups. Slightly lower accuracy but also lower
+    latency when compared to group actorder\n
+    Dynamic: alias for Group\n
+    Static: alias for Weight\n
     """
 
     GROUP = "group"
     WEIGHT = "weight"
+    # aliases
+    DYNAMIC = "dynamic"
+    STATIC = "static"
+
+    @staticmethod
+    def get_aliases() -> Dict[str, str]:
+        return {
+            "dynamic": "group",
+            "static": "weight",
+        }
 
 
 class QuantizationArgs(BaseModel, use_enum_values=True):
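The effect of the new aliases, as a small illustration derived from the __eq__ defined on the Aliasable mixin (these assertions are not library test code):

from compressed_tensors.quantization.quant_args import ActivationOrdering

# "dynamic" is treated as an alias of "group", "static" as an alias of "weight"
assert ActivationOrdering.DYNAMIC == ActivationOrdering.GROUP
assert ActivationOrdering.STATIC == ActivationOrdering.WEIGHT
assert ActivationOrdering.DYNAMIC == "group"  # comparison resolves through get_aliases()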
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_config.py
RENAMED
@@ -132,9 +132,9 @@ class QuantizationConfig(BaseModel):
         `k_proj` and `v_proj` in their names. If this is not the case
         and kv_cache_scheme != None, the quantization of kv cache will fail
     :global_compression_ratio: optional informational config to report the model
-
+        compression ratio acheived by the quantization config
     :ignore: optional list of layers to ignore from config_groups. Layers in this list
-
+        are not quantized even if they match up with a target in config_groups
     """
 
     config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/quantization/quant_scheme.py
RENAMED
@@ -13,14 +13,14 @@
 # limitations under the License.
 
 from copy import deepcopy
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 
 from compressed_tensors.quantization.quant_args import (
     QuantizationArgs,
     QuantizationStrategy,
     QuantizationType,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, model_validator
 
 
 __all__ = [
@@ -36,7 +36,7 @@ class QuantizationScheme(BaseModel):
     of modules should be quantized
 
     :param targets: list of modules to apply the QuantizationArgs to, can be layer
-        names, layer types or a regular expression
+        names, layer types or a regular expression, typically ["Linear"]
     :param weights: quantization config for layer weights
     :param input_activations: quantization config for layer inputs
     :param output_activations: quantization config for layer outputs
@@ -47,27 +47,20 @@ class QuantizationScheme(BaseModel):
     input_activations: Optional[QuantizationArgs] = None
     output_activations: Optional[QuantizationArgs] = None
 
-    @
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-        return cls(
-            targets=targets,
-            weights=weights,
-            input_activations=input_activations,
-            output_activations=output_activations,
-        )
+    @model_validator(mode="after")
+    def validate_model_after(model: "QuantizationArgs") -> Dict[str, Any]:
+        inputs = model.input_activations
+        outputs = model.output_activations
+
+        if inputs is not None:
+            if inputs.actorder is not None:
+                raise ValueError("Cannot apply actorder to input activations")
+
+        if outputs is not None:
+            if outputs.actorder is not None:
+                raise ValueError("Cannot apply actorder to output activations")
+
+        return model
 
 
 """
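A small illustration of the new validator, based on the hunk above rather than on library tests: activation ordering remains valid on weights, but setting actorder on activation args now fails at construction time.

from compressed_tensors.quantization import QuantizationArgs, QuantizationScheme

# Weights may use activation ordering (group strategy implied by group_size)...
ok = QuantizationScheme(
    targets=["Linear"],
    weights=QuantizationArgs(num_bits=4, group_size=128, actorder="group"),
)

# ...but activations may not: the new model_validator rejects the scheme.
try:
    QuantizationScheme(
        targets=["Linear"],
        input_activations=QuantizationArgs(num_bits=8, group_size=128, actorder="group"),
    )
except ValueError as err:
    # the validation error includes "Cannot apply actorder to input activations"
    print(err)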
{compressed-tensors-0.8.0 → compressed-tensors-0.8.1}/src/compressed_tensors/utils/helpers.py
RENAMED
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Any, Optional
+from typing import Any, Dict, Optional
 
 import torch
 from transformers import AutoConfig
@@ -24,6 +24,7 @@ __all__ = [
     "tensor_follows_mask_structure",
     "replace_module",
     "is_compressed_tensors_config",
+    "Aliasable",
 ]
 
 FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
@@ -119,3 +120,34 @@ def is_compressed_tensors_config(compression_config: Any) -> bool:
         return isinstance(compression_config, CompressedTensorsConfig)
     except ImportError:
         return False
+
+
+class Aliasable:
+    """
+    A mixin for enums to allow aliasing of enum members
+
+    Example:
+    >>> class MyClass(Aliasable, int, Enum):
+    >>>     ...
+    """
+
+    @staticmethod
+    def get_aliases() -> Dict[str, str]:
+        raise NotImplementedError()
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            aliases = self.get_aliases()
+            return self.value == other.value or (
+                aliases.get(self.value, self.value)
+                == aliases.get(other.value, other.value)
+            )
+        else:
+            aliases = self.get_aliases()
+            self_value = aliases.get(self.value, self.value)
+            other_value = aliases.get(other, other)
+            return self_value == other_value
+
+    def __hash__(self):
+        canonical_value = self.aliases.get(self.value, self.value)
+        return hash(canonical_value)
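To show how the mixin is meant to be used outside of ActivationOrdering, here is a self-contained sketch with an illustrative enum (the Color class and its members are hypothetical, mirroring the docstring example; equality resolves through get_aliases):

from enum import Enum
from typing import Dict

from compressed_tensors.utils import Aliasable


class Color(Aliasable, str, Enum):
    """Illustrative enum: 'crimson' is treated as an alias of 'red'."""
    RED = "red"
    BLUE = "blue"
    # alias member
    CRIMSON = "crimson"

    @staticmethod
    def get_aliases() -> Dict[str, str]:
        return {"crimson": "red"}


# Alias-aware equality provided by the mixin:
assert Color.CRIMSON == Color.RED
assert Color.CRIMSON == "red"
assert Color.BLUE != Color.RED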