compressed-tensors 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-0.3.1/src/compressed_tensors.egg-info → compressed-tensors-0.3.2}/PKG-INFO +4 -1
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/README.md +3 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/setup.py +1 -1
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/__init__.py +1 -6
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/base.py +25 -1
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/dense.py +1 -1
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/helpers.py +0 -24
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/sparse_bitmask.py +3 -2
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/forward.py +18 -12
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/frozen.py +9 -9
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/initialize.py +7 -4
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/observers/memoryless.py +2 -2
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/quant_args.py +11 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2/src/compressed_tensors.egg-info}/PKG-INFO +4 -1
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/LICENSE +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/pyproject.toml +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/setup.cfg +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/base.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/config/base.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/calibration.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/observers/__init__.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/observers/base.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/observers/helpers.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/observers/min_max.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors.egg-info/SOURCES.txt +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors.egg-info/requires.txt +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors.egg-info/top_level.txt +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/tests/test_bitmask.py +0 -0
- {compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/tests/test_registry.py +0 -0
{compressed-tensors-0.3.1/src/compressed_tensors.egg-info → compressed-tensors-0.3.2}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors
-Version: 0.3.1
+Version: 0.3.2
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -94,4 +94,7 @@ save_compressed_model(model, "compressed_model.safetensors", compression_format=
 state_dict = dict(load_compressed("compressed_model.safetensors", compression_config))
 ```
 
+For more in-depth tutorial on bitmask compression, refer to the [notebook](https://github.com/neuralmagic/compressed-tensors/blob/d707c5b84bc3fef164aebdcd97cb6eaa571982f8/examples/bitmask_compression.ipynb).
+
+
 
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/README.md
RENAMED
@@ -80,3 +80,6 @@ save_compressed_model(model, "compressed_model.safetensors", compression_format=
 # load compressed model weights (`dict` turns generator into a dictionary)
 state_dict = dict(load_compressed("compressed_model.safetensors", compression_config))
 ```
+
+For more in-depth tutorial on bitmask compression, refer to the [notebook](https://github.com/neuralmagic/compressed-tensors/blob/d707c5b84bc3fef164aebdcd97cb6eaa571982f8/examples/bitmask_compression.ipynb).
+
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/__init__.py
RENAMED
@@ -16,10 +16,5 @@
 
 from .base import ModelCompressor
 from .dense import DenseCompressor
-from .helpers import (
-    infer_compressor_from_model_config,
-    load_compressed,
-    save_compressed,
-    save_compressed_model,
-)
+from .helpers import load_compressed, save_compressed, save_compressed_model
 from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/base.py
RENAMED
@@ -22,6 +22,7 @@ from compressed_tensors.utils import get_safetensors_folder
 from torch import Tensor
 from torch.nn import Module, Parameter
 from tqdm import tqdm
+from transformers import AutoConfig
 
 
 __all__ = ["ModelCompressor"]
@@ -34,6 +35,29 @@ class ModelCompressor(RegistryMixin):
     :param config: config specifying compression parameters
     """
 
+    @classmethod
+    def from_pretrained(
+        cls, pretrained_model_name_or_path: str
+    ) -> Optional["ModelCompressor"]:
+        """
+        Given a path to a model config, extract a sparsity config if it exists and
+        return the associated ModelCompressor
+
+        :param pretrained_model_name_or_path: path to model config on disk or HF hub
+        :return: matching compressor if config contains a sparsity config
+        """
+        config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
+        sparsity_config = getattr(config, SPARSITY_CONFIG_NAME, None)
+        if sparsity_config is None:
+            return None
+
+        format = sparsity_config.get("format")
+        sparsity_config = CompressionConfig.load_from_registry(
+            format, **sparsity_config
+        )
+        compressor = cls.load_from_registry(format, config=sparsity_config)
+        return compressor
+
     def __init__(self, config: Optional[CompressionConfig] = None):
         self.config = config
 
@@ -47,7 +71,7 @@ class ModelCompressor(RegistryMixin):
         raise NotImplementedError()
 
     def decompress(
-        self, path_to_model_or_tensors: str
+        self, path_to_model_or_tensors: str, device: str = "cpu"
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
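This moves compressor inference from a standalone helper (removed from helpers.py below) onto the class itself. A minimal usage sketch of the new classmethod, assuming a hypothetical local model directory whose config.json carries a sparsity config:

```python
from compressed_tensors.compressors import ModelCompressor

# "./my-sparse-model" is a hypothetical path; from_pretrained returns None
# when the model config carries no sparsity config
compressor = ModelCompressor.from_pretrained("./my-sparse-model")
if compressor is not None:
    # decompress now takes an optional device argument, defaulting to "cpu"
    for name, tensor in compressor.decompress("./my-sparse-model", device="cpu"):
        print(name, tuple(tensor.shape))
```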
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/dense.py
RENAMED
@@ -29,6 +29,6 @@ class DenseCompressor(ModelCompressor):
         return model_state
 
     def decompress(
-        self, path_to_model_or_tensors: str, device: str
+        self, path_to_model_or_tensors: str, device: str = "cpu"
     ) -> Generator[Tuple[str, Tensor], None, None]:
         return iter([])
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/helpers.py
RENAMED
@@ -16,45 +16,21 @@ from pathlib import Path
 from typing import Dict, Generator, Optional, Tuple, Union
 
 import torch
-from compressed_tensors.base import SPARSITY_CONFIG_NAME
 from compressed_tensors.compressors import ModelCompressor
 from compressed_tensors.config import CompressionConfig, CompressionFormat
 from compressed_tensors.utils.safetensors_load import get_weight_mappings
 from safetensors import safe_open
 from safetensors.torch import save_file
 from torch import Tensor
-from transformers import AutoConfig
 
 
 __all__ = [
-    "infer_compressor_from_model_config",
     "load_compressed",
     "save_compressed",
    "save_compressed_model",
 ]
 
 
-def infer_compressor_from_model_config(
-    pretrained_model_name_or_path: str,
-) -> Optional[ModelCompressor]:
-    """
-    Given a path to a model config, extract a sparsity config if it exists and return
-    the associated ModelCompressor
-
-    :param pretrained_model_name_or_path: path to model config on disk or HF hub
-    :return: matching compressor if config contains a sparsity config
-    """
-    config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-    sparsity_config = getattr(config, SPARSITY_CONFIG_NAME, None)
-    if sparsity_config is None:
-        return None
-
-    format = sparsity_config.get("format")
-    sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
-    compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
-    return compressor
-
-
 def save_compressed(
     tensors: Dict[str, Tensor],
     save_path: Union[str, Path],
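For code that imported the removed helper, the migration is a one-line switch to the classmethod added in base.py above; a sketch (the model identifier is illustrative):

```python
# 0.3.1
from compressed_tensors.compressors import infer_compressor_from_model_config
compressor = infer_compressor_from_model_config("path/or/hf-model-id")

# 0.3.2
from compressed_tensors.compressors import ModelCompressor
compressor = ModelCompressor.from_pretrained("path/or/hf-model-id")
```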
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/compressors/sparse_bitmask.py
RENAMED
@@ -75,8 +75,9 @@ class BitmaskCompressor(ModelCompressor):
         self, path_to_model_or_tensors: str, device: str = "cpu"
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
-        Reads a bitmask compressed state dict located at path_to_model_or_tensors
-        and returns a generator for sequentially decompressing back to a dense state dict
+        Reads a bitmask compressed state dict located
+        at path_to_model_or_tensors and returns a generator
+        for sequentially decompressing back to a dense state dict
 
         :param model_path: path to compressed safetensors model (directory with
             one or more safetensors files) or compressed tensors file
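The expanded docstring makes the contract explicit: decompression is lazy, yielding one dense tensor at a time. A sketch of consuming the generator, reusing the `compressed_model.safetensors` file from the README example (config-less construction relies on the optional config in `ModelCompressor.__init__`):

```python
from compressed_tensors.compressors import BitmaskCompressor

compressor = BitmaskCompressor()  # config is Optional per ModelCompressor.__init__

# tensors are yielded sequentially, so peak memory stays near one dense tensor;
# wrap in dict() only if the full state dict is actually needed at once
state_dict = {}
for name, tensor in compressor.decompress("compressed_model.safetensors"):
    state_dict[name] = tensor
```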
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/forward.py
RENAMED
@@ -111,7 +111,7 @@ def wrap_module_forward_quantized(module: Module, scheme: QuantizationScheme):
 
 
 def _maybe_calibrate_or_quantize(
-    module: Module, value: Module, base_name: str, args: "QuantizationArgs"
+    module: Module, value: torch.Tensor, base_name: str, args: "QuantizationArgs"
 ) -> torch.Tensor:
     # only run quantized for the included stages
     if module.quantization_status not in {
@@ -120,17 +120,23 @@ def _maybe_calibrate_or_quantize(
     }:
         return value
 
-    device = next(module.parameters()).device
-    scale = getattr(module, f"{base_name}_scale")
-    zero_point = getattr(module, f"{base_name}_zero_point")
-
-    if module.quantization_status == QuantizationStatus.CALIBRATION:
-        # get observer and get new quant params from observation
+    if args.dynamic:
+        # dynamic quantization - get scale and zero point directly from observer
         observer = getattr(module, f"{base_name}_observer")
-        updated_scale, updated_zero_point = observer(value)
-
-        # update scale and zero point
-        scale.data = updated_scale.to(device)
-        zero_point.data = updated_zero_point.to(device)
+        scale, zero_point = observer(value)
+    else:
+        # static quantization - get previous scale and zero point from layer
+        scale = getattr(module, f"{base_name}_scale")
+        zero_point = getattr(module, f"{base_name}_zero_point")
+
+        if module.quantization_status == QuantizationStatus.CALIBRATION:
+            # calibration mode - get new quant params from observer
+            observer = getattr(module, f"{base_name}_observer")
+            updated_scale, updated_zero_point = observer(value)
+
+            # update scale and zero point
+            device = next(module.parameters()).device
+            scale.data = updated_scale.to(device)
+            zero_point.data = updated_zero_point.to(device)
 
     return fake_quantize(value, scale, zero_point, args)
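The branch added here is the heart of the new dynamic-quantization support: the static path reads persisted `{base_name}_scale`/`{base_name}_zero_point` parameters (refreshing them from the observer only during calibration), while the dynamic path recomputes both from the current sample on every forward pass. A self-contained sketch of that decision using a plain min/max rule; the helper names and the simplified `fake_quantize` are stand-ins, not the library's implementations:

```python
from typing import Optional, Tuple

import torch


def minmax_qparams(value: torch.Tensor, num_bits: int = 8):
    # memoryless rule: derive scale/zero_point from this sample alone
    qmin, qmax = 0, 2**num_bits - 1
    scale = (value.max() - value.min()).clamp(min=1e-8) / (qmax - qmin)
    zero_point = (qmin - value.min() / scale).round().to(torch.int32)
    return scale, zero_point


def fake_quantize(value, scale, zero_point, num_bits: int = 8):
    # quantize then dequantize, so the rounding error is visible in float space
    qmin, qmax = 0, 2**num_bits - 1
    q = (value / scale + zero_point).round().clamp(qmin, qmax)
    return (q - zero_point) * scale


def quantize_activation(
    value: torch.Tensor,
    dynamic: bool,
    static_qparams: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
) -> torch.Tensor:
    if dynamic:
        # dynamic: observe a fresh quantization range on every sample
        scale, zero_point = minmax_qparams(value)
    else:
        # static: reuse the scale/zero_point captured during calibration
        scale, zero_point = static_qparams
    return fake_quantize(value, scale, zero_point)
```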
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/frozen.py
RENAMED
@@ -30,17 +30,17 @@ def freeze_module_quantization(module: Module):
 
     :param module: module to freeze quantization for
     """
-    if not getattr(module, "quantization_scheme", None):
+    scheme = getattr(module, "quantization_scheme", None)
+    if not scheme:
         # no quantization scheme nothing to do
         return
 
-    # delete observers from module
-    observer_names = []
-    for submodule_name, _ in module.named_modules():
-        if "observer" in submodule_name:
-            observer_names.append(submodule_name)
-
-    for observer_name in observer_names:
-        delattr(module, observer_name)
+    # delete observers from module if not dynamic
+    if scheme.input_activations and not scheme.input_activations.dynamic:
+        delattr(module, "input_observer")
+    if scheme.weights and not scheme.weights.dynamic:
+        delattr(module, "weight_observer")
+    if scheme.output_activations and not scheme.output_activations.dynamic:
+        delattr(module, "output_observer")
 
     module.quantization_status = QuantizationStatus.FROZEN
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/lifecycle/initialize.py
RENAMED
@@ -80,6 +80,13 @@ def initialize_module_for_quantization(
 def _initialize_scale_zero_point_observer(
     module: Module, base_name: str, quantization_args: QuantizationArgs
 ):
+    # initialize observer module and attach as submodule
+    observer = quantization_args.get_observer()
+    module.register_module(f"{base_name}_observer", observer)
+
+    if quantization_args.dynamic:
+        return  # no need to register a scale and zero point for a dynamic observer
+
     device = next(module.parameters()).device
 
     # initializes empty scale and zero point parameters for the module
@@ -90,7 +97,3 @@ def _initialize_scale_zero_point_observer(
         torch.empty(0, device=device, dtype=int), requires_grad=False
     )
     module.register_parameter(f"{base_name}_zero_point", init_zero_point)
-
-    # initialize observer module and attach as submodule
-    observer = quantization_args.get_observer()
-    module.register_module(f"{base_name}_observer", observer)
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/observers/memoryless.py
RENAMED
@@ -23,10 +23,10 @@ from torch import FloatTensor, IntTensor, Tensor
 __all__ = ["MemorylessObserver"]
 
 
-@Observer.register("memoryless")
+@Observer.register("memoryless", alias=["dynamic"])
 class MemorylessObserver(Observer):
     """
-    Implements a dynamic quantization observer that sets the scale and
+    Implements a quantization observer that sets the scale and
     zero point based on the latest observed value without tracking state
     """
 
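The new alias means "dynamic" and "memoryless" should resolve to the same observer class; a short sketch, assuming the registry honors aliases at lookup time (the import paths follow the module layout shown in this diff):

```python
from compressed_tensors.quantization.observers.base import Observer
from compressed_tensors.quantization.quant_args import QuantizationArgs

args = QuantizationArgs(num_bits=8, dynamic=True)

# both "dynamic" and "memoryless" load MemorylessObserver after this change
obs = Observer.load_from_registry("dynamic", quantization_args=args)
```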
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2}/src/compressed_tensors/quantization/quant_args.py
RENAMED
@@ -53,6 +53,11 @@ class QuantizationArgs(BaseModel):
     :param group_size: group length to use for the group strategy
     :param block_structure: 2d block structure to use for the block strategy, must be
         of the format "2x4", "8x16", etc.
+    :param dynamic: set True to perform dynamic quantization - values will not be
+        calibrated during calibration phase, instead during inference new quantization
+        ranges will be observed with every sample. Defaults to False for static
+        quantization. Note that enabling dynamic quantization will change the default
+        observer to a memoryless one
     """
 
     num_bits: int = 8
@@ -61,6 +66,7 @@ class QuantizationArgs(BaseModel):
     strategy: QuantizationStrategy = QuantizationStrategy.TENSOR
     group_size: Optional[int] = None
     block_structure: Optional[str] = None
+    dynamic: bool = False
     observer: str = Field(
         default="minmax",
         description=(
@@ -82,4 +88,9 @@ class QuantizationArgs(BaseModel):
         """
         from compressed_tensors.quantization.observers.base import Observer
 
+        if self.observer == "minmax" and self.dynamic:
+            # override defualt observer for dynamic, you never want minmax which
+            # keeps state across samples for dynamic
+            self.observer = "memoryless"
+
         return Observer.load_from_registry(self.observer, quantization_args=self)
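Together with the alias above, this makes `dynamic=True` self-configuring: `get_observer()` swaps the default "minmax" observer for the stateless one before loading from the registry. A sketch of the observable effect:

```python
from compressed_tensors.quantization.quant_args import QuantizationArgs

static_args = QuantizationArgs(num_bits=8)
dynamic_args = QuantizationArgs(num_bits=8, dynamic=True)

observer = dynamic_args.get_observer()
print(static_args.observer)   # "minmax" (unchanged default)
print(dynamic_args.observer)  # "memoryless" (overridden for dynamic)
```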
{compressed-tensors-0.3.1 → compressed-tensors-0.3.2/src/compressed_tensors.egg-info}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors
-Version: 0.3.1
+Version: 0.3.2
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
@@ -94,4 +94,7 @@ save_compressed_model(model, "compressed_model.safetensors", compression_format=
 state_dict = dict(load_compressed("compressed_model.safetensors", compression_config))
 ```
 
+For more in-depth tutorial on bitmask compression, refer to the [notebook](https://github.com/neuralmagic/compressed-tensors/blob/d707c5b84bc3fef164aebdcd97cb6eaa571982f8/examples/bitmask_compression.ipynb).
+
+
 
|