compressed-tensors 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/__init__.py +21 -0
- compressed_tensors/base.py +16 -0
- compressed_tensors/compressors/__init__.py +25 -0
- compressed_tensors/compressors/base.py +79 -0
- compressed_tensors/compressors/dense.py +34 -0
- compressed_tensors/compressors/helpers.py +161 -0
- compressed_tensors/compressors/sparse_bitmask.py +238 -0
- compressed_tensors/config/__init__.py +18 -0
- compressed_tensors/config/base.py +42 -0
- compressed_tensors/config/dense.py +36 -0
- compressed_tensors/config/sparse_bitmask.py +36 -0
- compressed_tensors/quantization/__init__.py +21 -0
- compressed_tensors/quantization/lifecycle/__init__.py +22 -0
- compressed_tensors/quantization/lifecycle/apply.py +173 -0
- compressed_tensors/quantization/lifecycle/calibration.py +51 -0
- compressed_tensors/quantization/lifecycle/forward.py +136 -0
- compressed_tensors/quantization/lifecycle/frozen.py +46 -0
- compressed_tensors/quantization/lifecycle/initialize.py +96 -0
- compressed_tensors/quantization/observers/__init__.py +21 -0
- compressed_tensors/quantization/observers/base.py +69 -0
- compressed_tensors/quantization/observers/helpers.py +53 -0
- compressed_tensors/quantization/observers/memoryless.py +48 -0
- compressed_tensors/quantization/observers/min_max.py +65 -0
- compressed_tensors/quantization/quant_args.py +85 -0
- compressed_tensors/quantization/quant_config.py +171 -0
- compressed_tensors/quantization/quant_scheme.py +39 -0
- compressed_tensors/quantization/utils/__init__.py +16 -0
- compressed_tensors/quantization/utils/helpers.py +115 -0
- compressed_tensors/registry/__init__.py +17 -0
- compressed_tensors/registry/registry.py +360 -0
- compressed_tensors/utils/__init__.py +16 -0
- compressed_tensors/utils/helpers.py +151 -0
- compressed_tensors/utils/safetensors_load.py +237 -0
- compressed_tensors-0.3.0.dist-info/METADATA +22 -0
- compressed_tensors-0.3.0.dist-info/RECORD +37 -0
- compressed_tensors-0.3.0.dist-info/WHEEL +5 -0
- compressed_tensors-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from .base import *
|
16
|
+
|
17
|
+
# flake8: noqa
|
18
|
+
from .compressors import *
|
19
|
+
from .config import *
|
20
|
+
from .quantization import QuantizationConfig, QuantizationStatus
|
21
|
+
from .utils import *
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
# Attribute name under which a sparsity compression config is read from a
# model config and attached to a decompressed model.
SPARSITY_CONFIG_NAME = "sparsity_config"
# Presumably the matching key for the quantization config; its usage is not
# visible in this part of the package -- TODO confirm against callers.
QUANTIZATION_CONFIG_NAME = "sparseml_quantization_config"
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
# flake8: noqa
|
16
|
+
|
17
|
+
from .base import ModelCompressor
|
18
|
+
from .dense import DenseCompressor
|
19
|
+
from .helpers import (
|
20
|
+
infer_compressor_from_model_config,
|
21
|
+
load_compressed,
|
22
|
+
save_compressed,
|
23
|
+
save_compressed_model,
|
24
|
+
)
|
25
|
+
from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import operator
|
16
|
+
from typing import Dict, Generator, Optional, Tuple
|
17
|
+
|
18
|
+
from compressed_tensors.base import SPARSITY_CONFIG_NAME
|
19
|
+
from compressed_tensors.config import CompressionConfig
|
20
|
+
from compressed_tensors.registry import RegistryMixin
|
21
|
+
from compressed_tensors.utils import get_safetensors_folder
|
22
|
+
from torch import Tensor
|
23
|
+
from torch.nn import Module, Parameter
|
24
|
+
from tqdm import tqdm
|
25
|
+
|
26
|
+
|
27
|
+
__all__ = ["ModelCompressor"]
|
28
|
+
|
29
|
+
|
30
|
+
class ModelCompressor(RegistryMixin):
    """
    Base class representing a model compression algorithm.

    Concrete compressors override ``compress`` and ``decompress``; the base
    implementations raise ``NotImplementedError``.

    :param config: config specifying compression parameters
    """

    def __init__(self, config: Optional[CompressionConfig] = None):
        self.config = config

    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict

        :param model_state: state dict of uncompressed model
        :return: compressed state dict
        """
        raise NotImplementedError()

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu"
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a compressed state dict located at path_to_model_or_tensors
        and returns a generator for sequentially decompressing back to a
        dense state dict

        :param path_to_model_or_tensors: path to compressed safetensors model
            (directory with one or more safetensors files) or compressed
            tensors file
        :param device: device to load decompressed weights onto
        :return: generator yielding (name, decompressed tensor) pairs
        """
        raise NotImplementedError()

    def overwrite_weights(self, model_path: str, model: Module):
        """
        Overwrites the weights in model with weights decompressed from model_path

        :param model_path: path to compressed weights
        :param model: pytorch model to load decompressed weights into
        """
        model_path = get_safetensors_folder(model_path)
        dense_gen = self.decompress(model_path)
        for name, data in tqdm(dense_gen, desc="Decompressing model"):
            # resolve the (possibly dotted) attribute once and swap its
            # storage in place so existing references to it stay valid
            target = operator.attrgetter(name)(model)
            # assign the tensor directly: wrapping it in a Parameter first is
            # redundant and raises for non-floating dtypes
            target.data = data.to(target.device).detach()

        # record which compression config the loaded weights came from
        setattr(model, SPARSITY_CONFIG_NAME, self.config)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from typing import Dict, Generator, Tuple
|
16
|
+
|
17
|
+
from compressed_tensors.compressors import ModelCompressor
|
18
|
+
from compressed_tensors.config import CompressionFormat
|
19
|
+
from torch import Tensor
|
20
|
+
|
21
|
+
|
22
|
+
@ModelCompressor.register(name=CompressionFormat.dense_sparsity.value)
class DenseCompressor(ModelCompressor):
    """
    Identity compressor for dense models, returns the original state_dict
    """

    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        :param model_state: state dict of uncompressed model
        :return: the same state dict object, unchanged
        """
        return model_state

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu"
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Dense weights need no decompression, so nothing is yielded.

        :param path_to_model_or_tensors: path to the model or tensors (unused)
        :param device: target device (unused); it defaults to "cpu" so that
            callers passing only the path do not fail
        :return: an empty iterator
        """
        return iter([])
|
@@ -0,0 +1,161 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from pathlib import Path
|
16
|
+
from typing import Dict, Generator, Optional, Tuple, Union
|
17
|
+
|
18
|
+
import torch
|
19
|
+
from compressed_tensors.base import SPARSITY_CONFIG_NAME
|
20
|
+
from compressed_tensors.compressors import ModelCompressor
|
21
|
+
from compressed_tensors.config import CompressionConfig, CompressionFormat
|
22
|
+
from compressed_tensors.utils.safetensors_load import get_weight_mappings
|
23
|
+
from safetensors import safe_open
|
24
|
+
from safetensors.torch import save_file
|
25
|
+
from torch import Tensor
|
26
|
+
from transformers import AutoConfig
|
27
|
+
|
28
|
+
|
29
|
+
__all__ = [
|
30
|
+
"infer_compressor_from_model_config",
|
31
|
+
"load_compressed",
|
32
|
+
"save_compressed",
|
33
|
+
"save_compressed_model",
|
34
|
+
]
|
35
|
+
|
36
|
+
|
37
|
+
def infer_compressor_from_model_config(
    pretrained_model_name_or_path: str,
) -> Optional[ModelCompressor]:
    """
    Look up the sparsity config stored on a model config and build the
    ModelCompressor registered for its format.

    :param pretrained_model_name_or_path: path to model config on disk or HF hub
    :return: matching compressor, or None when the model config carries no
        sparsity config
    """
    model_config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
    raw_sparsity_config = getattr(model_config, SPARSITY_CONFIG_NAME, None)
    if raw_sparsity_config is None:
        return None

    # the same format name selects both the config class and the compressor
    format_name = raw_sparsity_config.get("format")
    parsed_config = CompressionConfig.load_from_registry(
        format_name, **raw_sparsity_config
    )
    return ModelCompressor.load_from_registry(format_name, config=parsed_config)
|
56
|
+
|
57
|
+
|
58
|
+
def save_compressed(
    tensors: Dict[str, Tensor],
    save_path: Union[str, Path],
    compression_format: Optional[CompressionFormat] = None,
):
    """
    Compress tensors with the compressor registered for compression_format
    and save the result to disk. With the default `dense_sparsity` format
    the tensors are saved as is.

    :param tensors: dictionary of tensors to compress
    :param save_path: path to save compressed tensors
    :param compression_format: compression format used for the tensors,
        given either as a CompressionFormat member or as its string value
    :raises ValueError: if tensors is None or empty, or if the format is
        not a registered compressor name or alias
    :return: None
    """
    if not tensors:
        raise ValueError("No tensors or empty tensors provided to compress")

    # if no compression_format specified, default to `dense_sparsity`
    compression_format = compression_format or CompressionFormat.dense_sparsity.value
    # compressors are registered under the enum's string value, so unwrap
    # enum members before the registry lookup
    if isinstance(compression_format, CompressionFormat):
        compression_format = compression_format.value

    valid_formats = (
        ModelCompressor.registered_names() + ModelCompressor.registered_aliases()
    )
    if compression_format not in valid_formats:
        raise ValueError(
            f"Unknown compression format: {compression_format}. "
            f"Must be one of {set(valid_formats)}"
        )

    # compress
    compressor = ModelCompressor.load_from_registry(compression_format)
    # save compressed tensors
    compressed_tensors = compressor.compress(tensors)
    save_file(compressed_tensors, save_path)
|
92
|
+
|
93
|
+
|
94
|
+
def load_compressed(
    compressed_tensors: Union[str, Path],
    compression_config: CompressionConfig = None,
    device: Optional[str] = "cpu",
) -> Generator[Tuple[str, Tensor], None, None]:
    """
    Lazily load tensors from disk, decompressing them when a compression
    config with a non-dense format is given.

    :param compressed_tensors: path to compressed tensors.
        This can be a path to a file or a directory containing
        one or multiple safetensor files (if multiple - in the format
        assumed by huggingface)
    :param compression_config: compression config to use for decompressing
        tensors. When None or `dense_sparsity`, tensors are read as is.
    :param device: device passed on when opening the tensors
    :raises ValueError: on first iteration, if the path is None or does
        not exist
    :return: a generator that yields the name and tensor of each loaded tensor
    """
    if compressed_tensors is None or not Path(compressed_tensors).exists():
        raise ValueError("No compressed tensors provided to load")

    stored_dense = (
        compression_config is None
        or compression_config.format == CompressionFormat.dense_sparsity.value
    )
    if not stored_dense:
        # delegate to the compressor registered for the config's format
        compressor = ModelCompressor.load_from_registry(
            compression_config.format, config=compression_config
        )
        yield from compressor.decompress(compressed_tensors, device=device)
        return

    # no compression_config, or `dense_sparsity` format: tensors are assumed
    # to be stored uncompressed and are read directly
    for weight_name, weight_file in get_weight_mappings(compressed_tensors).items():
        with safe_open(weight_file, framework="pt", device=device) as f:
            weight = f.get_tensor(weight_name)
            yield weight_name, weight
|
133
|
+
|
134
|
+
|
135
|
+
def save_compressed_model(
    model: torch.nn.Module,
    filename: str,
    compression_format: Optional[CompressionFormat] = None,
    force_contiguous: bool = True,
):
    """
    Save a model's state dict to disk through `save_compressed`, optionally
    making every tensor contiguous first.

    Note: The model is assumed to have a
    state_dict with unique entries

    :param model: model to save on disk
    :param filename: filename location to save the file
    :param compression_format: compression format used for the model
    :param force_contiguous: forcing the state_dict to be saved as contiguous tensors
    :raises ValueError: re-raised from `save_compressed`, chained to the
        original error
    """
    state_dict = model.state_dict()
    if force_contiguous:
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}
    try:
        save_compressed(state_dict, filename, compression_format=compression_format)
    except ValueError as e:
        msg = str(e)
        if not force_contiguous:
            # only suggest the flag when it was not already in effect
            msg += " Or use save_compressed_model(..., force_contiguous=True), read the docs for potential caveats."  # noqa E501
        raise ValueError(msg) from e
|
@@ -0,0 +1,238 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
import logging
|
16
|
+
from typing import Dict, Generator, List, Tuple, Union
|
17
|
+
|
18
|
+
import numpy
|
19
|
+
import torch
|
20
|
+
from compressed_tensors.compressors import ModelCompressor
|
21
|
+
from compressed_tensors.config import CompressionFormat
|
22
|
+
from compressed_tensors.utils import get_nested_weight_mappings, merge_names
|
23
|
+
from safetensors import safe_open
|
24
|
+
from torch import Tensor
|
25
|
+
from tqdm import tqdm
|
26
|
+
|
27
|
+
|
28
|
+
__all__ = [
|
29
|
+
"BitmaskCompressor",
|
30
|
+
"BitmaskTensor",
|
31
|
+
"bitmask_compress",
|
32
|
+
"bitmask_decompress",
|
33
|
+
"pack_bitmasks",
|
34
|
+
"unpack_bitmasks",
|
35
|
+
]
|
36
|
+
|
37
|
+
_LOGGER: logging.Logger = logging.getLogger(__name__)
|
38
|
+
|
39
|
+
|
40
|
+
@ModelCompressor.register(name=CompressionFormat.sparse_bitmask.value)
class BitmaskCompressor(ModelCompressor):
    """
    Compression for sparse models using bitmasks. Non-zero weights are stored in a 1d
    values tensor, with their locations stored in a 2d bitmask
    """

    # suffixes of the per-weight entries produced by BitmaskTensor.dict
    COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]

    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict using bitmask compression

        :param model_state: state dict of uncompressed model
        :return: compressed state dict
        """
        compressed_dict = {}
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )
        for name, value in tqdm(model_state.items(), desc="Compressing model"):
            bitmask_tensor = BitmaskTensor.from_dense(value)
            bitmask_dict = bitmask_tensor.dict(name_prefix=name, device="cpu")
            for key in bitmask_dict:
                if key in compressed_dict:
                    # Logger.warn is a deprecated alias of Logger.warning
                    _LOGGER.warning(
                        f"Expected all compressed state_dict keys to be unique, but "
                        f"found an existing entry for {key}. The existing entry will "
                        "be replaced."
                    )
            # dict.update instead of `|=` keeps this working before Python 3.9
            compressed_dict.update(bitmask_dict)

        return compressed_dict

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu"
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a bitmask compressed state dict located at path_to_model_or_tensors
        and returns a generator for sequentially decompressing back to a dense state dict

        :param path_to_model_or_tensors: path to compressed safetensors model
            (directory with one or more safetensors files) or compressed
            tensors file
        :param device: device to load decompressed weights onto
        :return: iterator for generating decompressed weights
        """
        weight_mappings = get_nested_weight_mappings(
            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
        )
        for weight_name, param_files in weight_mappings.items():
            # gather every compression parameter stored for this weight
            weight_data = {}
            for param_name, safe_path in param_files.items():
                full_name = merge_names(weight_name, param_name)
                with safe_open(safe_path, framework="pt", device=device) as f:
                    weight_data[param_name] = f.get_tensor(full_name)
            data = BitmaskTensor(**weight_data)
            decompressed = data.decompress()
            yield weight_name, decompressed
|
98
|
+
|
99
|
+
|
100
|
+
class BitmaskTensor:
    """
    Holds the pieces of a single bitmask compressed tensor and converts
    between the dense and compressed representations.
    Adapted from: https://github.com/mgoin/torch_bitmask/tree/main

    :param shape: shape of dense tensor
    :param compressed: flat tensor of non-zero values
    :param bitmask: 2d bitmask of non-zero values
    :param row_offsets: flat tensor indicating what index in values each dense
        row starts at
    """

    def __init__(
        self,
        shape: Union[torch.Size, List],
        compressed: Tensor,
        bitmask: Tensor,
        row_offsets: Tensor,
    ):
        self.row_offsets = row_offsets
        self.bitmask = bitmask
        self.compressed = compressed
        # stored as a plain list regardless of the input container
        self.shape = list(shape)

    @staticmethod
    def from_dense(tensor: Tensor) -> "BitmaskTensor":
        """
        :param tensor: dense tensor to compress
        :return: instantiated compressed tensor
        """
        # compression runs on a CPU copy of the tensor
        values, packed_mask, offsets = bitmask_compress(tensor.cpu())
        return BitmaskTensor(
            shape=tensor.shape,
            compressed=values,
            bitmask=packed_mask,
            row_offsets=offsets,
        )

    def decompress(self) -> Tensor:
        """
        :return: reconstructed dense tensor
        """
        return bitmask_decompress(self.compressed, self.bitmask, self.shape)

    def curr_memory_size_bytes(self):
        """
        :return: size in bytes required to store compressed tensor on disk
        """
        stored = (self.compressed, self.bitmask, self.row_offsets)
        return sum(part.element_size() * part.nelement() for part in stored)

    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
        """
        :param name_prefix: name of original tensor to store compressed weight as
        :param device: device every returned tensor is placed on
        :return: dict of compressed data for the stored weight
        """
        parts = {
            "shape": torch.tensor(self.shape, device=device),
            "compressed": self.compressed.to(device),
            "bitmask": self.bitmask.to(device),
            "row_offsets": self.row_offsets.to(device),
        }
        return {
            merge_names(name_prefix, suffix): value for suffix, value in parts.items()
        }

    def __repr__(self):
        return f"BitmaskTensor(shape={self.shape}, compressed=True)"
|
169
|
+
|
170
|
+
|
171
|
+
def bitmask_compress(tensor: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Compresses a dense tensor using bitmask compression

    :param tensor: dense tensor to compress
    :return: tuple of (non-zero values, packed bitmask, row offsets)
    """
    nonzero_mask = tensor != 0
    nonzero_values = tensor[nonzero_mask]

    # running total minus the row's own count gives the number of non-zero
    # values that precede each row
    per_row_counts = nonzero_mask.sum(dim=-1)
    row_offsets = torch.cumsum(per_row_counts, 0) - per_row_counts

    return nonzero_values, pack_bitmasks(nonzero_mask), row_offsets
|
185
|
+
|
186
|
+
|
187
|
+
def bitmask_decompress(
    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
) -> Tensor:
    """
    Reconstructs a dense tensor from a compressed one

    :param values: 1d tensor of non-zero values
    :param bitmasks: 2d int8 tensor flagging locations of non-zero values in the
        tensors original shape
    :param original_shape: shape of the dense tensor; any sequence of
        integer-like entries is accepted
    :return: decompressed dense tensor, with the dtype and device of values
    """
    # normalize to plain ints so shapes given as tensors or lists of 0-d
    # tensors behave like a torch.Size
    dense_shape = [int(dim) for dim in original_shape]

    # unpack_bitmasks converts through numpy, which needs a CPU tensor
    bytemasks_unpacked = unpack_bitmasks(bitmasks.cpu(), dense_shape)

    # allocate on the device of the values so the masked assignment below
    # does not mix devices
    decompressed_tensor = torch.zeros(
        dense_shape, dtype=values.dtype, device=values.device
    )
    decompressed_tensor[bytemasks_unpacked.to(values.device)] = values

    return decompressed_tensor
|
205
|
+
|
206
|
+
|
207
|
+
def pack_bitmasks(bytemasks: Tensor) -> Tensor:
    """
    Packs a bytemask tensor into a bitmask tensor to reduce memory. Shape RxC
    will be compressed to R x ceil(C/8)

    :param bytemasks: mask tensor where each byte corresponds to a weight
    :return: mask tensor where each bit corresponds to a weight
    """
    # pack along the last axis, least-significant bit first
    return torch.from_numpy(
        numpy.packbits(bytemasks.numpy(), axis=-1, bitorder="little")
    )
|
218
|
+
|
219
|
+
|
220
|
+
def unpack_bitmasks(packed_bitmasks: Tensor, original_shape: torch.Size) -> Tensor:
    """
    Expands a bitmask tensor back to a bytemask tensor for use during decompression

    :param packed_bitmasks: mask tensor where each bit corresponds to a weight
    :param original_shape: dense shape to decompress to
    :return: boolean mask of weights in the original dense shape
    """
    # expand each byte into bits, keeping only as many bits per row as the
    # last dense dimension has entries
    bits = numpy.unpackbits(
        packed_bitmasks.numpy(),
        axis=-1,
        count=original_shape[-1],
        bitorder="little",
    )

    # restore the dense shape and convert the 0/1 values to booleans
    dense_mask = bits.reshape(original_shape).astype(bool)
    return torch.from_numpy(dense_mask)
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
# flake8: noqa
|
16
|
+
from .base import *
|
17
|
+
from .dense import *
|
18
|
+
from .sparse_bitmask import *
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from enum import Enum
|
16
|
+
from typing import Optional
|
17
|
+
|
18
|
+
from compressed_tensors.registry import RegistryMixin
|
19
|
+
from pydantic import BaseModel
|
20
|
+
|
21
|
+
|
22
|
+
__all__ = ["CompressionConfig", "CompressionFormat"]
|
23
|
+
|
24
|
+
|
25
|
+
class CompressionFormat(Enum):
    """
    Names of the supported compression formats; the string values are the
    names that configs and compressors are registered under.
    """

    # weights stored as is, without compression
    dense_sparsity = "dense-sparsity"
    # non-zero values stored alongside a packed bitmask
    sparse_bitmask = "sparse-bitmask"
|
28
|
+
|
29
|
+
|
30
|
+
class CompressionConfig(RegistryMixin, BaseModel):
    """
    Base data model describing how a model's weights are compressed.
    Format-specific configs are registered against this class by name.

    :param format: name of compression format
    :param global_sparsity: average sparsity of the entire model
    :param sparsity_structure: structure of the sparsity, such as
        "unstructured", "2:4", "8:16" etc
    """

    # required here; no default is given at the base level
    format: str
    global_sparsity: Optional[float] = 0.0
    sparsity_structure: Optional[str] = "unstructured"
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from typing import Optional
|
16
|
+
|
17
|
+
from compressed_tensors.config import CompressionConfig, CompressionFormat
|
18
|
+
|
19
|
+
|
20
|
+
__all__ = ["DenseSparsityConfig"]
|
21
|
+
|
22
|
+
|
23
|
+
@CompressionConfig.register(name=CompressionFormat.dense_sparsity.value)
class DenseSparsityConfig(CompressionConfig):
    """
    Config for a sparse model whose weights are kept in an uncompressed
    dense format (identity compression)

    :param global_sparsity: average sparsity of the entire model
    :param sparsity_structure: structure of the sparsity, such as
        "unstructured", "2:4", "8:16" etc
    """

    # fixed to the name this config is registered under
    format: str = CompressionFormat.dense_sparsity.value
    global_sparsity: Optional[float] = 0.0
    sparsity_structure: Optional[str] = "unstructured"
|