compressed-tensors 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. compressed_tensors/__init__.py +21 -0
  2. compressed_tensors/base.py +16 -0
  3. compressed_tensors/compressors/__init__.py +25 -0
  4. compressed_tensors/compressors/base.py +79 -0
  5. compressed_tensors/compressors/dense.py +34 -0
  6. compressed_tensors/compressors/helpers.py +161 -0
  7. compressed_tensors/compressors/sparse_bitmask.py +238 -0
  8. compressed_tensors/config/__init__.py +18 -0
  9. compressed_tensors/config/base.py +42 -0
  10. compressed_tensors/config/dense.py +36 -0
  11. compressed_tensors/config/sparse_bitmask.py +36 -0
  12. compressed_tensors/quantization/__init__.py +21 -0
  13. compressed_tensors/quantization/lifecycle/__init__.py +22 -0
  14. compressed_tensors/quantization/lifecycle/apply.py +173 -0
  15. compressed_tensors/quantization/lifecycle/calibration.py +51 -0
  16. compressed_tensors/quantization/lifecycle/forward.py +136 -0
  17. compressed_tensors/quantization/lifecycle/frozen.py +46 -0
  18. compressed_tensors/quantization/lifecycle/initialize.py +96 -0
  19. compressed_tensors/quantization/observers/__init__.py +21 -0
  20. compressed_tensors/quantization/observers/base.py +69 -0
  21. compressed_tensors/quantization/observers/helpers.py +53 -0
  22. compressed_tensors/quantization/observers/memoryless.py +48 -0
  23. compressed_tensors/quantization/observers/min_max.py +65 -0
  24. compressed_tensors/quantization/quant_args.py +85 -0
  25. compressed_tensors/quantization/quant_config.py +171 -0
  26. compressed_tensors/quantization/quant_scheme.py +39 -0
  27. compressed_tensors/quantization/utils/__init__.py +16 -0
  28. compressed_tensors/quantization/utils/helpers.py +115 -0
  29. compressed_tensors/registry/__init__.py +17 -0
  30. compressed_tensors/registry/registry.py +360 -0
  31. compressed_tensors/utils/__init__.py +16 -0
  32. compressed_tensors/utils/helpers.py +151 -0
  33. compressed_tensors/utils/safetensors_load.py +237 -0
  34. compressed_tensors-0.3.0.dist-info/METADATA +22 -0
  35. compressed_tensors-0.3.0.dist-info/RECORD +37 -0
  36. compressed_tensors-0.3.0.dist-info/WHEEL +5 -0
  37. compressed_tensors-0.3.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .base import *
16
+
17
+ # flake8: noqa
18
+ from .compressors import *
19
+ from .config import *
20
+ from .quantization import QuantizationConfig, QuantizationStatus
21
+ from .utils import *
@@ -0,0 +1,16 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ SPARSITY_CONFIG_NAME = "sparsity_config"
16
+ QUANTIZATION_CONFIG_NAME = "sparseml_quantization_config"
@@ -0,0 +1,25 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # flake8: noqa
16
+
17
+ from .base import ModelCompressor
18
+ from .dense import DenseCompressor
19
+ from .helpers import (
20
+ infer_compressor_from_model_config,
21
+ load_compressed,
22
+ save_compressed,
23
+ save_compressed_model,
24
+ )
25
+ from .sparse_bitmask import BitmaskCompressor, BitmaskTensor
@@ -0,0 +1,79 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import operator
16
+ from typing import Dict, Generator, Optional, Tuple
17
+
18
+ from compressed_tensors.base import SPARSITY_CONFIG_NAME
19
+ from compressed_tensors.config import CompressionConfig
20
+ from compressed_tensors.registry import RegistryMixin
21
+ from compressed_tensors.utils import get_safetensors_folder
22
+ from torch import Tensor
23
+ from torch.nn import Module, Parameter
24
+ from tqdm import tqdm
25
+
26
+
27
+ __all__ = ["ModelCompressor"]
28
+
29
+
30
class ModelCompressor(RegistryMixin):
    """
    Base class representing a model compression algorithm.

    Concrete compressors subclass this, override ``compress`` and
    ``decompress``, and are registered under a format name through the
    ``RegistryMixin`` machinery.

    :param config: config specifying compression parameters
    """

    def __init__(self, config: Optional[CompressionConfig] = None):
        self.config = config

    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict

        :param model_state: state dict of uncompressed model
        :return: compressed state dict
        :raises NotImplementedError: always, subclasses must override
        """
        raise NotImplementedError()

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu"
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a compressed state dict located at path_to_model_or_tensors
        and returns a generator for sequentially decompressing back to a
        dense state dict

        :param path_to_model_or_tensors: path to compressed safetensors model
            (directory with one or more safetensors files) or compressed
            tensors file
        :param device: device to load decompressed weights onto. Accepted here
            so the base signature matches the subclasses, which are called
            with ``device=...``
        :return: generator yielding (name, decompressed tensor) pairs
        :raises NotImplementedError: always, subclasses must override
        """
        raise NotImplementedError()

    def overwrite_weights(self, model_path: str, model: Module):
        """
        Overwrites the weights in model with weights decompressed from model_path

        :param model_path: path to compressed weights
        :param model: pytorch model to load decompressed weights into
        """
        model_path = get_safetensors_folder(model_path)
        dense_gen = self.decompress(model_path)
        for name, data in tqdm(dense_gen, desc="Decompressing model"):
            # resolve the dotted parameter name once and swap its storage in
            # place; no Parameter wrapper is needed (and wrapping would raise
            # for integer dtypes, which cannot require gradients)
            target = operator.attrgetter(name)(model)
            target.data = data.to(target.device)

        # record the compression config on the model
        setattr(model, SPARSITY_CONFIG_NAME, self.config)
@@ -0,0 +1,34 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, Generator, Tuple
16
+
17
+ from compressed_tensors.compressors import ModelCompressor
18
+ from compressed_tensors.config import CompressionFormat
19
+ from torch import Tensor
20
+
21
+
22
@ModelCompressor.register(name=CompressionFormat.dense_sparsity.value)
class DenseCompressor(ModelCompressor):
    """
    Identity compressor for dense models, returns the original state_dict
    """

    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        :param model_state: state dict of uncompressed model
        :return: the same state dict, unchanged
        """
        return model_state

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu"
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Nothing is compressed in the dense format, so there is nothing to
        decompress and an empty iterator is returned.

        :param path_to_model_or_tensors: path to the model or tensors (unused)
        :param device: target device (unused). Defaults to "cpu" so the
            method can be called with the path alone, as
            ``ModelCompressor.overwrite_weights`` does
        :return: empty iterator
        """
        return iter([])
@@ -0,0 +1,161 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from pathlib import Path
16
+ from typing import Dict, Generator, Optional, Tuple, Union
17
+
18
+ import torch
19
+ from compressed_tensors.base import SPARSITY_CONFIG_NAME
20
+ from compressed_tensors.compressors import ModelCompressor
21
+ from compressed_tensors.config import CompressionConfig, CompressionFormat
22
+ from compressed_tensors.utils.safetensors_load import get_weight_mappings
23
+ from safetensors import safe_open
24
+ from safetensors.torch import save_file
25
+ from torch import Tensor
26
+ from transformers import AutoConfig
27
+
28
+
29
+ __all__ = [
30
+ "infer_compressor_from_model_config",
31
+ "load_compressed",
32
+ "save_compressed",
33
+ "save_compressed_model",
34
+ ]
35
+
36
+
37
def infer_compressor_from_model_config(
    pretrained_model_name_or_path: str,
) -> Optional[ModelCompressor]:
    """
    Load the model config found at the given location, look up its sparsity
    config and build the compressor registered for that config's format.

    :param pretrained_model_name_or_path: path to model config on disk or HF hub
    :return: matching compressor, or None when the model config carries no
        sparsity config
    """
    model_config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
    raw_sparsity_config = getattr(model_config, SPARSITY_CONFIG_NAME, None)
    if raw_sparsity_config is None:
        return None

    # the "format" entry selects both the config class and the compressor class
    format_name = raw_sparsity_config.get("format")
    parsed_config = CompressionConfig.load_from_registry(
        format_name, **raw_sparsity_config
    )
    return ModelCompressor.load_from_registry(format_name, config=parsed_config)
56
+
57
+
58
def save_compressed(
    tensors: Dict[str, Tensor],
    save_path: Union[str, Path],
    compression_format: Optional[CompressionFormat] = None,
):
    """
    Save compressed tensors to disk. If tensors are not compressed,
    save them as is.

    :param tensors: dictionary of tensors to compress
    :param save_path: path to save compressed tensors
    :param compression_format: compression format used for the tensors, given
        either as a CompressionFormat member or as its string value. Defaults
        to the `dense_sparsity` format when omitted
    :return: None, the compressed tensors are written to save_path
    :raises ValueError: if tensors is None or empty, or if the compression
        format is not a registered compressor name or alias
    """
    if tensors is None or len(tensors) == 0:
        raise ValueError("No tensors or empty tensors provided to compress")

    # registry names are plain strings, so unwrap an enum member first
    if isinstance(compression_format, CompressionFormat):
        compression_format = compression_format.value

    # if no compression_format specified, default to `dense_sparsity`
    compression_format = compression_format or CompressionFormat.dense_sparsity.value

    known_formats = set(
        ModelCompressor.registered_names() + ModelCompressor.registered_aliases()
    )
    if compression_format not in known_formats:
        raise ValueError(
            f"Unknown compression format: {compression_format}. "
            f"Must be one of {known_formats}"
        )

    # compress
    compressor = ModelCompressor.load_from_registry(compression_format)
    # save compressed tensors
    compressed_tensors = compressor.compress(tensors)
    save_file(compressed_tensors, save_path)
92
+
93
+
94
def load_compressed(
    compressed_tensors: Union[str, Path],
    compression_config: CompressionConfig = None,
    device: Optional[str] = "cpu",
) -> Generator[Tuple[str, Tensor], None, None]:
    """
    Lazily load tensors from disk, decompressing them when a non-dense
    compression config is supplied.

    Because this is a generator function, nothing (including the path check)
    runs until the first item is requested.

    :param compressed_tensors: path to compressed tensors.
        This can be a path to a file or a directory containing
        one or multiple safetensor files (if multiple - in the format
        assumed by huggingface)
    :param compression_config: compression config to use for decompressing
        tensors. When None, or when its format is `dense_sparsity`, the
        tensors are read from disk as is
    :param device: device passed to the safetensors reader / compressor
    :return: a generator that yields the name and tensor of each loaded tensor
    :raises ValueError: if the path is None or does not exist
    """
    if compressed_tensors is None or not Path(compressed_tensors).exists():
        raise ValueError("No compressed tensors provided to load")

    stored_dense = (
        compression_config is None
        or compression_config.format == CompressionFormat.dense_sparsity.value
    )

    if not stored_dense:
        # hand over to the compressor registered for this format
        format_name = compression_config.format
        compressor = ModelCompressor.load_from_registry(
            format_name, config=compression_config
        )
        yield from compressor.decompress(compressed_tensors, device=device)
        return

    # tensors are assumed to be stored uncompressed: read each one directly
    name_to_file = get_weight_mappings(compressed_tensors)
    for tensor_name, tensor_file in name_to_file.items():
        with safe_open(tensor_file, framework="pt", device=device) as reader:
            loaded = reader.get_tensor(tensor_name)
            yield tensor_name, loaded
133
+
134
+
135
def save_compressed_model(
    model: torch.nn.Module,
    filename: str,
    compression_format: Optional[CompressionFormat] = None,
    force_contiguous: bool = True,
):
    """
    Wrapper around safetensors `save_model` helper function, which allows for
    saving compressed model to disk.

    Note: The model is assumed to have a
        state_dict with unique entries

    :param model: model to save on disk
    :param filename: filename location to save the file
    :param compression_format: compression format used for the model
    :param force_contiguous: forcing the state_dict to be saved as contiguous tensors
    :raises ValueError: re-raised from `save_compressed`, chained to the
        original error; when force_contiguous is False the message also
        suggests enabling it
    """
    state_dict = model.state_dict()
    if force_contiguous:
        state_dict = {k: v.contiguous() for k, v in state_dict.items()}
    try:
        save_compressed(state_dict, filename, compression_format=compression_format)
    except ValueError as e:
        msg = str(e)
        if not force_contiguous:
            # only suggest the flag when the caller has not already set it
            msg += " Or use save_compressed_model(..., force_contiguous=True), read the docs for potential caveats."  # noqa E501
        # keep the original traceback attached for debugging
        raise ValueError(msg) from e
@@ -0,0 +1,238 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from typing import Dict, Generator, List, Tuple, Union
17
+
18
+ import numpy
19
+ import torch
20
+ from compressed_tensors.compressors import ModelCompressor
21
+ from compressed_tensors.config import CompressionFormat
22
+ from compressed_tensors.utils import get_nested_weight_mappings, merge_names
23
+ from safetensors import safe_open
24
+ from torch import Tensor
25
+ from tqdm import tqdm
26
+
27
+
28
+ __all__ = [
29
+ "BitmaskCompressor",
30
+ "BitmaskTensor",
31
+ "bitmask_compress",
32
+ "bitmask_decompress",
33
+ "pack_bitmasks",
34
+ "unpack_bitmasks",
35
+ ]
36
+
37
+ _LOGGER: logging.Logger = logging.getLogger(__name__)
38
+
39
+
40
@ModelCompressor.register(name=CompressionFormat.sparse_bitmask.value)
class BitmaskCompressor(ModelCompressor):
    """
    Compression for sparse models using bitmasks. Non-zero weights are stored in a 1d
    values tensor, with their locations stored in a 2d bitmask
    """

    # suffixes of the entries stored on disk for every compressed weight;
    # they match the BitmaskTensor constructor arguments
    COMPRESSION_PARAM_NAMES = ["shape", "compressed", "bitmask", "row_offsets"]

    def compress(self, model_state: Dict[str, Tensor]) -> Dict[str, Tensor]:
        """
        Compresses a dense state dict using bitmask compression

        :param model_state: state dict of uncompressed model
        :return: compressed state dict
        """
        compressed_dict = {}
        _LOGGER.debug(
            f"Compressing model with {len(model_state)} parameterized layers..."
        )
        for name, value in tqdm(model_state.items(), desc="Compressing model"):
            bitmask_tensor = BitmaskTensor.from_dense(value)
            bitmask_dict = bitmask_tensor.dict(name_prefix=name, device="cpu")
            for key in bitmask_dict:
                if key in compressed_dict:
                    # Logger.warn is a deprecated alias of Logger.warning
                    _LOGGER.warning(
                        f"Expected all compressed state_dict keys to be unique, but "
                        f"found an existing entry for {key}. The existing entry will "
                        "be replaced."
                    )
            # dict.update instead of `|=` so this also runs on Python < 3.9
            compressed_dict.update(bitmask_dict)

        return compressed_dict

    def decompress(
        self, path_to_model_or_tensors: str, device: str = "cpu"
    ) -> Generator[Tuple[str, Tensor], None, None]:
        """
        Reads a bitmask compressed state dict located at path_to_model_or_tensors
        and returns a generator for sequentially decompressing back to a dense state dict

        :param path_to_model_or_tensors: path to compressed safetensors model
            (directory with one or more safetensors files) or compressed
            tensors file
        :param device: device to load decompressed weights onto
        :return: iterator for generating decompressed weights
        """
        # presumably maps weight name -> {param name -> safetensors file};
        # verify against get_nested_weight_mappings
        weight_mappings = get_nested_weight_mappings(
            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
        )
        for weight_name, param_files in weight_mappings.items():
            weight_data = {}
            for param_name, safe_path in param_files.items():
                full_name = merge_names(weight_name, param_name)
                with safe_open(safe_path, framework="pt", device=device) as f:
                    weight_data[param_name] = f.get_tensor(full_name)
            data = BitmaskTensor(**weight_data)
            decompressed = data.decompress()
            yield weight_name, decompressed
98
+
99
+
100
class BitmaskTensor:
    """
    Owns compressions and decompression for a single bitmask compressed tensor.
    Adapted from: https://github.com/mgoin/torch_bitmask/tree/main

    :param shape: shape of dense tensor
    :param compressed: flat tensor of non-zero values
    :param bitmask: 2d bitmask of non-zero values
    :param row_offsets: flat tensor indicating what index in values each dense
        row starts at
    """

    def __init__(
        self,
        shape: Union[torch.Size, List],
        compressed: Tensor,
        bitmask: Tensor,
        row_offsets: Tensor,
    ):
        # the shape may arrive as a tensor (e.g. when read back from disk);
        # store plain ints so it is usable as a size everywhere
        self.shape = [int(dim) for dim in shape]
        self.compressed = compressed
        self.bitmask = bitmask
        self.row_offsets = row_offsets

    @staticmethod
    def from_dense(tensor: Tensor) -> "BitmaskTensor":
        """
        :param tensor: dense tensor to compress
        :return: instantiated compressed tensor
        """
        shape = tensor.shape
        # compression is done on a CPU copy of the tensor
        compressed, bitmask, row_offsets = bitmask_compress(tensor.cpu())
        return BitmaskTensor(
            shape=shape, compressed=compressed, bitmask=bitmask, row_offsets=row_offsets
        )

    def decompress(self) -> Tensor:
        """
        :return: reconstructed dense tensor
        """
        return bitmask_decompress(self.compressed, self.bitmask, self.shape)

    def curr_memory_size_bytes(self):
        """
        :return: size in bytes of the compressed values, bitmask and row
            offsets tensors combined (the shape entry is not counted)
        """

        def sizeof_tensor(a):
            return a.element_size() * a.nelement()

        return (
            sizeof_tensor(self.compressed)
            + sizeof_tensor(self.bitmask)
            + sizeof_tensor(self.row_offsets)
        )

    def dict(self, name_prefix: str, device: str = "cpu") -> Dict[str, Tensor]:
        """
        :param name_prefix: name of original tensor to store compressed weight as
        :param device: device the returned tensors are placed on
        :return: dict of compressed data for the stored weight
        """
        return {
            merge_names(name_prefix, "shape"): torch.tensor(self.shape, device=device),
            merge_names(name_prefix, "compressed"): self.compressed.to(device),
            merge_names(name_prefix, "bitmask"): self.bitmask.to(device),
            merge_names(name_prefix, "row_offsets"): self.row_offsets.to(device),
        }

    def __repr__(self):
        return f"BitmaskTensor(shape={self.shape}, compressed=True)"
169
+
170
+
171
def bitmask_compress(tensor: Tensor) -> Tuple[Tensor, Tensor, Tensor]:
    """
    Split a dense tensor into its non-zero values, a packed mask marking
    where those values live, and per-row start offsets into the values.

    :param tensor: dense tensor to compress
    :return: tuple of (non-zero values, packed bitmask, row offsets)
    """
    nonzero_mask = tensor != 0
    # number of kept values along the last dimension
    kept_per_row = nonzero_mask.sum(dim=-1)
    # exclusive prefix sum: index in the values tensor where each row begins
    offsets = torch.cumsum(kept_per_row, 0) - kept_per_row
    kept_values = tensor[nonzero_mask]
    packed_mask = pack_bitmasks(nonzero_mask)

    return kept_values, packed_mask, offsets
185
+
186
+
187
def bitmask_decompress(
    values: Tensor, bitmasks: Tensor, original_shape: torch.Size
) -> Tensor:
    """
    Reconstructs a dense tensor from a compressed one

    :param values: 1d tensor of non-zero values
    :param bitmasks: 2d int8 tensor flagging locations of non-zero values in the
        tensors original shape
    :param original_shape: shape of the dense tensor
    :return: decompressed dense tensor, with the dtype and device of values
    """
    bytemasks_unpacked = unpack_bitmasks(bitmasks, original_shape)

    # allocate on the same device as the values, otherwise the masked
    # assignment below fails when values were loaded onto a non-CPU device
    decompressed_tensor = torch.zeros(
        original_shape, dtype=values.dtype, device=values.device
    )
    # scatter the non-zero values back into the positions flagged by the mask
    decompressed_tensor[bytemasks_unpacked] = values

    return decompressed_tensor
205
+
206
+
207
def pack_bitmasks(bytemasks: Tensor) -> Tensor:
    """
    Converts a bytemask tensor to a bitmask tensor to reduce memory. Shape RxC will be
    compressed to R x ceil(C/8)

    :param bytemasks: mask tensor where each byte corresponds to a weight
    :return: mask tensor where each bit corresponds to a weight; always a CPU
        tensor, since the packing is done through numpy
    """
    # .cpu() is a no-op for CPU tensors and lets masks on other devices be
    # converted to numpy without raising
    packed_bits_numpy = numpy.packbits(
        bytemasks.cpu().numpy(), axis=-1, bitorder="little"
    )
    packed_bits_torch = torch.from_numpy(packed_bits_numpy)

    return packed_bits_torch
218
+
219
+
220
def unpack_bitmasks(packed_bitmasks: Tensor, original_shape: torch.Size) -> Tensor:
    """
    Converts a bitmask tensor back to a bytemask tensor for use during decompression

    :param packed_bitmasks: mask tensor where each bit corresponds to a weight
    :param original_shape: dense shape to decompress to
    :return: boolean mask of weights in the original dense shape; always a
        CPU tensor, since the unpacking is done through numpy
    """
    # the shape may be a torch.Size, a list, or a tensor: use plain ints
    dense_shape = tuple(int(dim) for dim in original_shape)

    # Unpack the bits, dropping the padding bits past the last real column.
    # .cpu() is a no-op for CPU tensors and lets masks loaded onto other
    # devices be converted to numpy without raising
    unpacked_bits = numpy.unpackbits(
        packed_bitmasks.cpu().numpy(),
        axis=-1,
        count=dense_shape[-1],
        bitorder="little",
    )

    # Reshape to match the original shape
    unpacked_bitmasks_torch = torch.from_numpy(
        unpacked_bits.reshape(dense_shape).astype(bool)
    )

    return unpacked_bitmasks_torch
@@ -0,0 +1,18 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # flake8: noqa
16
+ from .base import *
17
+ from .dense import *
18
+ from .sparse_bitmask import *
@@ -0,0 +1,42 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from enum import Enum
16
+ from typing import Optional
17
+
18
+ from compressed_tensors.registry import RegistryMixin
19
+ from pydantic import BaseModel
20
+
21
+
22
+ __all__ = ["CompressionConfig", "CompressionFormat"]
23
+
24
+
25
class CompressionFormat(Enum):
    """
    Names of the supported compression formats. The string values are the
    names under which the matching compressor and config classes are
    registered.
    """

    # tensors are stored as is, without compression
    dense_sparsity = "dense-sparsity"
    # non-zero values are stored together with a bitmask of their locations
    sparse_bitmask = "sparse-bitmask"
28
+
29
+
30
class CompressionConfig(RegistryMixin, BaseModel):
    """
    Base data class for storing compression parameters

    :param format: name of compression format
    :param global_sparsity: average sparsity of the entire model
    :param sparsity_structure: structure of the sparsity, such as
        "unstructured", "2:4", "8:16" etc
    """

    # required: no default is given on the base class
    format: str
    # optional, defaults to 0.0
    global_sparsity: Optional[float] = 0.0
    # optional, defaults to "unstructured"
    sparsity_structure: Optional[str] = "unstructured"
@@ -0,0 +1,36 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Optional
16
+
17
+ from compressed_tensors.config import CompressionConfig, CompressionFormat
18
+
19
+
20
+ __all__ = ["DenseSparsityConfig"]
21
+
22
+
23
@CompressionConfig.register(name=CompressionFormat.dense_sparsity.value)
class DenseSparsityConfig(CompressionConfig):
    """
    Identity configuration for storing a sparse model in
    an uncompressed dense format

    :param format: name of compression format, defaults to the
        `dense_sparsity` format value
    :param global_sparsity: average sparsity of the entire model
    :param sparsity_structure: structure of the sparsity, such as
        "unstructured", "2:4", "8:16" etc
    """

    # gives the format field a default, so it does not have to be passed
    format: str = CompressionFormat.dense_sparsity.value
    global_sparsity: Optional[float] = 0.0
    sparsity_structure: Optional[str] = "unstructured"