compressed-tensors 0.5.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/PKG-INFO +27 -25
  2. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/README.md +24 -13
  3. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/setup.py +5 -2
  4. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/__init__.py +1 -0
  5. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/base.py +2 -0
  6. compressed-tensors-0.7.0/src/compressed_tensors/compressors/__init__.py +22 -0
  7. compressed-tensors-0.7.0/src/compressed_tensors/compressors/base.py +188 -0
  8. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/compressors/helpers.py +6 -6
  9. compressed-tensors-0.7.0/src/compressed_tensors/compressors/model_compressors/__init__.py +17 -0
  10. {compressed_tensors-0.5.0/src/compressed_tensors/compressors → compressed-tensors-0.7.0/src/compressed_tensors/compressors/model_compressors}/model_compressor.py +99 -43
  11. compressed-tensors-0.7.0/src/compressed_tensors/compressors/quantized_compressors/__init__.py +18 -0
  12. compressed_tensors-0.5.0/src/compressed_tensors/compressors/naive_quantized.py → compressed-tensors-0.7.0/src/compressed_tensors/compressors/quantized_compressors/base.py +64 -62
  13. compressed-tensors-0.7.0/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +140 -0
  14. compressed-tensors-0.7.0/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +211 -0
  15. compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_compressors/__init__.py +18 -0
  16. compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_compressors/base.py +110 -0
  17. {compressed_tensors-0.5.0/src/compressed_tensors/compressors → compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_compressors}/dense.py +3 -3
  18. {compressed_tensors-0.5.0/src/compressed_tensors/compressors → compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_compressors}/sparse_bitmask.py +14 -59
  19. compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +16 -0
  20. {compressed_tensors-0.5.0/src/compressed_tensors/compressors → compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_quantized_compressors}/marlin_24.py +3 -3
  21. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/config/base.py +6 -1
  22. compressed-tensors-0.7.0/src/compressed_tensors/linear/__init__.py +13 -0
  23. compressed-tensors-0.7.0/src/compressed_tensors/linear/compressed_linear.py +87 -0
  24. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/__init__.py +1 -0
  25. compressed-tensors-0.7.0/src/compressed_tensors/quantization/cache.py +201 -0
  26. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/apply.py +63 -9
  27. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/calibration.py +7 -7
  28. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/compressed.py +3 -1
  29. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/forward.py +126 -44
  30. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/frozen.py +6 -1
  31. compressed-tensors-0.7.0/src/compressed_tensors/quantization/lifecycle/helpers.py +33 -0
  32. compressed-tensors-0.7.0/src/compressed_tensors/quantization/lifecycle/initialize.py +239 -0
  33. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/__init__.py +1 -0
  34. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/base.py +54 -14
  35. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/min_max.py +8 -0
  36. compressed-tensors-0.7.0/src/compressed_tensors/quantization/observers/mse.py +162 -0
  37. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/quant_args.py +102 -24
  38. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/quant_config.py +14 -2
  39. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/quant_scheme.py +12 -13
  40. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/utils/helpers.py +44 -19
  41. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/__init__.py +1 -0
  42. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/helpers.py +30 -1
  43. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/offload.py +14 -2
  44. compressed-tensors-0.7.0/src/compressed_tensors/utils/permute.py +70 -0
  45. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/safetensors_load.py +2 -0
  46. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/semi_structured_conversions.py +1 -0
  47. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/version.py +1 -1
  48. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/PKG-INFO +27 -25
  49. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/SOURCES.txt +18 -8
  50. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/requires.txt +3 -1
  51. compressed_tensors-0.5.0/src/compressed_tensors/compressors/__init__.py +0 -28
  52. compressed_tensors-0.5.0/src/compressed_tensors/compressors/base.py +0 -60
  53. compressed_tensors-0.5.0/src/compressed_tensors/compressors/pack_quantized.py +0 -219
  54. compressed_tensors-0.5.0/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -53
  55. compressed_tensors-0.5.0/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -156
  56. compressed_tensors-0.5.0/tests/test_registry.py +0 -53
  57. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/LICENSE +0 -0
  58. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/pyproject.toml +0 -0
  59. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/setup.cfg +0 -0
  60. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/config/__init__.py +0 -0
  61. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/config/dense.py +0 -0
  62. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
  63. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
  64. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/helpers.py +0 -0
  65. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/memoryless.py +0 -0
  66. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
  67. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/registry/__init__.py +0 -0
  68. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/registry/registry.py +0 -0
  69. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/permutations_24.py +0 -0
  70. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
  71. {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/top_level.txt +0 -0
@@ -1,51 +1,53 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: compressed-tensors
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
7
7
  Author-email: support@neuralmagic.com
8
8
  License: Apache 2.0
9
9
  Description-Content-Type: text/markdown
10
- License-File: LICENSE
11
- Requires-Dist: torch>=1.7.0
12
- Requires-Dist: transformers
13
- Requires-Dist: accelerate
14
- Requires-Dist: pydantic>=2.0
15
10
  Provides-Extra: dev
16
- Requires-Dist: black==22.12.0; extra == "dev"
17
- Requires-Dist: isort==5.8.0; extra == "dev"
18
- Requires-Dist: wheel>=0.36.2; extra == "dev"
19
- Requires-Dist: flake8>=3.8.3; extra == "dev"
20
- Requires-Dist: pytest>=6.0.0; extra == "dev"
21
- Requires-Dist: nbconvert>=7.16.3; extra == "dev"
22
-
23
- # compressed_tensors
24
-
25
- This repository extends a [safetensors](https://github.com/huggingface/safetensors) format to efficiently store sparse and/or quantized tensors on disk. `compressed-tensors` format supports multiple compression types to minimize the disk space and facilitate the tensor manipulation.
11
+ Provides-Extra: accelerate
12
+ License-File: LICENSE
26
13
 
27
- ## Motivation
14
+ # compressed-tensors
28
15
 
29
- ### Reduce disk space by saving sparse tensors in a compressed format
16
+ The `compressed-tensors` library extends the [safetensors](https://github.com/huggingface/safetensors) format, providing a versatile and efficient way to store and manage compressed tensor data. This library supports various quantization and sparsity schemes, making it a unified format for handling different model optimizations like GPTQ, AWQ, SmoothQuant, INT8, FP8, SparseGPT, and more.
30
17
 
31
- The compressed format stores the data much more efficiently by taking advantage of two properties of tensors:
18
+ ## Why `compressed-tensors`?
32
19
 
33
- - Sparse tensors -> due to a large number of entries that are equal to zero.
34
- - Quantized -> due to their low precision representation.
20
+ As model compression becomes increasingly important for efficient deployment of LLMs, the landscape of quantization and compression techniques has become increasingly fragmented.
21
+ Each method often comes with its own storage format and loading procedures, making it challenging to work with multiple techniques or switch between them.
22
+ `compressed-tensors` addresses this by providing a single, extensible format that can represent a wide variety of compression schemes.
35
23
 
36
- ### Introduce an elegant interface to save/load compressed tensors
24
+ * **Unified Checkpoint Format**: Supports various compression schemes in a single, consistent format.
25
+ * **Wide Compatibility**: Works with popular quantization methods like GPTQ, SmoothQuant, and FP8. See [llm-compressor](https://github.com/vllm-project/llm-compressor)
26
+ * **Flexible Quantization Support**:
27
+ * Weight-only quantization (e.g., W4A16, W8A16, WnA16)
28
+ * Activation quantization (e.g., W8A8)
29
+ * KV cache quantization
30
+ * Non-uniform schemes (different layers can be quantized in different ways!)
31
+ * **Sparsity Support**: Handles both unstructured and semi-structured (e.g., 2:4) sparsity patterns.
32
+ * **Open-Source Integration**: Designed to work seamlessly with Hugging Face models and PyTorch.
37
33
 
38
- The library provides the user with the ability to compress/decompress tensors. The properties of tensors are defined by human-readable configs, allowing the users to understand the compression format at a quick glance.
34
+ This allows developers and researchers to easily experiment with composing different quantization methods, simplify model deployment pipelines, and reduce the overhead of supporting multiple compression formats in inference engines.
39
35
 
40
36
  ## Installation
41
37
 
42
- ### Pip
38
+ ### From [PyPI](https://pypi.org/project/compressed-tensors)
43
39
 
40
+ Stable release:
44
41
  ```bash
45
42
  pip install compressed-tensors
46
43
  ```
47
44
 
48
- ### From source
45
+ Nightly release:
46
+ ```bash
47
+ pip install compressed-tensors-nightly
48
+ ```
49
+
50
+ ### From Source
49
51
 
50
52
  ```bash
51
53
  git clone https://github.com/neuralmagic/compressed-tensors
@@ -1,29 +1,40 @@
1
- # compressed_tensors
1
+ # compressed-tensors
2
2
 
3
- This repository extends a [safetensors](https://github.com/huggingface/safetensors) format to efficiently store sparse and/or quantized tensors on disk. `compressed-tensors` format supports multiple compression types to minimize the disk space and facilitate the tensor manipulation.
3
+ The `compressed-tensors` library extends the [safetensors](https://github.com/huggingface/safetensors) format, providing a versatile and efficient way to store and manage compressed tensor data. This library supports various quantization and sparsity schemes, making it a unified format for handling different model optimizations like GPTQ, AWQ, SmoothQuant, INT8, FP8, SparseGPT, and more.
4
4
 
5
- ## Motivation
5
+ ## Why `compressed-tensors`?
6
6
 
7
- ### Reduce disk space by saving sparse tensors in a compressed format
7
+ As model compression becomes increasingly important for efficient deployment of LLMs, the landscape of quantization and compression techniques has become increasingly fragmented.
8
+ Each method often comes with its own storage format and loading procedures, making it challenging to work with multiple techniques or switch between them.
9
+ `compressed-tensors` addresses this by providing a single, extensible format that can represent a wide variety of compression schemes.
8
10
 
9
- The compressed format stores the data much more efficiently by taking advantage of two properties of tensors:
11
+ * **Unified Checkpoint Format**: Supports various compression schemes in a single, consistent format.
12
+ * **Wide Compatibility**: Works with popular quantization methods like GPTQ, SmoothQuant, and FP8. See [llm-compressor](https://github.com/vllm-project/llm-compressor)
13
+ * **Flexible Quantization Support**:
14
+ * Weight-only quantization (e.g., W4A16, W8A16, WnA16)
15
+ * Activation quantization (e.g., W8A8)
16
+ * KV cache quantization
17
+ * Non-uniform schemes (different layers can be quantized in different ways!)
18
+ * **Sparsity Support**: Handles both unstructured and semi-structured (e.g., 2:4) sparsity patterns.
19
+ * **Open-Source Integration**: Designed to work seamlessly with Hugging Face models and PyTorch.
10
20
 
11
- - Sparse tensors -> due to a large number of entries that are equal to zero.
12
- - Quantized -> due to their low precision representation.
13
-
14
- ### Introduce an elegant interface to save/load compressed tensors
15
-
16
- The library provides the user with the ability to compress/decompress tensors. The properties of tensors are defined by human-readable configs, allowing the users to understand the compression format at a quick glance.
21
+ This allows developers and researchers to easily experiment with composing different quantization methods, simplify model deployment pipelines, and reduce the overhead of supporting multiple compression formats in inference engines.
17
22
 
18
23
  ## Installation
19
24
 
20
- ### Pip
25
+ ### From [PyPI](https://pypi.org/project/compressed-tensors)
21
26
 
27
+ Stable release:
22
28
  ```bash
23
29
  pip install compressed-tensors
24
30
  ```
25
31
 
26
- ### From source
32
+ Nightly release:
33
+ ```bash
34
+ pip install compressed-tensors-nightly
35
+ ```
36
+
37
+ ### From Source
27
38
 
28
39
  ```bash
29
40
  git clone https://github.com/neuralmagic/compressed-tensors
@@ -46,10 +46,13 @@ def _setup_packages() -> List:
46
46
  )
47
47
 
48
48
  def _setup_install_requires() -> List:
49
- return ["torch>=1.7.0", "transformers", "accelerate", "pydantic>=2.0"]
49
+ return ["torch>=1.7.0", "transformers", "pydantic>=2.0"]
50
50
 
51
51
  def _setup_extras() -> Dict:
52
- return {"dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3"]}
52
+ return {
53
+ "dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3"],
54
+ "accelerate": ["accelerate"]
55
+ }
53
56
 
54
57
  setup(
55
58
  name=_PACKAGE_NAME,
@@ -19,3 +19,4 @@ from .compressors import *
19
19
  from .config import *
20
20
  from .quantization import QuantizationConfig, QuantizationStatus
21
21
  from .utils import *
22
+ from .version import *
@@ -16,3 +16,5 @@ SPARSITY_CONFIG_NAME = "sparsity_config"
16
16
  QUANTIZATION_CONFIG_NAME = "quantization_config"
17
17
  COMPRESSION_CONFIG_NAME = "compression_config"
18
18
  KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
19
+ COMPRESSION_VERSION_NAME = "version"
20
+ QUANTIZATION_METHOD_NAME = "quant_method"
@@ -0,0 +1,22 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ # flake8: noqa
16
+
17
+ from .base import *
18
+ from .helpers import *
19
+ from .model_compressors import *
20
+ from .quantized_compressors import *
21
+ from .sparse_compressors import *
22
+ from .sparse_quantized_compressors import *
@@ -0,0 +1,188 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC, abstractmethod
16
+ from typing import Dict, Generator, Optional, Tuple, Union
17
+
18
+ import torch
19
+ from compressed_tensors.config import SparsityCompressionConfig
20
+ from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
21
+ from compressed_tensors.registry import RegistryMixin
22
+ from torch import Tensor
23
+ from torch.nn import Module
24
+
25
+
26
+ __all__ = ["BaseCompressor"]
27
+
28
+
29
+ class BaseCompressor(RegistryMixin, ABC):
30
+ """
31
+ Base class representing a model compression algorithm. Each child class should
32
+ implement compression_param_info, compress_weight and decompress_weight.
33
+
34
+ Compressors support compressing/decompressing a full module state dict or a single
35
+ quantized PyTorch leaf module.
36
+
37
+ Model Load Lifecycle (run_compressed=False):
38
+ - ModelCompressor.decompress()
39
+ - apply_quantization_config()
40
+ - BaseCompressor.decompress()
41
+
42
+ Model Save Lifecycle:
43
+ - ModelCompressor.compress()
44
+ - BaseCompressor.compress()
45
+
46
+
47
+ Module Lifecycle (run_compressed=True):
48
+ - apply_quantization_config()
49
+ - compressed_module = CompressedLinear(module)
50
+ - initialize_module_for_quantization()
51
+ - BaseCompressor.compression_param_info()
52
+ - register_parameters()
53
+ - compressed_module.forward()
54
+ - compressed_module.decompress()
55
+
56
+
57
+ :param config: config specifying compression parameters
58
+ """
59
+
60
+ def __init__(
61
+ self, config: Union[SparsityCompressionConfig, QuantizationConfig, None] = None
62
+ ):
63
+ self.config = config
64
+
65
+ def compression_param_info(
66
+ self,
67
+ weight_shape: torch.Size,
68
+ quantization_args: Optional[QuantizationArgs] = None,
69
+ ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
70
+ """
71
+ Creates a dictionary of expected shapes and dtypes for each compression
72
+ parameter used by the compressor
73
+
74
+ :param weight_shape: uncompressed weight shape
75
+ :param quantization_args: quantization parameters for the weight
76
+ :return: dictionary mapping compressed parameter names to shape and dtype
77
+ """
78
+ raise NotImplementedError()
79
+
80
+ @abstractmethod
81
+ def compress(
82
+ self,
83
+ model_state: Dict[str, Tensor],
84
+ **kwargs,
85
+ ) -> Dict[str, Tensor]:
86
+ """
87
+ Compresses a dense state dict
88
+
89
+ :param model_state: state dict of uncompressed model
90
+ :param kwargs: additional arguments for compression
91
+ :return: compressed state dict
92
+ """
93
+ raise NotImplementedError()
94
+
95
+ @abstractmethod
96
+ def decompress(
97
+ self,
98
+ path_to_model_or_tensors: str,
99
+ device: str = "cpu",
100
+ **kwargs,
101
+ ) -> Generator[Tuple[str, Tensor], None, None]:
102
+ """
103
+ Reads a compressed state dict located at path_to_model_or_tensors
104
+ and returns a generator for sequentially decompressing back to a
105
+ dense state dict
106
+
107
+ :param path_to_model_or_tensors: path to compressed safetensors model (directory
108
+ with one or more safetensors files) or compressed tensors file
109
+ :param names_to_scheme: quantization args for each quantized weight
110
+ :param device: optional device to load intermediate weights into
111
+ :return: generator of (name, tensor) pairs that sequentially yields the decompressed dense state dict
112
+ """
113
+ raise NotImplementedError()
114
+
115
+ def compress_module(self, module: Module) -> Optional[Dict[str, torch.Tensor]]:
116
+ """
117
+ Compresses a single quantized leaf PyTorch module. If the module is not
118
+ quantized, this function has no effect.
119
+
120
+ :param module: PyTorch module to compress
121
+ :return: dictionary of compressed weight data, or None if module is not
122
+ quantized
123
+ """
124
+ if not hasattr(module, "quantization_scheme"):
125
+ return None # module is not quantized
126
+ quantization_scheme = module.quantization_scheme
127
+ if not hasattr(quantization_scheme, "weights"):
128
+ return None # weights are not quantized
129
+
130
+ quantization_args = quantization_scheme.weights
131
+ weight = getattr(module, "weight", None)
132
+ weight_scale = getattr(module, "weight_scale", None)
133
+ weight_zero_point = getattr(module, "weight_zero_point", None)
134
+
135
+ return self.compress_weight(
136
+ weight=weight,
137
+ scale=weight_scale,
138
+ zero_point=weight_zero_point,
139
+ quantization_args=quantization_args,
140
+ )
141
+
142
+ def compress_weight(
143
+ self,
144
+ weight: Tensor,
145
+ **kwargs,
146
+ ) -> Dict[str, torch.Tensor]:
147
+ """
148
+ Compresses a single uncompressed weight
149
+
150
+ :param weight: uncompressed weight tensor
151
+ :param kwargs: additional arguments for compression
152
+ """
153
+ raise NotImplementedError()
154
+
155
+ def decompress_module(self, module: Module):
156
+ """
157
+ Decompresses a single compressed leaf PyTorch module. If the module is not
158
+ quantized, this function has no effect.
159
+
160
+ :param module: PyTorch module to decompress
161
+ :return: tensor of the decompressed weight, or None if module is not quantized
162
+ """
163
+ if not hasattr(module, "quantization_scheme"):
164
+ return None # module is not quantized
165
+ quantization_scheme = module.quantization_scheme
166
+ if not hasattr(quantization_scheme, "weights"):
167
+ return None # weights are not quantized
168
+
169
+ quantization_args = quantization_scheme.weights
170
+ compressed_data = {}
171
+ for name, parameter in module.named_parameters():
172
+ compressed_data[name] = parameter
173
+
174
+ return self.decompress_weight(
175
+ compressed_data=compressed_data, quantization_args=quantization_args
176
+ )
177
+
178
+ def decompress_weight(
179
+ self, compressed_data: Dict[str, Tensor], **kwargs
180
+ ) -> torch.Tensor:
181
+ """
182
+ Decompresses a single compressed weight
183
+
184
+ :param compressed_data: dictionary of data needed for decompression
185
+ :param kwargs: additional arguments for decompression
186
+ :return: tensor of the decompressed weight
187
+ """
188
+ raise NotImplementedError()
@@ -16,7 +16,7 @@ from pathlib import Path
16
16
  from typing import Dict, Generator, Optional, Tuple, Union
17
17
 
18
18
  import torch
19
- from compressed_tensors.compressors import Compressor
19
+ from compressed_tensors.compressors import BaseCompressor
20
20
  from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
21
21
  from compressed_tensors.utils.safetensors_load import get_weight_mappings
22
22
  from safetensors import safe_open
@@ -52,16 +52,16 @@ def save_compressed(
52
52
  compression_format = compression_format or CompressionFormat.dense.value
53
53
 
54
54
  if not (
55
- compression_format in Compressor.registered_names()
56
- or compression_format in Compressor.registered_aliases()
55
+ compression_format in BaseCompressor.registered_names()
56
+ or compression_format in BaseCompressor.registered_aliases()
57
57
  ):
58
58
  raise ValueError(
59
59
  f"Unknown compression format: {compression_format}. "
60
- f"Must be one of {set(Compressor.registered_names() + Compressor.registered_aliases())}" # noqa E501
60
+ f"Must be one of {set(BaseCompressor.registered_names() + BaseCompressor.registered_aliases())}" # noqa E501
61
61
  )
62
62
 
63
63
  # compress
64
- compressor = Compressor.load_from_registry(compression_format)
64
+ compressor = BaseCompressor.load_from_registry(compression_format)
65
65
  # save compressed tensors
66
66
  compressed_tensors = compressor.compress(tensors)
67
67
  save_file(compressed_tensors, save_path)
@@ -102,7 +102,7 @@ def load_compressed(
102
102
  else:
103
103
  # decompress tensors
104
104
  compression_format = compression_config.format
105
- compressor = Compressor.load_from_registry(
105
+ compressor = BaseCompressor.load_from_registry(
106
106
  compression_format, config=compression_config
107
107
  )
108
108
  yield from compressor.decompress(compressed_tensors, device=device)
@@ -0,0 +1,17 @@
1
+ # Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing,
10
+ # software distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # flake8: noqa
15
+
16
+
17
+ from .model_compressor import *