compressed-tensors 0.5.0__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/PKG-INFO +27 -25
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/README.md +24 -13
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/setup.py +5 -2
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/__init__.py +1 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/base.py +2 -0
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/__init__.py +22 -0
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/base.py +188 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/compressors/helpers.py +6 -6
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/model_compressors/__init__.py +17 -0
- {compressed_tensors-0.5.0/src/compressed_tensors/compressors → compressed-tensors-0.7.0/src/compressed_tensors/compressors/model_compressors}/model_compressor.py +99 -43
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/quantized_compressors/__init__.py +18 -0
- compressed_tensors-0.5.0/src/compressed_tensors/compressors/naive_quantized.py → compressed-tensors-0.7.0/src/compressed_tensors/compressors/quantized_compressors/base.py +64 -62
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +140 -0
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +211 -0
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_compressors/__init__.py +18 -0
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_compressors/base.py +110 -0
- {compressed_tensors-0.5.0/src/compressed_tensors/compressors → compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_compressors}/dense.py +3 -3
- {compressed_tensors-0.5.0/src/compressed_tensors/compressors → compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_compressors}/sparse_bitmask.py +14 -59
- compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +16 -0
- {compressed_tensors-0.5.0/src/compressed_tensors/compressors → compressed-tensors-0.7.0/src/compressed_tensors/compressors/sparse_quantized_compressors}/marlin_24.py +3 -3
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/config/base.py +6 -1
- compressed-tensors-0.7.0/src/compressed_tensors/linear/__init__.py +13 -0
- compressed-tensors-0.7.0/src/compressed_tensors/linear/compressed_linear.py +87 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/__init__.py +1 -0
- compressed-tensors-0.7.0/src/compressed_tensors/quantization/cache.py +201 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/apply.py +63 -9
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/calibration.py +7 -7
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/compressed.py +3 -1
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/forward.py +126 -44
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/frozen.py +6 -1
- compressed-tensors-0.7.0/src/compressed_tensors/quantization/lifecycle/helpers.py +33 -0
- compressed-tensors-0.7.0/src/compressed_tensors/quantization/lifecycle/initialize.py +239 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/__init__.py +1 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/base.py +54 -14
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/min_max.py +8 -0
- compressed-tensors-0.7.0/src/compressed_tensors/quantization/observers/mse.py +162 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/quant_args.py +102 -24
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/quant_config.py +14 -2
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/quant_scheme.py +12 -13
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/utils/helpers.py +44 -19
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/__init__.py +1 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/helpers.py +30 -1
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/offload.py +14 -2
- compressed-tensors-0.7.0/src/compressed_tensors/utils/permute.py +70 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/safetensors_load.py +2 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/semi_structured_conversions.py +1 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/version.py +1 -1
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/PKG-INFO +27 -25
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/SOURCES.txt +18 -8
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/requires.txt +3 -1
- compressed_tensors-0.5.0/src/compressed_tensors/compressors/__init__.py +0 -28
- compressed_tensors-0.5.0/src/compressed_tensors/compressors/base.py +0 -60
- compressed_tensors-0.5.0/src/compressed_tensors/compressors/pack_quantized.py +0 -219
- compressed_tensors-0.5.0/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -53
- compressed_tensors-0.5.0/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -156
- compressed_tensors-0.5.0/tests/test_registry.py +0 -53
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/LICENSE +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/pyproject.toml +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/setup.cfg +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/config/dense.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/helpers.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/observers/memoryless.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/dependency_links.txt +0 -0
- {compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors.egg-info/top_level.txt +0 -0
@@ -1,51 +1,53 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.5.0
|
3
|
+
Version: 0.7.0
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
7
7
|
Author-email: support@neuralmagic.com
|
8
8
|
License: Apache 2.0
|
9
9
|
Description-Content-Type: text/markdown
|
10
|
-
License-File: LICENSE
|
11
|
-
Requires-Dist: torch>=1.7.0
|
12
|
-
Requires-Dist: transformers
|
13
|
-
Requires-Dist: accelerate
|
14
|
-
Requires-Dist: pydantic>=2.0
|
15
10
|
Provides-Extra: dev
|
16
|
-
|
17
|
-
|
18
|
-
Requires-Dist: wheel>=0.36.2; extra == "dev"
|
19
|
-
Requires-Dist: flake8>=3.8.3; extra == "dev"
|
20
|
-
Requires-Dist: pytest>=6.0.0; extra == "dev"
|
21
|
-
Requires-Dist: nbconvert>=7.16.3; extra == "dev"
|
22
|
-
|
23
|
-
# compressed_tensors
|
24
|
-
|
25
|
-
This repository extends a [safetensors](https://github.com/huggingface/safetensors) format to efficiently store sparse and/or quantized tensors on disk. `compressed-tensors` format supports multiple compression types to minimize the disk space and facilitate the tensor manipulation.
|
11
|
+
Provides-Extra: accelerate
|
12
|
+
License-File: LICENSE
|
26
13
|
|
27
|
-
|
14
|
+
# compressed-tensors
|
28
15
|
|
29
|
-
|
16
|
+
The `compressed-tensors` library extends the [safetensors](https://github.com/huggingface/safetensors) format, providing a versatile and efficient way to store and manage compressed tensor data. This library supports various quantization and sparsity schemes, making it a unified format for handling different model optimizations like GPTQ, AWQ, SmoothQuant, INT8, FP8, SparseGPT, and more.
|
30
17
|
|
31
|
-
|
18
|
+
## Why `compressed-tensors`?
|
32
19
|
|
33
|
-
|
34
|
-
|
20
|
+
As model compression becomes increasingly important for efficient deployment of LLMs, the landscape of quantization and compression techniques has become increasingly fragmented.
|
21
|
+
Each method often comes with its own storage format and loading procedures, making it challenging to work with multiple techniques or switch between them.
|
22
|
+
`compressed-tensors` addresses this by providing a single, extensible format that can represent a wide variety of compression schemes.
|
35
23
|
|
36
|
-
|
24
|
+
* **Unified Checkpoint Format**: Supports various compression schemes in a single, consistent format.
|
25
|
+
* **Wide Compatibility**: Works with popular quantization methods like GPTQ, SmoothQuant, and FP8. See [llm-compressor](https://github.com/vllm-project/llm-compressor)
|
26
|
+
* **Flexible Quantization Support**:
|
27
|
+
* Weight-only quantization (e.g., W4A16, W8A16, WnA16)
|
28
|
+
* Activation quantization (e.g., W8A8)
|
29
|
+
* KV cache quantization
|
30
|
+
* Non-uniform schemes (different layers can be quantized in different ways!)
|
31
|
+
* **Sparsity Support**: Handles both unstructured and semi-structured (e.g., 2:4) sparsity patterns.
|
32
|
+
* **Open-Source Integration**: Designed to work seamlessly with Hugging Face models and PyTorch.
|
37
33
|
|
38
|
-
|
34
|
+
This allows developers and researchers to easily experiment with composing different quantization methods, simplify model deployment pipelines, and reduce the overhead of supporting multiple compression formats in inference engines.
|
39
35
|
|
40
36
|
## Installation
|
41
37
|
|
42
|
-
###
|
38
|
+
### From [PyPI](https://pypi.org/project/compressed-tensors)
|
43
39
|
|
40
|
+
Stable release:
|
44
41
|
```bash
|
45
42
|
pip install compressed-tensors
|
46
43
|
```
|
47
44
|
|
48
|
-
|
45
|
+
Nightly release:
|
46
|
+
```bash
|
47
|
+
pip install compressed-tensors-nightly
|
48
|
+
```
|
49
|
+
|
50
|
+
### From Source
|
49
51
|
|
50
52
|
```bash
|
51
53
|
git clone https://github.com/neuralmagic/compressed-tensors
|
@@ -1,29 +1,40 @@
|
|
1
|
-
# compressed_tensors
|
1
|
+
# compressed-tensors
|
2
2
|
|
3
|
-
|
3
|
+
The `compressed-tensors` library extends the [safetensors](https://github.com/huggingface/safetensors) format, providing a versatile and efficient way to store and manage compressed tensor data. This library supports various quantization and sparsity schemes, making it a unified format for handling different model optimizations like GPTQ, AWQ, SmoothQuant, INT8, FP8, SparseGPT, and more.
|
4
4
|
|
5
|
-
##
|
5
|
+
## Why `compressed-tensors`?
|
6
6
|
|
7
|
-
|
7
|
+
As model compression becomes increasingly important for efficient deployment of LLMs, the landscape of quantization and compression techniques has become increasingly fragmented.
|
8
|
+
Each method often comes with its own storage format and loading procedures, making it challenging to work with multiple techniques or switch between them.
|
9
|
+
`compressed-tensors` addresses this by providing a single, extensible format that can represent a wide variety of compression schemes.
|
8
10
|
|
9
|
-
|
11
|
+
* **Unified Checkpoint Format**: Supports various compression schemes in a single, consistent format.
|
12
|
+
* **Wide Compatibility**: Works with popular quantization methods like GPTQ, SmoothQuant, and FP8. See [llm-compressor](https://github.com/vllm-project/llm-compressor)
|
13
|
+
* **Flexible Quantization Support**:
|
14
|
+
* Weight-only quantization (e.g., W4A16, W8A16, WnA16)
|
15
|
+
* Activation quantization (e.g., W8A8)
|
16
|
+
* KV cache quantization
|
17
|
+
* Non-uniform schemes (different layers can be quantized in different ways!)
|
18
|
+
* **Sparsity Support**: Handles both unstructured and semi-structured (e.g., 2:4) sparsity patterns.
|
19
|
+
* **Open-Source Integration**: Designed to work seamlessly with Hugging Face models and PyTorch.
|
10
20
|
|
11
|
-
|
12
|
-
- Quantized -> due to their low precision representation.
|
13
|
-
|
14
|
-
### Introduce an elegant interface to save/load compressed tensors
|
15
|
-
|
16
|
-
The library provides the user with the ability to compress/decompress tensors. The properties of tensors are defined by human-readable configs, allowing the users to understand the compression format at a quick glance.
|
21
|
+
This allows developers and researchers to easily experiment with composing different quantization methods, simplify model deployment pipelines, and reduce the overhead of supporting multiple compression formats in inference engines.
|
17
22
|
|
18
23
|
## Installation
|
19
24
|
|
20
|
-
###
|
25
|
+
### From [PyPI](https://pypi.org/project/compressed-tensors)
|
21
26
|
|
27
|
+
Stable release:
|
22
28
|
```bash
|
23
29
|
pip install compressed-tensors
|
24
30
|
```
|
25
31
|
|
26
|
-
|
32
|
+
Nightly release:
|
33
|
+
```bash
|
34
|
+
pip install compressed-tensors-nightly
|
35
|
+
```
|
36
|
+
|
37
|
+
### From Source
|
27
38
|
|
28
39
|
```bash
|
29
40
|
git clone https://github.com/neuralmagic/compressed-tensors
|
@@ -46,10 +46,13 @@ def _setup_packages() -> List:
|
|
46
46
|
)
|
47
47
|
|
48
48
|
def _setup_install_requires() -> List:
|
49
|
-
return ["torch>=1.7.0", "transformers", "accelerate", "pydantic>=2.0"]
|
49
|
+
return ["torch>=1.7.0", "transformers", "pydantic>=2.0"]
|
50
50
|
|
51
51
|
def _setup_extras() -> Dict:
|
52
|
-
return {
|
52
|
+
return {
|
53
|
+
"dev": ["black==22.12.0", "isort==5.8.0", "wheel>=0.36.2", "flake8>=3.8.3", "pytest>=6.0.0", "nbconvert>=7.16.3"],
|
54
|
+
"accelerate": ["accelerate"]
|
55
|
+
}
|
53
56
|
|
54
57
|
setup(
|
55
58
|
name=_PACKAGE_NAME,
|
@@ -16,3 +16,5 @@ SPARSITY_CONFIG_NAME = "sparsity_config"
|
|
16
16
|
QUANTIZATION_CONFIG_NAME = "quantization_config"
|
17
17
|
COMPRESSION_CONFIG_NAME = "compression_config"
|
18
18
|
KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
|
19
|
+
COMPRESSION_VERSION_NAME = "version"
|
20
|
+
QUANTIZATION_METHOD_NAME = "quant_method"
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
# flake8: noqa
|
16
|
+
|
17
|
+
from .base import *
|
18
|
+
from .helpers import *
|
19
|
+
from .model_compressors import *
|
20
|
+
from .quantized_compressors import *
|
21
|
+
from .sparse_compressors import *
|
22
|
+
from .sparse_quantized_compressors import *
|
@@ -0,0 +1,188 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
|
15
|
+
from abc import ABC, abstractmethod
|
16
|
+
from typing import Dict, Generator, Optional, Tuple, Union
|
17
|
+
|
18
|
+
import torch
|
19
|
+
from compressed_tensors.config import SparsityCompressionConfig
|
20
|
+
from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
|
21
|
+
from compressed_tensors.registry import RegistryMixin
|
22
|
+
from torch import Tensor
|
23
|
+
from torch.nn import Module
|
24
|
+
|
25
|
+
|
26
|
+
__all__ = ["BaseCompressor"]
|
27
|
+
|
28
|
+
|
29
|
+
class BaseCompressor(RegistryMixin, ABC):
|
30
|
+
"""
|
31
|
+
Base class representing a model compression algorithm. Each child class should
|
32
|
+
implement compression_param_info, compress_weight and decompress_weight.
|
33
|
+
|
34
|
+
Compressors support compressing/decompressing a full module state dict or a single
|
35
|
+
quantized PyTorch leaf module.
|
36
|
+
|
37
|
+
Model Load Lifecycle (run_compressed=False):
|
38
|
+
- ModelCompressor.decompress()
|
39
|
+
- apply_quantization_config()
|
40
|
+
- BaseCompressor.decompress()
|
41
|
+
|
42
|
+
Model Save Lifecycle:
|
43
|
+
- ModelCompressor.compress()
|
44
|
+
- BaseCompressor.compress()
|
45
|
+
|
46
|
+
|
47
|
+
Module Lifecycle (run_compressed=True):
|
48
|
+
- apply_quantization_config()
|
49
|
+
- compressed_module = CompressedLinear(module)
|
50
|
+
- initialize_module_for_quantization()
|
51
|
+
- BaseCompressor.compression_param_info()
|
52
|
+
- register_parameters()
|
53
|
+
- compressed_module.forward()
|
54
|
+
- compressed_module.decompress()
|
55
|
+
|
56
|
+
|
57
|
+
:param config: config specifying compression parameters
|
58
|
+
"""
|
59
|
+
|
60
|
+
def __init__(
|
61
|
+
self, config: Union[SparsityCompressionConfig, QuantizationConfig, None] = None
|
62
|
+
):
|
63
|
+
self.config = config
|
64
|
+
|
65
|
+
def compression_param_info(
|
66
|
+
self,
|
67
|
+
weight_shape: torch.Size,
|
68
|
+
quantization_args: Optional[QuantizationArgs] = None,
|
69
|
+
) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
|
70
|
+
"""
|
71
|
+
Creates a dictionary of expected shapes and dtypes for each compression
|
72
|
+
parameter used by the compressor
|
73
|
+
|
74
|
+
:param weight_shape: uncompressed weight shape
|
75
|
+
:param quantization_args: quantization parameters for the weight
|
76
|
+
:return: dictionary mapping compressed parameter names to shape and dtype
|
77
|
+
"""
|
78
|
+
raise NotImplementedError()
|
79
|
+
|
80
|
+
@abstractmethod
|
81
|
+
def compress(
|
82
|
+
self,
|
83
|
+
model_state: Dict[str, Tensor],
|
84
|
+
**kwargs,
|
85
|
+
) -> Dict[str, Tensor]:
|
86
|
+
"""
|
87
|
+
Compresses a dense state dict
|
88
|
+
|
89
|
+
:param model_state: state dict of uncompressed model
|
90
|
+
:param kwargs: additional arguments for compression
|
91
|
+
:return: compressed state dict
|
92
|
+
"""
|
93
|
+
raise NotImplementedError()
|
94
|
+
|
95
|
+
@abstractmethod
|
96
|
+
def decompress(
|
97
|
+
self,
|
98
|
+
path_to_model_or_tensors: str,
|
99
|
+
device: str = "cpu",
|
100
|
+
**kwargs,
|
101
|
+
) -> Generator[Tuple[str, Tensor], None, None]:
|
102
|
+
"""
|
103
|
+
Reads a compressed state dict located at path_to_model_or_tensors
|
104
|
+
and returns a generator for sequentially decompressing back to a
|
105
|
+
dense state dict
|
106
|
+
|
107
|
+
:param path_to_model_or_tensors: path to compressed safetensors model (directory
|
108
|
+
with one or more safetensors files) or compressed tensors file
|
109
|
+
:param names_to_scheme: quantization args for each quantized weight
|
110
|
+
:param device: optional device to load intermediate weights into
|
111
|
+
:return: generator yielding (name, tensor) pairs of the decompressed dense state dict
|
112
|
+
"""
|
113
|
+
raise NotImplementedError()
|
114
|
+
|
115
|
+
def compress_module(self, module: Module) -> Optional[Dict[str, torch.Tensor]]:
|
116
|
+
"""
|
117
|
+
Compresses a single quantized leaf PyTorch module. If the module is not
|
118
|
+
quantized, this function has no effect.
|
119
|
+
|
120
|
+
:param module: PyTorch module to compress
|
121
|
+
:return: dictionary of compressed weight data, or None if module is not
|
122
|
+
quantized
|
123
|
+
"""
|
124
|
+
if not hasattr(module, "quantization_scheme"):
|
125
|
+
return None # module is not quantized
|
126
|
+
quantization_scheme = module.quantization_scheme
|
127
|
+
if not hasattr(quantization_scheme, "weights"):
|
128
|
+
return None # weights are not quantized
|
129
|
+
|
130
|
+
quantization_args = quantization_scheme.weights
|
131
|
+
weight = getattr(module, "weight", None)
|
132
|
+
weight_scale = getattr(module, "weight_scale", None)
|
133
|
+
weight_zero_point = getattr(module, "weight_zero_point", None)
|
134
|
+
|
135
|
+
return self.compress_weight(
|
136
|
+
weight=weight,
|
137
|
+
scale=weight_scale,
|
138
|
+
zero_point=weight_zero_point,
|
139
|
+
quantization_args=quantization_args,
|
140
|
+
)
|
141
|
+
|
142
|
+
def compress_weight(
|
143
|
+
self,
|
144
|
+
weight: Tensor,
|
145
|
+
**kwargs,
|
146
|
+
) -> Dict[str, torch.Tensor]:
|
147
|
+
"""
|
148
|
+
Compresses a single uncompressed weight
|
149
|
+
|
150
|
+
:param weight: uncompressed weight tensor
|
151
|
+
:param kwargs: additional arguments for compression
|
152
|
+
"""
|
153
|
+
raise NotImplementedError()
|
154
|
+
|
155
|
+
def decompress_module(self, module: Module):
|
156
|
+
"""
|
157
|
+
Decompresses a single compressed leaf PyTorch module. If the module is not
|
158
|
+
quantized, this function has no effect.
|
159
|
+
|
160
|
+
:param module: PyTorch module to decompress
|
161
|
+
:return: tensor of the decompressed weight, or None if module is not quantized
|
162
|
+
"""
|
163
|
+
if not hasattr(module, "quantization_scheme"):
|
164
|
+
return None # module is not quantized
|
165
|
+
quantization_scheme = module.quantization_scheme
|
166
|
+
if not hasattr(quantization_scheme, "weights"):
|
167
|
+
return None # weights are not quantized
|
168
|
+
|
169
|
+
quantization_args = quantization_scheme.weights
|
170
|
+
compressed_data = {}
|
171
|
+
for name, parameter in module.named_parameters():
|
172
|
+
compressed_data[name] = parameter
|
173
|
+
|
174
|
+
return self.decompress_weight(
|
175
|
+
compressed_data=compressed_data, quantization_args=quantization_args
|
176
|
+
)
|
177
|
+
|
178
|
+
def decompress_weight(
|
179
|
+
self, compressed_data: Dict[str, Tensor], **kwargs
|
180
|
+
) -> torch.Tensor:
|
181
|
+
"""
|
182
|
+
Decompresses a single compressed weight
|
183
|
+
|
184
|
+
:param compressed_data: dictionary of data needed for decompression
|
185
|
+
:param kwargs: additional arguments for decompression
|
186
|
+
:return: tensor of the decompressed weight
|
187
|
+
"""
|
188
|
+
raise NotImplementedError()
|
{compressed_tensors-0.5.0 → compressed-tensors-0.7.0}/src/compressed_tensors/compressors/helpers.py
RENAMED
@@ -16,7 +16,7 @@ from pathlib import Path
|
|
16
16
|
from typing import Dict, Generator, Optional, Tuple, Union
|
17
17
|
|
18
18
|
import torch
|
19
|
-
from compressed_tensors.compressors import
|
19
|
+
from compressed_tensors.compressors import BaseCompressor
|
20
20
|
from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
|
21
21
|
from compressed_tensors.utils.safetensors_load import get_weight_mappings
|
22
22
|
from safetensors import safe_open
|
@@ -52,16 +52,16 @@ def save_compressed(
|
|
52
52
|
compression_format = compression_format or CompressionFormat.dense.value
|
53
53
|
|
54
54
|
if not (
|
55
|
-
compression_format in
|
56
|
-
or compression_format in
|
55
|
+
compression_format in BaseCompressor.registered_names()
|
56
|
+
or compression_format in BaseCompressor.registered_aliases()
|
57
57
|
):
|
58
58
|
raise ValueError(
|
59
59
|
f"Unknown compression format: {compression_format}. "
|
60
|
-
f"Must be one of {set(
|
60
|
+
f"Must be one of {set(BaseCompressor.registered_names() + BaseCompressor.registered_aliases())}" # noqa E501
|
61
61
|
)
|
62
62
|
|
63
63
|
# compress
|
64
|
-
compressor =
|
64
|
+
compressor = BaseCompressor.load_from_registry(compression_format)
|
65
65
|
# save compressed tensors
|
66
66
|
compressed_tensors = compressor.compress(tensors)
|
67
67
|
save_file(compressed_tensors, save_path)
|
@@ -102,7 +102,7 @@ def load_compressed(
|
|
102
102
|
else:
|
103
103
|
# decompress tensors
|
104
104
|
compression_format = compression_config.format
|
105
|
-
compressor =
|
105
|
+
compressor = BaseCompressor.load_from_registry(
|
106
106
|
compression_format, config=compression_config
|
107
107
|
)
|
108
108
|
yield from compressor.decompress(compressed_tensors, device=device)
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
# you may not use this file except in compliance with the License.
|
5
|
+
# You may obtain a copy of the License at
|
6
|
+
#
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
#
|
9
|
+
# Unless required by applicable law or agreed to in writing,
|
10
|
+
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
# See the License for the specific language governing permissions and
|
13
|
+
# limitations under the License.
|
14
|
+
# flake8: noqa
|
15
|
+
|
16
|
+
|
17
|
+
from .model_compressor import *
|