compressed-tensors-nightly 0.7.0.20241011__tar.gz → 0.7.0.20241013__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-nightly-0.7.0.20241011/src/compressed_tensors_nightly.egg-info → compressed-tensors-nightly-0.7.0.20241013}/PKG-INFO +1 -1
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/lifecycle/forward.py +6 -4
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/lifecycle/initialize.py +7 -6
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/observers/__init__.py +0 -1
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/observers/helpers.py +40 -2
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/quant_args.py +28 -4
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/quant_scheme.py +3 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013/src/compressed_tensors_nightly.egg-info}/PKG-INFO +1 -1
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors_nightly.egg-info/SOURCES.txt +0 -1
- compressed-tensors-nightly-0.7.0.20241011/src/compressed_tensors/quantization/observers/memoryless.py +0 -56
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/LICENSE +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/README.md +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/pyproject.toml +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/setup.cfg +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/setup.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/base.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/config/base.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/cache.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/lifecycle/calibration.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/lifecycle/frozen.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/observers/base.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/observers/min_max.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/observers/mse.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/utils/helpers.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/utils/offload.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors/version.py +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors_nightly.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors_nightly.egg-info/requires.txt +0 -0
- {compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/src/compressed_tensors_nightly.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.7.0.20241011
|
3
|
+
Version: 0.7.0.20241013
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -18,7 +18,10 @@ from typing import Callable, Optional
|
|
18
18
|
|
19
19
|
import torch
|
20
20
|
from compressed_tensors.quantization.cache import QuantizedKVParameterCache
|
21
|
-
from compressed_tensors.quantization.observers.helpers import calculate_range
|
21
|
+
from compressed_tensors.quantization.observers.helpers import (
|
22
|
+
calculate_range,
|
23
|
+
compute_dynamic_scales_and_zp,
|
24
|
+
)
|
22
25
|
from compressed_tensors.quantization.quant_args import (
|
23
26
|
QuantizationArgs,
|
24
27
|
QuantizationStrategy,
|
@@ -376,9 +379,8 @@ def maybe_calibrate_or_quantize(
|
|
376
379
|
g_idx = getattr(module, "weight_g_idx", None)
|
377
380
|
|
378
381
|
if args.dynamic:
|
379
|
-
# dynamic quantization -
|
380
|
-
|
381
|
-
scale, zero_point = observer(value, g_idx=g_idx)
|
382
|
+
# dynamic quantization - no need to invoke observer
|
383
|
+
scale, zero_point = compute_dynamic_scales_and_zp(value=value, args=args)
|
382
384
|
else:
|
383
385
|
# static quantization - get previous scale and zero point from layer
|
384
386
|
scale = getattr(module, f"{base_name}_scale")
|
@@ -153,12 +153,16 @@ def _initialize_scale_zero_point_observer(
|
|
153
153
|
weight_shape: Optional[torch.Size] = None,
|
154
154
|
force_zero_point: bool = True,
|
155
155
|
):
|
156
|
+
|
156
157
|
# initialize observer module and attach as submodule
|
157
158
|
observer = quantization_args.get_observer()
|
158
|
-
|
159
|
+
# no need to register an observer for dynamic quantization
|
160
|
+
if observer:
|
161
|
+
module.register_module(f"{base_name}_observer", observer)
|
159
162
|
|
163
|
+
# no need to register a scale and zero point for a dynamic quantization
|
160
164
|
if quantization_args.dynamic:
|
161
|
-
return
|
165
|
+
return
|
162
166
|
|
163
167
|
device = next(module.parameters()).device
|
164
168
|
if is_module_offloaded(module):
|
@@ -173,10 +177,7 @@ def _initialize_scale_zero_point_observer(
|
|
173
177
|
expected_shape = (weight_shape[0], 1)
|
174
178
|
elif quantization_args.strategy == QuantizationStrategy.GROUP:
|
175
179
|
num_groups = weight_shape[1] // quantization_args.group_size
|
176
|
-
expected_shape = (
|
177
|
-
weight_shape[0],
|
178
|
-
max(num_groups, 1)
|
179
|
-
)
|
180
|
+
expected_shape = (weight_shape[0], max(num_groups, 1))
|
180
181
|
|
181
182
|
scale_dtype = module.weight.dtype
|
182
183
|
if scale_dtype not in [torch.float16, torch.bfloat16, torch.float32]:
|
@@ -13,18 +13,56 @@
|
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
15
|
from collections import Counter
|
16
|
-
from typing import Tuple
|
16
|
+
from typing import Optional, Tuple
|
17
17
|
|
18
18
|
import torch
|
19
19
|
from compressed_tensors.quantization.quant_args import (
|
20
20
|
FP8_DTYPE,
|
21
21
|
QuantizationArgs,
|
22
|
+
QuantizationStrategy,
|
22
23
|
QuantizationType,
|
23
24
|
)
|
24
25
|
from torch import FloatTensor, IntTensor, Tensor
|
25
26
|
|
26
27
|
|
27
|
-
__all__ = ["calculate_qparams", "get_observer_token_count", "calculate_range"]
|
28
|
+
__all__ = [
|
29
|
+
"calculate_qparams",
|
30
|
+
"get_observer_token_count",
|
31
|
+
"calculate_range",
|
32
|
+
"compute_dynamic_scales_and_zp",
|
33
|
+
]
|
34
|
+
|
35
|
+
|
36
|
+
def compute_dynamic_scales_and_zp(value: Tensor, args: QuantizationArgs):
|
37
|
+
"""
|
38
|
+
Returns the computed scales and zero points for dynamic activation
|
39
|
+
qunatization.
|
40
|
+
|
41
|
+
:param value: tensor to calculate quantization parameters for
|
42
|
+
:param args: quantization args
|
43
|
+
:param reduce_dims: optional tuple of dimensions to reduce along,
|
44
|
+
returned scale and zero point will be shaped (1,) along the
|
45
|
+
reduced dimensions
|
46
|
+
:return: tuple of scale and zero point derived from the observed tensor
|
47
|
+
"""
|
48
|
+
if args.strategy == QuantizationStrategy.TOKEN:
|
49
|
+
dim = {1, 2}
|
50
|
+
reduce_dims = tuple(idx for idx in range(value.ndim) if idx not in dim)
|
51
|
+
elif args.strategy == QuantizationStrategy.TENSOR:
|
52
|
+
reduce_dims = None
|
53
|
+
else:
|
54
|
+
raise ValueError(
|
55
|
+
f"One of {QuantizationStrategy.TOKEN} or {QuantizationStrategy.TENSOR} ",
|
56
|
+
"must be used for dynamic quantization",
|
57
|
+
)
|
58
|
+
|
59
|
+
if not reduce_dims:
|
60
|
+
min_val, max_val = torch.aminmax(value)
|
61
|
+
else:
|
62
|
+
min_val = torch.amin(value, dim=reduce_dims, keepdims=True)
|
63
|
+
max_val = torch.amax(value, dim=reduce_dims, keepdims=True)
|
64
|
+
|
65
|
+
return calculate_qparams(min_val, max_val, args)
|
28
66
|
|
29
67
|
|
30
68
|
def get_observer_token_count(module: torch.nn.Module) -> Counter:
|
@@ -12,6 +12,7 @@
|
|
12
12
|
# See the License for the specific language governing permissions and
|
13
13
|
# limitations under the License.
|
14
14
|
|
15
|
+
import warnings
|
15
16
|
from enum import Enum
|
16
17
|
from typing import Any, Dict, Optional, Union
|
17
18
|
|
@@ -94,7 +95,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
94
95
|
block_structure: Optional[str] = None
|
95
96
|
dynamic: bool = False
|
96
97
|
actorder: Union[ActivationOrdering, bool, None] = None
|
97
|
-
observer: str = Field(
|
98
|
+
observer: Optional[str] = Field(
|
98
99
|
default="minmax",
|
99
100
|
description=(
|
100
101
|
"The class to use to compute the quantization param - "
|
@@ -115,10 +116,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
115
116
|
"""
|
116
117
|
from compressed_tensors.quantization.observers.base import Observer
|
117
118
|
|
119
|
+
# No observer required for the dynamic case
|
118
120
|
if self.dynamic:
|
119
|
-
|
120
|
-
|
121
|
-
self.observer = "memoryless"
|
121
|
+
self.observer = None
|
122
|
+
return self.observer
|
122
123
|
|
123
124
|
return Observer.load_from_registry(self.observer, quantization_args=self)
|
124
125
|
|
@@ -171,6 +172,8 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
171
172
|
strategy = model.strategy
|
172
173
|
group_size = model.group_size
|
173
174
|
actorder = model.actorder
|
175
|
+
dynamic = model.dynamic
|
176
|
+
observer = model.observer
|
174
177
|
|
175
178
|
# infer strategy
|
176
179
|
if strategy is None:
|
@@ -207,6 +210,27 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
207
210
|
"activation ordering"
|
208
211
|
)
|
209
212
|
|
213
|
+
if dynamic:
|
214
|
+
if strategy not in (
|
215
|
+
QuantizationStrategy.TOKEN,
|
216
|
+
QuantizationStrategy.TENSOR,
|
217
|
+
):
|
218
|
+
raise ValueError(
|
219
|
+
f"One of {QuantizationStrategy.TOKEN} or "
|
220
|
+
f"{QuantizationStrategy.TENSOR} must be used for dynamic ",
|
221
|
+
"quantization",
|
222
|
+
)
|
223
|
+
if observer is not None:
|
224
|
+
warnings.warn(
|
225
|
+
"No observer is used for dynamic quantization, setting to None"
|
226
|
+
)
|
227
|
+
model.observer = None
|
228
|
+
|
229
|
+
# if we have not set an observer and we
|
230
|
+
# are running static quantization, use minmax
|
231
|
+
if not observer and not dynamic:
|
232
|
+
model.observer = "minmax"
|
233
|
+
|
210
234
|
# write back modified values
|
211
235
|
model.strategy = strategy
|
212
236
|
return model
|
@@ -122,6 +122,7 @@ INT8_W8A8 = dict(
|
|
122
122
|
strategy=QuantizationStrategy.TOKEN,
|
123
123
|
symmetric=True,
|
124
124
|
dynamic=True,
|
125
|
+
observer=None,
|
125
126
|
),
|
126
127
|
)
|
127
128
|
|
@@ -164,6 +165,7 @@ INT8_W4A8 = dict(
|
|
164
165
|
strategy=QuantizationStrategy.TOKEN,
|
165
166
|
symmetric=True,
|
166
167
|
dynamic=True,
|
168
|
+
observer=None,
|
167
169
|
),
|
168
170
|
)
|
169
171
|
|
@@ -200,6 +202,7 @@ FP8_DYNAMIC = dict(
|
|
200
202
|
strategy=QuantizationStrategy.TOKEN,
|
201
203
|
symmetric=True,
|
202
204
|
dynamic=True,
|
205
|
+
observer=None,
|
203
206
|
),
|
204
207
|
)
|
205
208
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.7.0.20241011
|
3
|
+
Version: 0.7.0.20241013
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -43,7 +43,6 @@ src/compressed_tensors/quantization/lifecycle/initialize.py
|
|
43
43
|
src/compressed_tensors/quantization/observers/__init__.py
|
44
44
|
src/compressed_tensors/quantization/observers/base.py
|
45
45
|
src/compressed_tensors/quantization/observers/helpers.py
|
46
|
-
src/compressed_tensors/quantization/observers/memoryless.py
|
47
46
|
src/compressed_tensors/quantization/observers/min_max.py
|
48
47
|
src/compressed_tensors/quantization/observers/mse.py
|
49
48
|
src/compressed_tensors/quantization/utils/__init__.py
|
@@ -1,56 +0,0 @@
|
|
1
|
-
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
|
2
|
-
#
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
-
# you may not use this file except in compliance with the License.
|
5
|
-
# You may obtain a copy of the License at
|
6
|
-
#
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
8
|
-
#
|
9
|
-
# Unless required by applicable law or agreed to in writing,
|
10
|
-
# software distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
-
# See the License for the specific language governing permissions and
|
13
|
-
# limitations under the License.
|
14
|
-
|
15
|
-
from typing import Any, Optional, Tuple
|
16
|
-
|
17
|
-
import torch
|
18
|
-
from compressed_tensors.quantization.observers.base import Observer
|
19
|
-
from compressed_tensors.quantization.observers.helpers import calculate_qparams
|
20
|
-
from torch import FloatTensor, IntTensor, Tensor
|
21
|
-
|
22
|
-
|
23
|
-
__all__ = ["MemorylessObserver"]
|
24
|
-
|
25
|
-
|
26
|
-
@Observer.register("memoryless", alias=["dynamic"])
|
27
|
-
class MemorylessObserver(Observer):
|
28
|
-
"""
|
29
|
-
Implements a quantization observer that sets the scale and
|
30
|
-
zero point based on the latest observed value without tracking state
|
31
|
-
"""
|
32
|
-
|
33
|
-
def calculate_qparams(
|
34
|
-
self,
|
35
|
-
observed: Tensor,
|
36
|
-
tensor_id: Optional[Any] = None,
|
37
|
-
reduce_dims: Optional[Tuple[int]] = None,
|
38
|
-
) -> Tuple[FloatTensor, IntTensor]:
|
39
|
-
"""
|
40
|
-
Returns the min and max values of observed tensor
|
41
|
-
|
42
|
-
:param observed: observed tensor to calculate quantization parameters for
|
43
|
-
:param tensor_id: optional id for tensor; not used for memoryless
|
44
|
-
:param reduce_dims: optional tuple of dimensions to reduce along,
|
45
|
-
returned scale and zero point will be shaped (1,) along the
|
46
|
-
reduced dimensions
|
47
|
-
:return: tuple of scale and zero point derived from the observed tensor
|
48
|
-
"""
|
49
|
-
|
50
|
-
if not reduce_dims:
|
51
|
-
min_val, max_val = torch.aminmax(observed)
|
52
|
-
else:
|
53
|
-
min_val = torch.amin(observed, dim=reduce_dims, keepdims=True)
|
54
|
-
max_val = torch.amax(observed, dim=reduce_dims, keepdims=True)
|
55
|
-
|
56
|
-
return calculate_qparams(min_val, max_val, self.quantization_args)
|
{compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/LICENSE
RENAMED
File without changes
|
{compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/README.md
RENAMED
File without changes
|
File without changes
|
{compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/setup.cfg
RENAMED
File without changes
|
{compressed-tensors-nightly-0.7.0.20241011 → compressed-tensors-nightly-0.7.0.20241013}/setup.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|