compressed-tensors-nightly 0.3.3.20240528__py3-none-any.whl → 0.3.3.20240530__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/quantization/lifecycle/forward.py +13 -6
- {compressed_tensors_nightly-0.3.3.20240528.dist-info → compressed_tensors_nightly-0.3.3.20240530.dist-info}/METADATA +1 -1
- {compressed_tensors_nightly-0.3.3.20240528.dist-info → compressed_tensors_nightly-0.3.3.20240530.dist-info}/RECORD +6 -6
- {compressed_tensors_nightly-0.3.3.20240528.dist-info → compressed_tensors_nightly-0.3.3.20240530.dist-info}/LICENSE +0 -0
- {compressed_tensors_nightly-0.3.3.20240528.dist-info → compressed_tensors_nightly-0.3.3.20240530.dist-info}/WHEEL +0 -0
- {compressed_tensors_nightly-0.3.3.20240528.dist-info → compressed_tensors_nightly-0.3.3.20240530.dist-info}/top_level.txt +0 -0
@@ -89,11 +89,17 @@ def dequantize(
|
|
89
89
|
if scale.ndim == 0:
|
90
90
|
args = QuantizationArgs(strategy=QuantizationStrategy.TENSOR)
|
91
91
|
elif scale.ndim == 2:
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
92
|
+
if scale.shape[1] == 1:
|
93
|
+
args = QuantizationArgs(strategy=QuantizationStrategy.CHANNEL)
|
94
|
+
else:
|
95
|
+
group_size = int(x_q.shape[1] / scale.shape[1])
|
96
|
+
args = QuantizationArgs(
|
97
|
+
strategy=QuantizationStrategy.GROUP, group_size=group_size
|
98
|
+
)
|
99
|
+
else:
|
100
|
+
raise ValueError(
|
101
|
+
f"Could not infer a quantization strategy from scale with {scale.ndim} "
|
102
|
+
"dimmensions. Expected 0-2 dimmensions."
|
97
103
|
)
|
98
104
|
return _process_quantization(
|
99
105
|
x=x_q,
|
@@ -152,7 +158,8 @@ def _process_quantization(
|
|
152
158
|
|
153
159
|
if args.strategy == QuantizationStrategy.GROUP:
|
154
160
|
|
155
|
-
if do_dequantize:
|
161
|
+
if do_dequantize and not do_quantize:
|
162
|
+
# if dequantizing a quantized type infer the output type from the scale
|
156
163
|
output = torch.zeros_like(x, dtype=scale.dtype)
|
157
164
|
else:
|
158
165
|
output_dtype = dtype if dtype is not None else x.dtype
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.3.3.20240528
|
3
|
+
Version: 0.3.3.20240530
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -21,7 +21,7 @@ compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcg
|
|
21
21
|
compressed_tensors/quantization/lifecycle/apply.py,sha256=yLTDT1zkJp1Nti-aKZGOMW8-TELanF8dXiqDvAkVUQo,7984
|
22
22
|
compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
|
23
23
|
compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
|
24
|
-
compressed_tensors/quantization/lifecycle/forward.py,sha256=
|
24
|
+
compressed_tensors/quantization/lifecycle/forward.py,sha256=xeHaUbFxcUyqHffhCBZiRk-ObxjAF99rTnPR1Cweym0,10822
|
25
25
|
compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
|
26
26
|
compressed_tensors/quantization/lifecycle/initialize.py,sha256=pFfcu-pxdQKzlnn-18-RlkEktt2yDi6woNXJsiv1A2c,3732
|
27
27
|
compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
|
@@ -36,8 +36,8 @@ compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85S
|
|
36
36
|
compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
|
37
37
|
compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
|
38
38
|
compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
|
39
|
-
compressed_tensors_nightly-0.3.3.20240528.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
40
|
-
compressed_tensors_nightly-0.3.3.
|
41
|
-
compressed_tensors_nightly-0.3.3.20240528.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
42
|
-
compressed_tensors_nightly-0.3.3.20240528.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
43
|
-
compressed_tensors_nightly-0.3.3.20240528.dist-info/RECORD,,
|
39
|
+
compressed_tensors_nightly-0.3.3.20240530.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
40
|
+
compressed_tensors_nightly-0.3.3.20240530.dist-info/METADATA,sha256=qwUahaxoHtwQWO9CbLp2YOw7M4xzLJ5uUvDnJg3m3GA,5633
|
41
|
+
compressed_tensors_nightly-0.3.3.20240530.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
42
|
+
compressed_tensors_nightly-0.3.3.20240530.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
43
|
+
compressed_tensors_nightly-0.3.3.20240530.dist-info/RECORD,,
|
File without changes
|
File without changes
|