compressed-tensors-nightly 0.9.1.20250128__tar.gz → 0.9.1.20250201__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {compressed-tensors-nightly-0.9.1.20250128/src/compressed_tensors_nightly.egg-info → compressed-tensors-nightly-0.9.1.20250201}/PKG-INFO +1 -1
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/quantized_compressors/pack_quantized.py +23 -6
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/quant_args.py +5 -6
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/utils/helpers.py +2 -2
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/utils/offload.py +7 -1
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201/src/compressed_tensors_nightly.egg-info}/PKG-INFO +1 -1
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/LICENSE +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/README.md +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/pyproject.toml +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/setup.cfg +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/setup.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/base.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/base.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/helpers.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/model_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/model_compressors/model_compressor.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/quantized_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/quantized_compressors/base.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/quantized_compressors/naive_quantized.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/sparse_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/sparse_compressors/base.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/sparse_compressors/dense.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/sparse_compressors/sparse_24_bitmask.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/sparse_compressors/sparse_bitmask.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/sparse_quantized_compressors/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/compressors/sparse_quantized_compressors/marlin_24.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/config/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/config/base.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/config/dense.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/config/sparse_24_bitmask.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/config/sparse_bitmask.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/linear/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/linear/compressed_linear.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/lifecycle/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/lifecycle/apply.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/lifecycle/compressed.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/lifecycle/forward.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/lifecycle/helpers.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/lifecycle/initialize.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/quant_config.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/quant_scheme.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/utils/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/quantization/utils/helpers.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/registry/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/registry/registry.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/utils/__init__.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/utils/permutations_24.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/utils/permute.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/utils/safetensors_load.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/utils/semi_structured_conversions.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors/version.py +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors_nightly.egg-info/SOURCES.txt +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors_nightly.egg-info/dependency_links.txt +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors_nightly.egg-info/requires.txt +0 -0
- {compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/src/compressed_tensors_nightly.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.9.1.20250128
|
3
|
+
Version: 0.9.1.20250201
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
@@ -138,8 +138,20 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
|
|
138
138
|
"""
|
139
139
|
Packs a tensor of quantized weights stored in int8 into int32s with padding
|
140
140
|
|
141
|
+
Pseudocode:
|
142
|
+
1. Shift wrt num_bits to convert to unsigned. num_bits=8
|
143
|
+
[1,2] -> [129, 130]
|
144
|
+
2. Pad to fill in 32 bits
|
145
|
+
[129, 130] -> [129, 130, 0, 0]
|
146
|
+
3. convert to binary align in order
|
147
|
+
[129, 130, 0, 0] -> 00000000 00000000 10000010 10000001
|
148
|
+
4. convert aligned binary to number
|
149
|
+
00000000000000001000001010000001 -> 33409
|
150
|
+
5. covert back to uint32
|
151
|
+
33409 -> 33409
|
152
|
+
|
141
153
|
:param value: tensor to pack
|
142
|
-
:param num_bits: number of bits used to store underlying data
|
154
|
+
:param num_bits: number of bits used to store underlying data, must be at least 1
|
143
155
|
:returns: packed int32 tensor
|
144
156
|
"""
|
145
157
|
if value.dtype is not torch.int8:
|
@@ -148,19 +160,22 @@ def pack_to_int32(value: torch.Tensor, num_bits: int) -> torch.Tensor:
|
|
148
160
|
if num_bits > 8:
|
149
161
|
raise ValueError("Packing is only supported for less than 8 bits")
|
150
162
|
|
163
|
+
if num_bits < 1:
|
164
|
+
raise ValueError(f"num_bits must be at least 1, got {num_bits}")
|
165
|
+
|
151
166
|
# convert to unsigned for packing
|
152
|
-
offset =
|
167
|
+
offset = 1 << (num_bits - 1)
|
153
168
|
value = (value + offset).to(torch.uint8)
|
154
169
|
value = value.cpu().numpy().astype(np.uint32)
|
155
170
|
pack_factor = 32 // num_bits
|
156
171
|
|
157
172
|
# pad input tensor and initialize packed output
|
158
173
|
packed_size = math.ceil(value.shape[1] / pack_factor)
|
159
|
-
|
160
|
-
padding = packed.shape[1] * pack_factor - value.shape[1]
|
174
|
+
padding = packed_size * pack_factor - value.shape[1]
|
161
175
|
value = np.pad(value, pad_width=[(0, 0), (0, padding)], constant_values=0)
|
162
176
|
|
163
177
|
# pack values
|
178
|
+
packed = np.zeros((value.shape[0], packed_size), dtype=np.uint32)
|
164
179
|
for i in range(pack_factor):
|
165
180
|
packed |= value[:, i::pack_factor] << num_bits * i
|
166
181
|
|
@@ -174,7 +189,9 @@ def unpack_from_int32(
|
|
174
189
|
) -> torch.Tensor:
|
175
190
|
"""
|
176
191
|
Unpacks a tensor of packed int32 weights into individual int8s, maintaining the
|
177
|
-
original
|
192
|
+
original bit range.
|
193
|
+
|
194
|
+
Return tensors in int8
|
178
195
|
|
179
196
|
:param value: tensor to upack
|
180
197
|
:param num_bits: number of bits to unpack each data point into
|
@@ -192,7 +209,7 @@ def unpack_from_int32(
|
|
192
209
|
pack_factor = 32 // num_bits
|
193
210
|
|
194
211
|
# unpack
|
195
|
-
mask =
|
212
|
+
mask = (1 << num_bits) - 1
|
196
213
|
unpacked = torch.zeros(
|
197
214
|
(value.shape[0], value.shape[1] * pack_factor),
|
198
215
|
device=value.device,
|
@@ -18,6 +18,7 @@ from typing import Any, Dict, Optional, Union
|
|
18
18
|
|
19
19
|
import torch
|
20
20
|
from compressed_tensors.utils import Aliasable
|
21
|
+
from compressed_tensors.utils.helpers import deprecated
|
21
22
|
from pydantic import BaseModel, Field, field_validator, model_validator
|
22
23
|
|
23
24
|
|
@@ -123,12 +124,6 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
123
124
|
),
|
124
125
|
)
|
125
126
|
|
126
|
-
def get_observer(self):
|
127
|
-
"""
|
128
|
-
:return: torch quantization FakeQuantize built based on these QuantizationArgs
|
129
|
-
"""
|
130
|
-
return self.observer
|
131
|
-
|
132
127
|
@field_validator("type", mode="before")
|
133
128
|
def validate_type(cls, value) -> QuantizationType:
|
134
129
|
if isinstance(value, str):
|
@@ -250,6 +245,10 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
|
|
250
245
|
else:
|
251
246
|
raise ValueError(f"Invalid quantization type {self.type}")
|
252
247
|
|
248
|
+
@deprecated("QuantizationArgs.observer")
|
249
|
+
def get_observer(self) -> str:
|
250
|
+
return self.observer
|
251
|
+
|
253
252
|
|
254
253
|
def round_to_quantized_type(
|
255
254
|
tensor: torch.Tensor, args: QuantizationArgs
|
@@ -170,8 +170,8 @@ def deprecated(future_name: Optional[str] = None, message: Optional[str] = None)
|
|
170
170
|
"""
|
171
171
|
Decorator to mark functions as deprecated
|
172
172
|
|
173
|
-
:param new_function: Function called in place of
|
174
|
-
:param message:
|
173
|
+
:param new_function: Function called in place of deprecated function
|
174
|
+
:param message: Deprecation message, replaces default deprecation message
|
175
175
|
"""
|
176
176
|
|
177
177
|
def decorator(func: Callable[[Any], Any]):
|
@@ -26,6 +26,7 @@ Utilities associated with offloading functionality provided by `accelerate`.
|
|
26
26
|
"""
|
27
27
|
|
28
28
|
import contextlib
|
29
|
+
import warnings
|
29
30
|
from functools import wraps
|
30
31
|
from typing import Any, Callable, Dict, Literal, Optional, Union
|
31
32
|
|
@@ -200,9 +201,14 @@ def update_offload_parameter(
|
|
200
201
|
"""
|
201
202
|
param = getattr(module, name)
|
202
203
|
data = data.to(param.dtype)
|
204
|
+
if param.data.shape != data.shape:
|
205
|
+
warnings.warn(
|
206
|
+
f"Shape of parameter being updated {param.data.shape} does not match shape "
|
207
|
+
f"of update data {data.shape}"
|
208
|
+
)
|
203
209
|
|
204
210
|
# copy data into onloaded parameter if applicable
|
205
|
-
if param.device != "meta":
|
211
|
+
if param.device != torch.device("meta"):
|
206
212
|
param.data.copy_(data)
|
207
213
|
|
208
214
|
# update offload dict
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: compressed-tensors-nightly
|
3
|
-
Version: 0.9.1.20250128
|
3
|
+
Version: 0.9.1.20250201
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
{compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/LICENSE
RENAMED
File without changes
|
{compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/README.md
RENAMED
File without changes
|
File without changes
|
{compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/setup.cfg
RENAMED
File without changes
|
{compressed-tensors-nightly-0.9.1.20250128 → compressed-tensors-nightly-0.9.1.20250201}/setup.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|