compressed-tensors 0.11.1a20250821__py3-none-any.whl → 0.11.1a20250828__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compressed_tensors/compressors/model_compressors/model_compressor.py +6 -3
- compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py +5 -4
- compressed_tensors/quantization/lifecycle/apply.py +14 -19
- compressed_tensors/version.py +1 -1
- {compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/METADATA +1 -1
- {compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/RECORD +9 -9
- {compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/WHEEL +0 -0
- {compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/licenses/LICENSE +0 -0
- {compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/top_level.txt +0 -0
@@ -703,9 +703,12 @@ class ModelCompressor:
|
|
703
703
|
with override_quantization_status(
|
704
704
|
self.quantization_config, QuantizationStatus.FROZEN
|
705
705
|
):
|
706
|
-
|
707
|
-
|
708
|
-
|
706
|
+
apply_quantization_config(model, self.quantization_config)
|
707
|
+
names_to_scheme: Set[QuantizationScheme] = {
|
708
|
+
name: getattr(module, "quantization_scheme")
|
709
|
+
for name, module in model.named_modules()
|
710
|
+
if getattr(module, "quantization_scheme", None) is not None
|
711
|
+
}
|
709
712
|
# Load activation scales/zp or any other quantization parameters
|
710
713
|
# Conditionally load the weight quantization parameters if we have a
|
711
714
|
# dense compressor or if a sparsity compressor has already been applied
|
@@ -123,6 +123,7 @@ class NVFP4PackedCompressor(BaseQuantizationCompressor):
|
|
123
123
|
return decompressed_weight
|
124
124
|
|
125
125
|
|
126
|
+
@torch.compile(fullgraph=True, dynamic=True)
|
126
127
|
def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
|
127
128
|
"""
|
128
129
|
Packs a tensor with values in the fp4 range into uint8.
|
@@ -145,12 +146,11 @@ def pack_fp4_to_uint8(x: torch.Tensor) -> torch.Tensor:
|
|
145
146
|
|
146
147
|
# Find closest valid FP4 value index for each element
|
147
148
|
abs_x = torch.abs(x)
|
148
|
-
|
149
|
-
|
150
|
-
abs_indices = torch.where(torch.isclose(abs_x, val), i, abs_indices)
|
149
|
+
abs_diff_x = torch.abs(abs_x.unsqueeze(-1) - kE2M1) # [m, n, 8]
|
150
|
+
abs_indices = torch.argmin(abs_diff_x, dim=-1) # [m, n]
|
151
151
|
|
152
152
|
# Apply sign bit (bit 3) to get final 4-bit representation
|
153
|
-
indices = abs_indices + (torch.signbit(x)
|
153
|
+
indices = abs_indices + (torch.signbit(x).to(torch.long) << 3)
|
154
154
|
|
155
155
|
# Reshape to prepare for packing pairs of values
|
156
156
|
indices = indices.reshape(-1)
|
@@ -174,6 +174,7 @@ kE2M1ToFloat = torch.tensor(
|
|
174
174
|
|
175
175
|
|
176
176
|
# reference: https://github.com/vllm-project/vllm/pull/16362
|
177
|
+
@torch.compile(fullgraph=True, dynamic=True)
|
177
178
|
def unpack_fp4_from_uint8(
|
178
179
|
a: torch.Tensor, m: int, n: int, dtype: Optional[torch.dtype] = torch.bfloat16
|
179
180
|
) -> torch.Tensor:
|
@@ -115,7 +115,7 @@ def load_pretrained_quantization_parameters(
|
|
115
115
|
|
116
116
|
def apply_quantization_config(
|
117
117
|
model: Module, config: Union[QuantizationConfig, None], run_compressed: bool = False
|
118
|
-
)
|
118
|
+
):
|
119
119
|
"""
|
120
120
|
Initializes the model for quantization in-place based on the given config.
|
121
121
|
Optionally converts quantizable modules to compressed_linear modules
|
@@ -125,26 +125,22 @@ def apply_quantization_config(
|
|
125
125
|
:param run_compressed: Whether the model will be run in compressed mode or
|
126
126
|
decompressed fully on load
|
127
127
|
"""
|
128
|
-
|
129
|
-
if config is None:
|
130
|
-
return dict()
|
128
|
+
from compressed_tensors.linear.compressed_linear import CompressedLinear
|
131
129
|
|
132
|
-
# remove reference to the original `config`
|
133
|
-
# argument. This function can mutate it, and we'd
|
134
|
-
# like to keep the original `config` as it is.
|
135
130
|
config = deepcopy(config)
|
131
|
+
if config is None: # see PR #180
|
132
|
+
return dict()
|
133
|
+
|
134
|
+
# preprocess to support kv cache scheme
|
135
|
+
config = process_quantization_config(config)
|
136
|
+
|
136
137
|
# build mapping of targets to schemes for easier matching
|
137
138
|
# use ordered dict to preserve target ordering in config
|
138
139
|
target_to_scheme = OrderedDict()
|
139
|
-
config = process_quantization_config(config)
|
140
|
-
names_to_scheme = dict()
|
141
140
|
for scheme in config.config_groups.values():
|
142
141
|
for target in scheme.targets:
|
143
142
|
target_to_scheme[target] = scheme
|
144
143
|
|
145
|
-
if run_compressed:
|
146
|
-
from compressed_tensors.linear.compressed_linear import CompressedLinear
|
147
|
-
|
148
144
|
# mark appropriate layers for quantization by setting their quantization schemes
|
149
145
|
for name, submodule in match_named_modules(
|
150
146
|
model, target_to_scheme, config.ignore, warn_on_fail=True
|
@@ -153,7 +149,12 @@ def apply_quantization_config(
|
|
153
149
|
# quant scheme to the matching layers
|
154
150
|
matched_targets = match_targets(name, submodule, target_to_scheme)
|
155
151
|
scheme = _scheme_from_targets(target_to_scheme, matched_targets, name)
|
156
|
-
|
152
|
+
# target matched - add layer and scheme to target list
|
153
|
+
submodule.quantization_scheme = scheme
|
154
|
+
|
155
|
+
# replace with run compressed if applicable
|
156
|
+
# FUTURE: move this to model compressor
|
157
|
+
if isinstance(submodule, torch.nn.Linear) and run_compressed:
|
157
158
|
format = config.format
|
158
159
|
if format != CompressionFormat.dense.value:
|
159
160
|
if isinstance(submodule, torch.nn.Linear):
|
@@ -165,14 +166,8 @@ def apply_quantization_config(
|
|
165
166
|
)
|
166
167
|
replace_module(model, name, compressed_linear)
|
167
168
|
|
168
|
-
# target matched - add layer and scheme to target list
|
169
|
-
submodule.quantization_scheme = scheme
|
170
|
-
|
171
|
-
names_to_scheme[name] = submodule.quantization_scheme
|
172
|
-
|
173
169
|
# apply current quantization status across all targeted layers
|
174
170
|
apply_quantization_status(model, config.quantization_status)
|
175
|
-
return names_to_scheme
|
176
171
|
|
177
172
|
|
178
173
|
def process_quantization_config(config: QuantizationConfig) -> QuantizationConfig:
|
compressed_tensors/version.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: compressed-tensors
|
3
|
-
Version: 0.11.1a20250821
|
3
|
+
Version: 0.11.1a20250828
|
4
4
|
Summary: Library for utilization of compressed safetensors of neural network models
|
5
5
|
Home-page: https://github.com/neuralmagic/compressed-tensors
|
6
6
|
Author: Neuralmagic, Inc.
|
{compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/RECORD
RENAMED
@@ -1,15 +1,15 @@
|
|
1
1
|
compressed_tensors/__init__.py,sha256=UtKmifNeBCSE2TZSAfduVNNzHY-3V7bLjZ7n7RuXLOE,812
|
2
2
|
compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
|
3
|
-
compressed_tensors/version.py,sha256=
|
3
|
+
compressed_tensors/version.py,sha256=NChUyeUoxQUAMGsjmgMd6I-sPb4p5iHss-5eGrWhivg,523
|
4
4
|
compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
|
5
5
|
compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
|
6
6
|
compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
|
7
7
|
compressed_tensors/compressors/model_compressors/__init__.py,sha256=5RGGPFu4YqEt_aOdFSQYFYFDjcZFJN0CsMqRtDZz3Js,666
|
8
|
-
compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=
|
8
|
+
compressed_tensors/compressors/model_compressors/model_compressor.py,sha256=mZqpBS5znPHedlVVkKsUsVCs52zK5bAmEiI8cqMBKnY,37618
|
9
9
|
compressed_tensors/compressors/quantized_compressors/__init__.py,sha256=KvaFBL_Q84LxRGJOV035M8OBoCkAx8kOkfphswgkKWk,745
|
10
10
|
compressed_tensors/compressors/quantized_compressors/base.py,sha256=_mqTG_HjAIbHqDGucA3ZR_01OXU3CMFxtrDjfM-kY0g,10301
|
11
11
|
compressed_tensors/compressors/quantized_compressors/naive_quantized.py,sha256=0ANDcuD8aXPqTYNPY6GnX9iS6eXJw6P0TzNV_rYS2l8,5369
|
12
|
-
compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=
|
12
|
+
compressed_tensors/compressors/quantized_compressors/nvfp4_quantized.py,sha256=Qq790d5VQQccq6Dj8YhBwhr7S3DqMJNoYPI5S6M1FNo,7183
|
13
13
|
compressed_tensors/compressors/quantized_compressors/pack_quantized.py,sha256=D8h9ltxSIYi1XEKYgbYu1ebbXzCibhPi-eZsBUi0NOg,11245
|
14
14
|
compressed_tensors/compressors/sparse_compressors/__init__.py,sha256=Atuz-OdEgn8OCUhx7Ovd6gXdyImAI186uCR-uR0t_Nk,737
|
15
15
|
compressed_tensors/compressors/sparse_compressors/base.py,sha256=YNZWcHjDleAlqbgRZQ6oJf44MQb_UDNvJGOqhl26uFA,8098
|
@@ -30,7 +30,7 @@ compressed_tensors/quantization/quant_args.py,sha256=5AxYKqCSlg7CDgz2N8G4ZRVIiSU
|
|
30
30
|
compressed_tensors/quantization/quant_config.py,sha256=2NgDwKuQn0f-ojiHC8c6tXtYX_zQlk26Rj-bU71QKvA,10598
|
31
31
|
compressed_tensors/quantization/quant_scheme.py,sha256=X5Z7oXMLPXnX8g-UvWXlRjn4YnD_qTk5mXfGzu20k9o,8903
|
32
32
|
compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
|
33
|
-
compressed_tensors/quantization/lifecycle/apply.py,sha256=
|
33
|
+
compressed_tensors/quantization/lifecycle/apply.py,sha256=TuSjKomSk4N0My-UY9PWk2Nyuze6TilEGPsZELgotzk,14716
|
34
34
|
compressed_tensors/quantization/lifecycle/compressed.py,sha256=Fj9n66IN0EWsOAkBHg3O0GlOQpxstqjCcs0ttzMXrJ0,2296
|
35
35
|
compressed_tensors/quantization/lifecycle/forward.py,sha256=xcLTgaff1wYUWzvQqYKmhWYkshWVI-PhLPtBOyyZro0,17576
|
36
36
|
compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
|
@@ -63,8 +63,8 @@ compressed_tensors/utils/permute.py,sha256=V6tJLKo3Syccj-viv4F7ZKZgJeCB-hl-dK8RK
|
|
63
63
|
compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
|
64
64
|
compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
|
65
65
|
compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
|
66
|
-
compressed_tensors-0.11.
|
67
|
-
compressed_tensors-0.11.
|
68
|
-
compressed_tensors-0.11.
|
69
|
-
compressed_tensors-0.11.
|
70
|
-
compressed_tensors-0.11.
|
66
|
+
compressed_tensors-0.11.1a20250828.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
67
|
+
compressed_tensors-0.11.1a20250828.dist-info/METADATA,sha256=lPVoawn0HxkV3dRXP0U6C7UWpulLrYHeTpyzWGSfGvM,7031
|
68
|
+
compressed_tensors-0.11.1a20250828.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
69
|
+
compressed_tensors-0.11.1a20250828.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
|
70
|
+
compressed_tensors-0.11.1a20250828.dist-info/RECORD,,
|
{compressed_tensors-0.11.1a20250821.dist-info → compressed_tensors-0.11.1a20250828.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|