compressed-tensors 0.12.3a20251009__py3-none-any.whl → 0.12.3a20251013__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,3 @@ TRANSFORM_CONFIG_NAME = "transform_config"
20
20
  # required fields
21
21
  COMPRESSION_VERSION_NAME = "version"
22
22
  QUANTIZATION_METHOD_NAME = "quant_method"
23
-
24
- # auxillary configs
25
- KV_CACHE_SCHEME_NAME = "kv_cache_scheme"
@@ -15,7 +15,7 @@
15
15
  import logging
16
16
  from collections import OrderedDict
17
17
  from copy import deepcopy
18
- from typing import Dict, Iterable, List, Optional
18
+ from typing import Dict, List, Optional
19
19
  from typing import OrderedDict as OrderedDictType
20
20
  from typing import Union
21
21
 
@@ -34,7 +34,7 @@ from compressed_tensors.quantization.utils import (
34
34
  KV_CACHE_TARGETS,
35
35
  is_kv_cache_quant_scheme,
36
36
  )
37
- from compressed_tensors.utils.helpers import deprecated, replace_module
37
+ from compressed_tensors.utils.helpers import replace_module
38
38
  from compressed_tensors.utils.match import match_named_modules, match_targets
39
39
  from compressed_tensors.utils.offload import update_parameter_data
40
40
  from compressed_tensors.utils.safetensors_load import get_safetensors_folder
@@ -45,7 +45,6 @@ from torch.nn import Module
45
45
  __all__ = [
46
46
  "load_pretrained_quantization_parameters",
47
47
  "apply_quantization_config",
48
- "find_name_or_class_matches",
49
48
  ]
50
49
 
51
50
  from compressed_tensors.quantization.utils.helpers import is_module_quantized
@@ -208,31 +207,6 @@ def process_kv_cache_config(
208
207
  return config
209
208
 
210
209
 
211
- @deprecated(
212
- message="This function is deprecated and will be removed in a future release."
213
- "Please use `match_targets` from `compressed_tensors.utils.match` instead."
214
- )
215
- def find_name_or_class_matches(
216
- name: str, module: Module, targets: Iterable[str], check_contains: bool = False
217
- ) -> List[str]:
218
- """
219
- Returns all targets that match the given name or the class name.
220
- Returns empty list otherwise.
221
- The order of the output `matches` list matters.
222
- The entries are sorted in the following order:
223
- 1. matches on exact strings
224
- 2. matches on regex patterns
225
- 3. matches on module names
226
- """
227
- if check_contains:
228
- raise NotImplementedError(
229
- "This function is deprecated, and the check_contains=True option has been"
230
- " removed."
231
- )
232
-
233
- return match_targets(name, module, targets)
234
-
235
-
236
210
  def _load_quant_args_from_mapping(
237
211
  base_name: str, module_name: str, module: Module, mapping: Dict
238
212
  ):
@@ -199,7 +199,7 @@ def initialize_qparams(
199
199
  expected_shape = (1,)
200
200
 
201
201
  elif strategy == QuantizationStrategy.TOKEN:
202
- expected_shape = (1, 1)
202
+ raise ValueError("Cannot perform static token quantization")
203
203
 
204
204
  elif strategy == QuantizationStrategy.CHANNEL:
205
205
  if len(observed_shape) < 2:
@@ -264,6 +264,7 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
264
264
  actorder = model.actorder
265
265
  dynamic = model.dynamic
266
266
  observer = model.observer
267
+ dynamic = model.dynamic
267
268
 
268
269
  # infer strategy
269
270
  if strategy is None:
@@ -279,6 +280,12 @@ class QuantizationArgs(BaseModel, use_enum_values=True):
279
280
  "strategy='group' and group_size = -1 for 'channel'"
280
281
  )
281
282
 
283
+ # validate token strategy
284
+ if strategy == QuantizationStrategy.TOKEN and not dynamic:
285
+ raise ValueError(
286
+ "Cannot perform static token quantization, please use `dynamic=True`"
287
+ )
288
+
282
289
  # validate group strategy
283
290
  if strategy == QuantizationStrategy.GROUP:
284
291
  if group_size is None or group_size <= 0:
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.12.3.a20251009'
20
+ __version__ = version = '0.12.3.a20251013'
21
21
  __version_tuple__ = version_tuple = (0, 12, 3)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: compressed-tensors
3
- Version: 0.12.3a20251009
3
+ Version: 0.12.3a20251013
4
4
  Summary: Library for utilization of compressed safetensors of neural network models
5
5
  Home-page: https://github.com/neuralmagic/compressed-tensors
6
6
  Author: Neuralmagic, Inc.
@@ -1,7 +1,7 @@
1
1
  compressed_tensors/__init__.py,sha256=SRqNYFVvxAaLa4SImhoiIBKfoOSj7EUdx0CxXjGC2PA,884
2
- compressed_tensors/base.py,sha256=-gxWvDF4LCkyeDP8YlGzvBBKxo4Dk9h4NINPD61drFU,921
2
+ compressed_tensors/base.py,sha256=dKAVgQAp9GPH6YspvF_cbGXCrbiqAeLEIPydYAO40WE,859
3
3
  compressed_tensors/logger.py,sha256=sTm1Od1cV0aDxBm3YN-PPvsOATxY_2tBV62TQE4HiPw,4032
4
- compressed_tensors/version.py,sha256=p1gc603nDCOmpKHv3ByvOa_-mIzcmf3lWif35Bc9Lo8,523
4
+ compressed_tensors/version.py,sha256=qw63EaYIXPP8CqvEa8GUQLbMheFNxgxNiH2QRlZwONo,523
5
5
  compressed_tensors/compressors/__init__.py,sha256=smSygTSfcfuujRrAXDc6uZm4L_ccV1tWZewqVnOb4lM,825
6
6
  compressed_tensors/compressors/base.py,sha256=nvWsv4xEw1Tkxkxth6TmHplDYXfBeP22xWxOsZERyDY,7204
7
7
  compressed_tensors/compressors/helpers.py,sha256=OK6qxX9j3bHwF9JfIYSGMgBJe2PWjlTA3byXKCJaTIQ,5431
@@ -28,16 +28,16 @@ compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5y
28
28
  compressed_tensors/linear/__init__.py,sha256=fH6rjBYAxuwrTzBTlTjTgCYNyh6TCvCqajCz4Im4YrA,617
29
29
  compressed_tensors/linear/compressed_linear.py,sha256=1yo9RyjA0aQ--iuIknFfcSorJn43Mn4CoV-q4JlTJ_o,4052
30
30
  compressed_tensors/quantization/__init__.py,sha256=ifNRE2rJNILOWKA3jkPBGwXEXXvaKkn4lRMcxaVlkW0,790
31
- compressed_tensors/quantization/quant_args.py,sha256=MUDEEokFH2AWRhJHu-32_JiamMr5K8ifSuEWKbg2jfE,13431
31
+ compressed_tensors/quantization/quant_args.py,sha256=Cin8MfRrVYG4Ay9RToG4u1n-RfdPr72kYFwND6W5sO8,13695
32
32
  compressed_tensors/quantization/quant_config.py,sha256=Y_OgLId65ajdfupXuOrKSAArrvKicMeA8DHdzRt3J6o,10687
33
33
  compressed_tensors/quantization/quant_metadata.py,sha256=yudYWXRYYSqgRhoUA-RIu2LI14NFchOyPUUuz7bPqJE,1950
34
34
  compressed_tensors/quantization/quant_scheme.py,sha256=ge_YQxeFRPdcZyfbdbLv2emtxCgkY1cd4nLmxsUDJ8c,9721
35
35
  compressed_tensors/quantization/lifecycle/__init__.py,sha256=_uItzFWusyV74Zco_pHLOTdE9a83cL-R-ZdyQrBkIyw,772
36
- compressed_tensors/quantization/lifecycle/apply.py,sha256=1zRc7tQbE5OAVJ5VRgU9FZPnMiusef84HluTORSYC2I,13108
36
+ compressed_tensors/quantization/lifecycle/apply.py,sha256=fyMyfMo6HTE86NUBhViujod62w_M_c9wuCUIlMJwztk,12182
37
37
  compressed_tensors/quantization/lifecycle/compressed.py,sha256=_gTH0CnLe8MxkTY1hrCCeSYAMzuvIwoCTT4hxW1TPk4,2354
38
38
  compressed_tensors/quantization/lifecycle/forward.py,sha256=vVh9JiF2hd9l6B7Wa1zFfYreM0dP3gKX4XghYbV-vEo,17562
39
39
  compressed_tensors/quantization/lifecycle/helpers.py,sha256=C0mhy2vJ0fCjVeN4kFNhw8Eq1wkteBGHiZ36RVLThRY,944
40
- compressed_tensors/quantization/lifecycle/initialize.py,sha256=JMpcsAmLrOMPb3PC4asyo7lce3BkLd8H6iVdnI72K2Q,10573
40
+ compressed_tensors/quantization/lifecycle/initialize.py,sha256=0Ju-TiFHcPnr9jKdOIUtYAqLm8C6d_YzABcVF-BxueA,10610
41
41
  compressed_tensors/quantization/utils/__init__.py,sha256=VdtEmP0bvuND_IGQnyqUPc5lnFp-1_yD7StKSX4x80w,656
42
42
  compressed_tensors/quantization/utils/helpers.py,sha256=BA-twfAKk-HMBr_OZHZnSQN7F1a0l5zB1kJhml6j-cI,17146
43
43
  compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
@@ -65,8 +65,8 @@ compressed_tensors/utils/permutations_24.py,sha256=kx6fsfDHebx94zsSzhXGyCyuC9sVy
65
65
  compressed_tensors/utils/safetensors_load.py,sha256=Vql34aCTDHwmTZXJHzCyBISJo7iA7EQ78LdTlMjdpZo,12023
66
66
  compressed_tensors/utils/semi_structured_conversions.py,sha256=XKNffPum54kPASgqKzgKvyeqWPAkair2XEQXjkp7ho8,13489
67
67
  compressed_tensors/utils/type.py,sha256=bNwoo_FWlvLuDpYAGGzZJITRg0JA_Ngk9LGPo-kvjeU,2554
68
- compressed_tensors-0.12.3a20251009.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
- compressed_tensors-0.12.3a20251009.dist-info/METADATA,sha256=H2QGZBR6fGYaw7TSQY8VbPgJBffvl_5qkFl6UTLL5Nk,7027
70
- compressed_tensors-0.12.3a20251009.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
- compressed_tensors-0.12.3a20251009.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
72
- compressed_tensors-0.12.3a20251009.dist-info/RECORD,,
68
+ compressed_tensors-0.12.3a20251013.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
69
+ compressed_tensors-0.12.3a20251013.dist-info/METADATA,sha256=SuQIzLdVpQke9zEAfdTocRPAmQ-o3nATcCe_WhBhmxU,7027
70
+ compressed_tensors-0.12.3a20251013.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
+ compressed_tensors-0.12.3a20251013.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
72
+ compressed_tensors-0.12.3a20251013.dist-info/RECORD,,