compressed-tensors-nightly 0.3.3.20240601__py3-none-any.whl → 0.3.3.20240603__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
--- compressed_tensors/compressors/model_compressor.py
+++ compressed_tensors/compressors/model_compressor.py
@@ -16,6 +16,7 @@ import json
 import logging
 import operator
 import os
+from copy import deepcopy
 from typing import Dict, Optional, Union
 
 from compressed_tensors.base import (
@@ -36,6 +37,7 @@ from compressed_tensors.quantization.utils import (
     iter_named_leaf_modules,
 )
 from compressed_tensors.utils import get_safetensors_folder
+from compressed_tensors.utils.helpers import fix_fsdp_module_name
 from torch import Tensor
 from torch.nn import Module, Parameter
 from tqdm import tqdm
@@ -89,9 +91,8 @@ class ModelCompressor:
         if compression_config is None:
             return None
 
-        sparsity_config = compression_config.get(SPARSITY_CONFIG_NAME, None)
-        quantization_config = compression_config.get(QUANTIZATION_CONFIG_NAME, None)
-
+        sparsity_config = cls.parse_sparsity_config(compression_config)
+        quantization_config = cls.parse_quantization_config(compression_config)
         if sparsity_config is None and quantization_config is None:
             return None
 
@@ -141,6 +142,21 @@ class ModelCompressor:
             sparsity_config=sparsity_config, quantization_config=quantization_config
         )
 
+    @staticmethod
+    def parse_sparsity_config(compression_config: Dict) -> Union[Dict, None]:
+        if compression_config is None:
+            return None
+        return compression_config.get(SPARSITY_CONFIG_NAME, None)
+
+    @staticmethod
+    def parse_quantization_config(compression_config: Dict) -> Union[Dict, None]:
+        quantization_config = deepcopy(compression_config)
+        quantization_config.pop(SPARSITY_CONFIG_NAME, None)
+        if len(quantization_config) == 0:
+            quantization_config = None
+
+        return quantization_config
+
     def __init__(
         self,
         sparsity_config: Optional[SparsityCompressionConfig] = None,
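For context on the two new static methods: they split a combined compression config into its sparsity and quantization parts. The sparsity entry is looked up by key, and whatever remains after removing that entry is treated as the quantization config (or None if nothing is left). A minimal sketch of the intended usage, assuming SPARSITY_CONFIG_NAME resolves to "sparsity_config" and using an illustrative config dict (field names inside each block are examples, not taken from the diff):

    from compressed_tensors.compressors import ModelCompressor

    # illustrative combined config, as it might appear under a model's
    # config.json compression entry
    compression_config = {
        "sparsity_config": {"format": "sparse-bitmask", "global_sparsity": 0.5},
        "config_groups": {"group_0": {"targets": ["Linear"]}},
        "quant_method": "compressed-tensors",
    }

    sparsity = ModelCompressor.parse_sparsity_config(compression_config)
    # -> {"format": "sparse-bitmask", "global_sparsity": 0.5}

    quantization = ModelCompressor.parse_quantization_config(compression_config)
    # -> the remaining keys: {"config_groups": ..., "quant_method": ...}
    # returns None instead if no keys remain after dropping the sparsity entry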
@@ -233,9 +249,7 @@ class ModelCompressor:
         config_data[COMPRESSION_CONFIG_NAME] = {}
         if self.quantization_config is not None:
             quant_config_data = self.quantization_config.model_dump()
-            config_data[COMPRESSION_CONFIG_NAME][
-                QUANTIZATION_CONFIG_NAME
-            ] = quant_config_data
+            config_data[COMPRESSION_CONFIG_NAME] = quant_config_data
         if self.sparsity_config is not None:
             sparsity_config_data = self.sparsity_config.model_dump()
             config_data[COMPRESSION_CONFIG_NAME][
@@ -260,6 +274,7 @@ def _get_weight_arg_mappings(model: Module) -> Dict:
     for name, submodule in iter_named_leaf_modules(model):
         if is_module_quantized(submodule):
             if submodule.quantization_scheme.weights is not None:
+                name = fix_fsdp_module_name(name)
                 quantized_modules_to_args[name] = submodule.quantization_scheme.weights
 
     return quantized_modules_to_args
--- compressed_tensors/quantization/lifecycle/apply.py
+++ compressed_tensors/quantization/lifecycle/apply.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 import re
 from collections import OrderedDict
 from typing import Dict, Iterable, Optional
@@ -35,6 +36,7 @@ from compressed_tensors.quantization.utils import (
     infer_quantization_status,
     iter_named_leaf_modules,
 )
+from compressed_tensors.utils.helpers import fix_fsdp_module_name
 from compressed_tensors.utils.safetensors_load import get_safetensors_folder
 from torch.nn import Module
 
@@ -50,6 +52,9 @@ from compressed_tensors.quantization.utils.helpers import is_module_quantized
 from compressed_tensors.utils.safetensors_load import get_quantization_state_dict
 
 
+_LOGGER = logging.getLogger(__name__)
+
+
 def load_pretrained_quantization(model: Module, model_name_or_path: str):
     """
     Loads the quantization parameters (scale and zero point) from model_name_or_path to
@@ -105,15 +110,24 @@ def apply_quantization_config(model: Module, config: QuantizationConfig):
         for target in scheme.targets:
             target_to_scheme[target] = scheme
 
+    # list of submodules to ignore
+    ignored_submodules = []
     # mark appropriate layers for quantization by setting their quantization schemes
     for name, submodule in iter_named_leaf_modules(model):
+        # potentially fix module name to remove FSDP wrapper prefix
+        name = fix_fsdp_module_name(name)
         if find_first_name_or_class_match(name, submodule, config.ignore):
+            ignored_submodules.append(name)
             continue  # layer matches ignore list, continue
         target = find_first_name_or_class_match(name, submodule, target_to_scheme)
         if target is not None:
             # target matched - add layer and scheme to target list
             submodule.quantization_scheme = target_to_scheme[target]
-
+    if set(config.ignore) - set(ignored_submodules):
+        _LOGGER.warning(
+            "Some layers that were to be ignored were "
+            f"not found in the model: {set(config.ignore) - set(ignored_submodules)}"
+        )
     # apply current quantization status across all targeted layers
     apply_quantization_status(model, config.quantization_status)
 
@@ -157,6 +171,7 @@ def _find_first_match(
     # returns first element of target that matches value either
    # exactly or as a regex after 're:'. if check_contains is set to True,
    # additionally checks if the target string is contained with value.
+
    for target in targets:
        if target.startswith("re:"):
            pattern = target[3:]
--- compressed_tensors/quantization/lifecycle/forward.py
+++ compressed_tensors/quantization/lifecycle/forward.py
@@ -57,6 +57,14 @@ def quantize(
     :param dtype: optional dtype to cast the quantized output to
     :return: fake quantized tensor
     """
+    # ensure all tensors are on the same device
+    # assumes that the target device is the input
+    # tensor's device
+    if x.device != scale.device:
+        scale = scale.to(x.device)
+    if x.device != zero_point.device:
+        zero_point = zero_point.to(x.device)
+
     return _process_quantization(
         x=x,
         scale=scale,
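The guard added to quantize() moves scale and zero_point onto the input tensor's device before any arithmetic, which matters when quantization parameters were created on CPU while activations live on an accelerator. A small self-contained sketch of the same pattern (not a call into the library; the tensors and shapes are illustrative):

    import torch

    # hypothetical mismatch: parameters on CPU, input on a GPU if one is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    x = torch.randn(2, 8, device=device)
    scale = torch.tensor(0.05)    # CPU tensor
    zero_point = torch.tensor(0)  # CPU tensor

    # align devices exactly as the new guard does, using x as the target device
    if x.device != scale.device:
        scale = scale.to(x.device)
    if x.device != zero_point.device:
        zero_point = zero_point.to(x.device)

    assert scale.device == x.device and zero_point.device == x.device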
--- compressed_tensors/quantization/quant_config.py
+++ compressed_tensors/quantization/quant_config.py
@@ -15,7 +15,6 @@
 from enum import Enum
 from typing import Dict, List, Optional, Union
 
-from compressed_tensors.base import QUANTIZATION_CONFIG_NAME
 from compressed_tensors.config import CompressionFormat
 from compressed_tensors.quantization.quant_scheme import (
     QuantizationScheme,
@@ -29,13 +28,14 @@ from compressed_tensors.quantization.utils import (
 )
 from pydantic import BaseModel, Field
 from torch.nn import Module
-from transformers import AutoConfig
 
 
 __all__ = [
     "QuantizationStatus",
     "QuantizationConfig",
     "LIFECYCLE_ORDER",
+    "DEFAULT_QUANTIZATION_METHOD",
+    "DEFAULT_QUANTIZATION_FORMAT",
 ]
 
 
@@ -101,6 +101,9 @@ LIFECYCLE_ORDER = [
     QuantizationStatus.COMPRESSED,
 ]
 
+DEFAULT_QUANTIZATION_METHOD = "compressed-tensors"
+DEFAULT_QUANTIZATION_FORMAT = "fakequant"
+
 
 class QuantizationConfig(BaseModel):
     """
@@ -122,8 +125,8 @@ class QuantizationConfig(BaseModel):
     """
 
     config_groups: Dict[str, Union[QuantizationScheme, List[str]]]
-    quant_method: str = "sparseml"
-    format: str = "fakequant"
+    quant_method: str = DEFAULT_QUANTIZATION_METHOD
+    format: str = DEFAULT_QUANTIZATION_FORMAT
     quantization_status: QuantizationStatus = QuantizationStatus.INITIALIZED
     global_compression_ratio: Optional[float] = None
     ignore: Optional[List[str]] = Field(default_factory=list)
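With the new module-level constants, a freshly constructed QuantizationConfig defaults to quant_method "compressed-tensors" and format "fakequant" instead of the old "sparseml" method string. A minimal sketch, assuming an empty config_groups mapping passes the model's validation:

    from compressed_tensors.quantization.quant_config import QuantizationConfig

    config = QuantizationConfig(config_groups={})
    print(config.quant_method)  # "compressed-tensors"
    print(config.format)        # "fakequant"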
@@ -141,21 +144,6 @@ class QuantizationConfig(BaseModel):
             targets=targets_or_scheme,
         )
 
-    @staticmethod
-    def from_model_config(model_name_or_path) -> "QuantizationConfig":
-        """
-        Given a path to a model config, extract a quantization config if it exists
-
-        :param pretrained_model_name_or_path: path to model config on disk or HF hub
-        :return: instantiated QuantizationConfig if config contains a quant config
-        """
-        config = AutoConfig.from_pretrained(model_name_or_path)
-        quantization_config = getattr(config, QUANTIZATION_CONFIG_NAME, None)
-        if quantization_config is None:
-            return None
-
-        return QuantizationConfig.parse_obj(quantization_config)
-
     @staticmethod
     def from_pretrained(
         model: Module, format: Optional[str] = None
--- compressed_tensors/utils/helpers.py
+++ compressed_tensors/utils/helpers.py
@@ -15,18 +15,17 @@
 
 from typing import Optional
 
-from compressed_tensors.base import SPARSITY_CONFIG_NAME
-from compressed_tensors.compressors import ModelCompressor
-from compressed_tensors.config import CompressionConfig
 from transformers import AutoConfig
 
 
-__all__ = ["infer_compressor_from_model_config"]
+__all__ = ["infer_compressor_from_model_config", "fix_fsdp_module_name"]
+
+FSDP_WRAPPER_NAME = "_fsdp_wrapped_module"
 
 
 def infer_compressor_from_model_config(
     pretrained_model_name_or_path: str,
-) -> Optional[ModelCompressor]:
+) -> Optional["ModelCompressor"]:  # noqa: F821
     """
     Given a path to a model config, extract a sparsity config if it exists and return
     the associated ModelCompressor
@@ -34,8 +33,11 @@ def infer_compressor_from_model_config(
     :param pretrained_model_name_or_path: path to model config on disk or HF hub
     :return: matching compressor if config contains a sparsity config
     """
+    from compressed_tensors.compressors import ModelCompressor
+    from compressed_tensors.config import CompressionConfig
+
     config = AutoConfig.from_pretrained(pretrained_model_name_or_path)
-    sparsity_config = getattr(config, SPARSITY_CONFIG_NAME, None)
+    sparsity_config = ModelCompressor.parse_sparsity_config(config)
     if sparsity_config is None:
         return None
 
@@ -43,3 +45,19 @@
     sparsity_config = CompressionConfig.load_from_registry(format, **sparsity_config)
     compressor = ModelCompressor.load_from_registry(format, config=sparsity_config)
     return compressor
+
+
+# TODO: There is already the same function in
+# SparseML, should be moved to a shared location
+# in the future
+def fix_fsdp_module_name(name: str) -> str:
+    """
+    Remove FSDP wrapper prefixes from a module name
+    Accounts for scenario where FSDP_WRAPPER_NAME is
+    at the end of the name, as well as in the middle.
+    :param name: name to strip
+    :return: stripped name
+    """
+    return name.replace(FSDP_WRAPPER_NAME + ".", "").replace(
+        "." + FSDP_WRAPPER_NAME, ""
+    )
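fix_fsdp_module_name strips the FSDP wrapper segment whether it appears in the middle of a dotted module name or at its end, so scheme lookups and saved weight mappings use the unwrapped names. For example (module names are illustrative):

    from compressed_tensors.utils.helpers import fix_fsdp_module_name

    # wrapper segment in the middle of the name
    fix_fsdp_module_name("model._fsdp_wrapped_module.layers.0.self_attn.q_proj")
    # -> "model.layers.0.self_attn.q_proj"

    # wrapper segment at the end of the name
    fix_fsdp_module_name("model.layers.0._fsdp_wrapped_module")
    # -> "model.layers.0"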
--- compressed_tensors_nightly-0.3.3.20240601.dist-info/METADATA
+++ compressed_tensors_nightly-0.3.3.20240603.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: compressed-tensors-nightly
-Version: 0.3.3.20240601
+Version: 0.3.3.20240603
 Summary: Library for utilization of compressed safetensors of neural network models
 Home-page: https://github.com/neuralmagic/compressed-tensors
 Author: Neuralmagic, Inc.
--- compressed_tensors_nightly-0.3.3.20240601.dist-info/RECORD
+++ compressed_tensors_nightly-0.3.3.20240603.dist-info/RECORD
@@ -6,7 +6,7 @@ compressed_tensors/compressors/base.py,sha256=LWEgbpgTxzmoqQ7Xhq2OQszUgWoDtFuGCi
 compressed_tensors/compressors/dense.py,sha256=G_XHbvuENyupIKlXSITOQgvPkNkcMEOLcLWQr70V9EE,1257
 compressed_tensors/compressors/helpers.py,sha256=k9avlkmeYj6vkOAvl-MgcixtP7ib24SCfhzZ-RusXfw,5403
 compressed_tensors/compressors/int_quantized.py,sha256=Ct2vCK0yoPm6vkIFlzDMGQ7m14xT1GyURsSwH9DP770,5242
-compressed_tensors/compressors/model_compressor.py,sha256=gHD2VMbXkXaZiJu3ibOaWiYb4oJDz2hxX03wDuu1yhI,10481
+compressed_tensors/compressors/model_compressor.py,sha256=ymn4xzAstcutXxkY3Z3V_1MuJv383-lkZHzp37mA9z0,11119
 compressed_tensors/compressors/pack_quantized.py,sha256=VPiLlgJlDgARrn7YmiQoLqUfxErKBfj54epMYWRsF8k,8451
 compressed_tensors/compressors/sparse_bitmask.py,sha256=H9oZSTYI1oRCzAMbd4zThUnZd1h2rfs8DmA3tPcvuNE,8637
 compressed_tensors/config/__init__.py,sha256=ZBqWn3r6ku1qfmlHHYp0mQueY0i7Pwhr9rbQk9dDlMc,704
@@ -15,13 +15,13 @@ compressed_tensors/config/dense.py,sha256=NgSxnFCnckU9-iunxEaqiFwqgdO7YYxlWKR74j
 compressed_tensors/config/sparse_bitmask.py,sha256=pZUboRNZTu6NajGOQEFExoPknak5ynVAUeiiYpS1Gt8,1308
 compressed_tensors/quantization/__init__.py,sha256=83J5bPB7PavN2TfCoW7_vEDhfYpm4TDrqYO9vdSQ5bk,760
 compressed_tensors/quantization/quant_args.py,sha256=A6b2V8lhsM8Ho8RjlPBQdxRUDNWhqq-ie5E3RR2_GNg,4360
-compressed_tensors/quantization/quant_config.py,sha256=3BcbQ8-Ah7LbTDSSkRu29Yiid33xo0C1ki6NVhxLiaY,8727
+compressed_tensors/quantization/quant_config.py,sha256=Nv9rvWNrlbeJgNZhQf-cPAEWJ9NU75ATWHCacWaiQ_s,8189
 compressed_tensors/quantization/quant_scheme.py,sha256=-hAK1-C67_wJl10eaVLUvbslPBTV04WyzL_J-u9f1ck,3571
 compressed_tensors/quantization/lifecycle/__init__.py,sha256=ggRGWRqhCxCaTTDWRcgTVX3axnS2xV6rc5YvdzK7fSg,798
-compressed_tensors/quantization/lifecycle/apply.py,sha256=yLTDT1zkJp1Nti-aKZGOMW8-TELanF8dXiqDvAkVUQo,7984
+compressed_tensors/quantization/lifecycle/apply.py,sha256=disclMUDaz2MLPvcTwGQ1oo1clhTTBkAeNz5J9NRxVw,8552
 compressed_tensors/quantization/lifecycle/calibration.py,sha256=mLns4jlaWmBwOW8Jtlm5bMX-JET1AiZYUBO7qa-XuxI,1776
 compressed_tensors/quantization/lifecycle/compressed.py,sha256=VreB10xPwgSLQQlTu20UCrFpRS--cA7-lx5s7nrPPrg,2247
-compressed_tensors/quantization/lifecycle/forward.py,sha256=xeHaUbFxcUyqHffhCBZiRk-ObxjAF99rTnPR1Cweym0,10822
+compressed_tensors/quantization/lifecycle/forward.py,sha256=_1TwffkyaaXL5QpFgXH1gvueUivOLpuRkoXY7vRXktY,11094
 compressed_tensors/quantization/lifecycle/frozen.py,sha256=h1XYt89MouBTf3jTYLG_6OdFxIu5q2N8tPjsy6J4E6Y,1726
 compressed_tensors/quantization/lifecycle/initialize.py,sha256=pFfcu-pxdQKzlnn-18-RlkEktt2yDi6woNXJsiv1A2c,3732
 compressed_tensors/quantization/observers/__init__.py,sha256=DNH31NQYrIBBcmHsMyFA6whh4pbRsLwuNa6L8AeXaGc,745
@@ -34,10 +34,10 @@ compressed_tensors/quantization/utils/helpers.py,sha256=NzAH18Cn_-mTAR87y6IlcQU5
 compressed_tensors/registry/__init__.py,sha256=FwLSNYqfIrb5JD_6OK_MT4_svvKTN_nEhpgQlQvGbjI,658
 compressed_tensors/registry/registry.py,sha256=fxjOjh2wklCvJhQxwofdy-zV8q7MkQ85SLG77nml2iA,11890
 compressed_tensors/utils/__init__.py,sha256=5DrYjoZbaEvSkJcC-GRSbM_RBHVF4tG9gMd3zsJnjLw,665
-compressed_tensors/utils/helpers.py,sha256=h0jfl9drs5FAx40tCHRcVtJqXixB5hT5yq_IG2aY_-w,1735
+compressed_tensors/utils/helpers.py,sha256=5ull5yFT31M2zVxKeFvpvvlvX5f1Sk1LGuj_wrfZWCY,2267
 compressed_tensors/utils/safetensors_load.py,sha256=wo9UirGrGlenBqZeqotvpCT7D5MEdjCo2J3HeRaIFoU,8502
-compressed_tensors_nightly-0.3.3.20240601.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-compressed_tensors_nightly-0.3.3.20240601.dist-info/METADATA,sha256=myHAvn_PdIn9sInGNjfo8CwIObcM_GpTj74SqvCMZSU,5673
-compressed_tensors_nightly-0.3.3.20240601.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
-compressed_tensors_nightly-0.3.3.20240601.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
-compressed_tensors_nightly-0.3.3.20240601.dist-info/RECORD,,
+compressed_tensors_nightly-0.3.3.20240603.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+compressed_tensors_nightly-0.3.3.20240603.dist-info/METADATA,sha256=VSYJpZfZihQ_Y7H8jkyTdsTeNn2a9g4C9l04Tal0LmY,5673
+compressed_tensors_nightly-0.3.3.20240603.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+compressed_tensors_nightly-0.3.3.20240603.dist-info/top_level.txt,sha256=w2i-GyPs2s1UwVxvutSvN_lM22SXC2hQFBmoMcPnV7Y,19
+compressed_tensors_nightly-0.3.3.20240603.dist-info/RECORD,,