compressed-tensors-nightly 0.5.0.20240814__py3-none-any.whl → 0.5.0.20240830__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. compressed_tensors/compressors/base.py +198 -8
  2. compressed_tensors/compressors/model_compressor.py +65 -1
  3. compressed_tensors/compressors/naive_quantized.py +71 -75
  4. compressed_tensors/compressors/pack_quantized.py +83 -94
  5. compressed_tensors/linear/__init__.py +13 -0
  6. compressed_tensors/linear/compressed_linear.py +87 -0
  7. compressed_tensors/quantization/lifecycle/apply.py +36 -4
  8. compressed_tensors/quantization/lifecycle/calibration.py +3 -2
  9. compressed_tensors/quantization/lifecycle/compressed.py +1 -1
  10. compressed_tensors/quantization/lifecycle/forward.py +67 -43
  11. compressed_tensors/quantization/lifecycle/helpers.py +29 -2
  12. compressed_tensors/quantization/lifecycle/initialize.py +50 -16
  13. compressed_tensors/quantization/observers/__init__.py +1 -0
  14. compressed_tensors/quantization/observers/base.py +54 -14
  15. compressed_tensors/quantization/observers/min_max.py +8 -0
  16. compressed_tensors/quantization/observers/mse.py +162 -0
  17. compressed_tensors/quantization/quant_args.py +48 -20
  18. compressed_tensors/utils/__init__.py +1 -0
  19. compressed_tensors/utils/helpers.py +13 -0
  20. compressed_tensors/utils/offload.py +7 -1
  21. compressed_tensors/utils/permute.py +70 -0
  22. compressed_tensors/utils/safetensors_load.py +2 -0
  23. compressed_tensors/utils/semi_structured_conversions.py +1 -0
  24. {compressed_tensors_nightly-0.5.0.20240814.dist-info → compressed_tensors_nightly-0.5.0.20240830.dist-info}/METADATA +3 -2
  25. compressed_tensors_nightly-0.5.0.20240830.dist-info/RECORD +52 -0
  26. compressed_tensors_nightly-0.5.0.20240814.dist-info/RECORD +0 -48
  27. {compressed_tensors_nightly-0.5.0.20240814.dist-info → compressed_tensors_nightly-0.5.0.20240830.dist-info}/LICENSE +0 -0
  28. {compressed_tensors_nightly-0.5.0.20240814.dist-info → compressed_tensors_nightly-0.5.0.20240830.dist-info}/WHEEL +0 -0
  29. {compressed_tensors_nightly-0.5.0.20240814.dist-info → compressed_tensors_nightly-0.5.0.20240830.dist-info}/top_level.txt +0 -0
compressed_tensors/compressors/base.py

@@ -12,20 +12,53 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Dict, Generator, Tuple, Union
+import logging
+from typing import Dict, Generator, Optional, Tuple, Union
 
+import torch
 from compressed_tensors.config import SparsityCompressionConfig
-from compressed_tensors.quantization import QuantizationConfig
+from compressed_tensors.quantization import QuantizationArgs, QuantizationConfig
 from compressed_tensors.registry import RegistryMixin
+from compressed_tensors.utils import get_nested_weight_mappings, merge_names
+from safetensors import safe_open
 from torch import Tensor
+from torch.nn.modules import Module
+from tqdm import tqdm
 
 
+_LOGGER: logging.Logger = logging.getLogger(__name__)
+
 __all__ = ["Compressor"]
 
 
 class Compressor(RegistryMixin):
     """
-    Base class representing a model compression algorithm
+    Base class representing a model compression algorithm. Each child class should
+    implement compression_param_info, compress_weight and decompress_weight.
+
+    Compressors support compressing/decompressing a full module state dict or a single
+    quantized PyTorch leaf module.
+
+    Model Load Lifecycle (run_compressed=False):
+        - ModelCompressor.decompress()
+        - apply_quantization_config()
+        - Compressor.decompress()
+        - Compressor.decompress_weight()
+
+    Model Save Lifecycle:
+        - ModelCompressor.compress()
+        - Compressor.compress()
+        - Compressor.compress_weight()
+
+    Module Lifecycle (run_compressed=True):
+        - apply_quantization_config()
+        - compressed_module = CompressedLinear(module)
+        - initialize_module_for_quantization()
+        - Compressor.compression_param_info()
+        - register_parameters()
+        - compressed_module.forward()
+        - compressed_module.decompress()
+
 
     :param config: config specifying compression parameters
     """
@@ -35,26 +68,183 @@ class Compressor(RegistryMixin):
     ):
         self.config = config
 
-    def compress(self, model_state: Dict[str, Tensor], **kwargs) -> Dict[str, Tensor]:
+    def compression_param_info(
+        self,
+        weight_shape: torch.Size,
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
+        """
+        Creates a dictionary of expected shapes and dtypes for each compression
+        parameter used by the compressor
+
+        :param weight_shape: uncompressed weight shape
+        :param quantization_args: quantization parameters for the weight
+        :return: dictionary mapping compressed parameter names to shape and dtype
+        """
+        raise NotImplementedError()
+
+    def compress(
+        self,
+        model_state: Dict[str, Tensor],
+        names_to_scheme: Dict[str, QuantizationArgs],
+        **kwargs,
+    ) -> Dict[str, Tensor]:
         """
         Compresses a dense state dict
 
         :param model_state: state dict of uncompressed model
+        :param names_to_scheme: quantization args for each quantized weight, needed for
+            quantize function to calculate bit depth
         :return: compressed state dict
         """
-        raise NotImplementedError()
+        compressed_dict = {}
+        weight_suffix = ".weight"
+        _LOGGER.debug(
+            f"Compressing model with {len(model_state)} parameterized layers..."
+        )
+
+        for name, value in tqdm(model_state.items(), desc="Compressing model"):
+            if name.endswith(weight_suffix):
+                prefix = name[: -(len(weight_suffix))]
+                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
+                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
+                if scale is not None:
+                    # weight is quantized, compress it
+                    quant_args = names_to_scheme[prefix]
+                    compressed_data = self.compress_weight(
+                        weight=value,
+                        scale=scale,
+                        zero_point=zp,
+                        quantization_args=quant_args,
+                        device="cpu",
+                    )
+                    for key, value in compressed_data.items():
+                        compressed_dict[merge_names(prefix, key)] = value
+                else:
+                    compressed_dict[name] = value.to("cpu")
+            elif name.endswith("zero_point") and torch.all(value == 0):
+                # all zero_points are 0, no need to include in
+                # compressed state_dict
+                continue
+            else:
+                compressed_dict[name] = value.to("cpu")
+
+        return compressed_dict
 
     def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
+        self,
+        path_to_model_or_tensors: str,
+        names_to_scheme: Dict[str, QuantizationArgs],
+        device: str = "cpu",
     ) -> Generator[Tuple[str, Tensor], None, None]:
         """
         Reads a compressed state dict located at path_to_model_or_tensors
         and returns a generator for sequentially decompressing back to a
         dense state dict
 
-        :param model_path: path to compressed safetensors model (directory with
-            one or more safetensors files) or compressed tensors file
+        :param path_to_model_or_tensors: path to compressed safetensors model (directory
+            with one or more safetensors files) or compressed tensors file
+        :param names_to_scheme: quantization args for each quantized weight
        :param device: optional device to load intermediate weights into
        :return: compressed state dict
        """
+        weight_mappings = get_nested_weight_mappings(
+            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+        )
+        for weight_name in weight_mappings.keys():
+            weight_data = {}
+            for param_name, safe_path in weight_mappings[weight_name].items():
+                full_name = merge_names(weight_name, param_name)
+                with safe_open(safe_path, framework="pt", device=device) as f:
+                    weight_data[param_name] = f.get_tensor(full_name)
+
+            if "weight_scale" in weight_data:
+                quant_args = names_to_scheme[weight_name]
+                decompressed = self.decompress_weight(
+                    compressed_data=weight_data, quantization_args=quant_args
+                )
+                yield merge_names(weight_name, "weight"), decompressed
+
+    def compress_weight(
+        self,
+        weight: Tensor,
+        scale: Tensor,
+        zero_point: Optional[Tensor] = None,
+        g_idx: Optional[torch.Tensor] = None,
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> Dict[str, torch.Tensor]:
+        """
+        Compresses a single uncompressed weight
+
+        :param weight: uncompressed weight tensor
+        :param scale: quantization scale for weight
+        :param zero_point: quantization zero point for weight
+        :param g_idx: optional mapping from column index to group index
+        :param quantization_args: quantization parameters for weight
+        :return: dictionary of compressed weight data
+        """
         raise NotImplementedError()
+
+    def decompress_weight(
+        self,
+        compressed_data: Dict[str, Tensor],
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> torch.Tensor:
+        """
+        Decompresses a single compressed weight
+
+        :param compressed_data: dictionary of data needed for decompression
+        :param quantization_args: quantization parameters for the weight
+        :return: tensor of the decompressed weight
+        """
+        raise NotImplementedError()
+
+    def compress_module(self, module: Module) -> Optional[Dict[str, torch.Tensor]]:
+        """
+        Compresses a single quantized leaf PyTorch module. If the module is not
+        quantized, this function has no effect.
+
+        :param module: PyTorch module to compress
+        :return: dictionary of compressed weight data, or None if module is not
+            quantized
+        """
+        if not hasattr(module, "quantization_scheme"):
+            return None  # module is not quantized
+        quantization_scheme = module.quantization_scheme
+        if not hasattr(quantization_scheme, "weights"):
+            return None  # weights are not quantized
+
+        quantization_args = quantization_scheme.weights
+        weight = getattr(module, "weight", None)
+        weight_scale = getattr(module, "weight_scale", None)
+        weight_zero_point = getattr(module, "weight_zero_point", None)
+
+        return self.compress_weight(
+            weight=weight,
+            scale=weight_scale,
+            zero_point=weight_zero_point,
+            quantization_args=quantization_args,
+        )
+
+    def decompress_module(self, module: Module):
+        """
+        Decompresses a single compressed leaf PyTorch module. If the module is not
+        quantized, this function has no effect.
+
+        :param module: PyTorch module to decompress
+        :return: tensor of the decompressed weight, or None if module is not quantized
+        """
+        if not hasattr(module, "quantization_scheme"):
+            return None  # module is not quantized
+        quantization_scheme = module.quantization_scheme
+        if not hasattr(quantization_scheme, "weights"):
+            return None  # weights are not quantized
+
+        quantization_args = quantization_scheme.weights
+        compressed_data = {}
+        for name, parameter in module.named_parameters():
+            compressed_data[name] = parameter
+
+        return self.decompress_weight(
+            compressed_data=compressed_data, quantization_args=quantization_args
+        )
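
To make the base-class contract above concrete, here is a minimal, hypothetical passthrough subclass showing the three hooks a child compressor is expected to implement (compression_param_info, compress_weight, decompress_weight). This is a sketch for illustration only: the class name PassthroughCompressor and the registry name "passthrough" are invented and are not part of the package.

    # Hypothetical sketch only: a no-op compressor illustrating the base-class hooks.
    # "passthrough" / PassthroughCompressor are invented names, not package APIs.
    from typing import Dict, Optional, Tuple

    import torch
    from torch import Tensor
    from compressed_tensors.compressors import Compressor
    from compressed_tensors.quantization import QuantizationArgs


    @Compressor.register(name="passthrough")  # registry name is an assumption
    class PassthroughCompressor(Compressor):
        COMPRESSION_PARAM_NAMES = ["weight"]

        def compression_param_info(
            self,
            weight_shape: torch.Size,
            quantization_args: Optional[QuantizationArgs] = None,
        ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
            # the "compressed" weight keeps its original shape and stays fp32
            return {"weight": (weight_shape, torch.float32)}

        def compress_weight(
            self, weight: Tensor, scale: Tensor, **kwargs
        ) -> Dict[str, Tensor]:
            # no real compression; return the dense weight unchanged
            return {"weight": weight}

        def decompress_weight(
            self,
            compressed_data: Dict[str, Tensor],
            quantization_args: Optional[QuantizationArgs] = None,
        ) -> torch.Tensor:
            return compressed_data["weight"]

With these hooks defined, the inherited Compressor.compress() and Compressor.decompress() shown in the diff handle the state-dict iteration and the weight_scale / weight_zero_point naming convention for the subclass.
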
compressed_tensors/compressors/model_compressor.py

@@ -28,7 +28,7 @@ from compressed_tensors.base import (
     SPARSITY_CONFIG_NAME,
 )
 from compressed_tensors.compressors import Compressor
-from compressed_tensors.config import SparsityCompressionConfig
+from compressed_tensors.config import CompressionFormat, SparsityCompressionConfig
 from compressed_tensors.quantization import (
     QuantizationConfig,
     QuantizationStatus,
@@ -176,6 +176,9 @@ class ModelCompressor:
         if hasattr(compression_config, SPARSITY_CONFIG_NAME):
             # for loaded HFQuantizer config
             return getattr(compression_config, SPARSITY_CONFIG_NAME)
+        if SPARSITY_CONFIG_NAME in compression_config:
+            # for loaded HFQuantizer config from dict
+            return compression_config[SPARSITY_CONFIG_NAME]
 
         # SparseAutoModel format
         return compression_config.get(SPARSITY_CONFIG_NAME, None)
@@ -189,6 +192,10 @@ class ModelCompressor:
             # for loaded HFQuantizer config
             return getattr(compression_config, QUANTIZATION_CONFIG_NAME)
 
+        if QUANTIZATION_CONFIG_NAME in compression_config:
+            # for loaded HFQuantizer config from dict
+            return compression_config[QUANTIZATION_CONFIG_NAME]
+
         # SparseAutoModel format
         quantization_config = deepcopy(compression_config)
         quantization_config.pop(SPARSITY_CONFIG_NAME, None)
@@ -234,12 +241,69 @@ class ModelCompressor:
             compressed_state_dict = self.quantization_compressor.compress(
                 state_dict, names_to_scheme=quantized_modules_to_args
             )
+            if self.quantization_config.format != CompressionFormat.dense.value:
+                self.quantization_config.quantization_status = (
+                    QuantizationStatus.COMPRESSED
+                )
 
         if self.sparsity_compressor is not None:
             compressed_state_dict = self.sparsity_compressor.compress(
                 compressed_state_dict
             )
 
+        # HACK (mgoin): Post-process step for kv cache scales to take the
+        # k/v_proj module `output_scale` parameters, and store them in the
+        # parent attention module as `k_scale` and `v_scale`
+        #
+        # Example:
+        # Replace `model.layers.0.self_attn.k_proj.output_scale`
+        # with `model.layers.0.self_attn.k_scale`
+        if (
+            self.quantization_config is not None
+            and self.quantization_config.kv_cache_scheme is not None
+        ):
+            # HACK (mgoin): We assume the quantized modules in question
+            # will be k_proj and v_proj since those are the default targets.
+            # We check that both of these modules have output activation
+            # quantization, and additionally check that q_proj doesn't.
+            q_proj_has_no_quant_output = 0
+            k_proj_has_quant_output = 0
+            v_proj_has_quant_output = 0
+            for name, module in model.named_modules():
+                if not hasattr(module, "quantization_scheme"):
+                    continue
+                out_act = module.quantization_scheme.output_activations
+                if name.endswith(".q_proj") and out_act is None:
+                    q_proj_has_no_quant_output += 1
+                elif name.endswith(".k_proj") and out_act is not None:
+                    k_proj_has_quant_output += 1
+                elif name.endswith(".v_proj") and out_act is not None:
+                    v_proj_has_quant_output += 1
+
+            assert (
+                q_proj_has_no_quant_output > 0
+                and k_proj_has_quant_output > 0
+                and v_proj_has_quant_output > 0
+            )
+            assert (
+                q_proj_has_no_quant_output
+                == k_proj_has_quant_output
+                == v_proj_has_quant_output
+            )
+
+            # Move all .k/v_proj.output_scale parameters to .k/v_scale
+            working_state_dict = {}
+            for key in compressed_state_dict.keys():
+                if key.endswith(".k_proj.output_scale"):
+                    new_key = key.replace(".k_proj.output_scale", ".k_scale")
+                    working_state_dict[new_key] = compressed_state_dict[key]
+                elif key.endswith(".v_proj.output_scale"):
+                    new_key = key.replace(".v_proj.output_scale", ".v_scale")
+                    working_state_dict[new_key] = compressed_state_dict[key]
+                else:
+                    working_state_dict[key] = compressed_state_dict[key]
+            compressed_state_dict = working_state_dict
+
         # HACK: Override the dtype_byte_size function in transformers to
         # support float8 types. Fix is posted upstream
         # https://github.com/huggingface/transformers/pull/30488
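
As a quick before/after illustration of the kv cache post-processing in the hunk above, the rewrite is a plain rename of state-dict keys; the layer names and scale values below are examples only, not data from the package.

    # Toy example of the key rewrite performed by the kv cache post-process step.
    compressed_state_dict = {
        "model.layers.0.self_attn.k_proj.output_scale": 0.02,   # example value
        "model.layers.0.self_attn.v_proj.output_scale": 0.03,   # example value
        "model.layers.0.self_attn.q_proj.weight": "...",
    }

    renamed = {}
    for key, value in compressed_state_dict.items():
        if key.endswith(".k_proj.output_scale"):
            renamed[key.replace(".k_proj.output_scale", ".k_scale")] = value
        elif key.endswith(".v_proj.output_scale"):
            renamed[key.replace(".v_proj.output_scale", ".v_scale")] = value
        else:
            renamed[key] = value

    # renamed now holds "model.layers.0.self_attn.k_scale" and
    # "model.layers.0.self_attn.v_scale"; all other keys are untouched.
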
compressed_tensors/compressors/naive_quantized.py

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import logging
-from typing import Dict, Generator, Tuple
+from typing import Dict, Optional, Tuple
 
 import torch
 from compressed_tensors.compressors import Compressor
@@ -21,10 +21,7 @@ from compressed_tensors.config import CompressionFormat
 from compressed_tensors.quantization import QuantizationArgs
 from compressed_tensors.quantization.lifecycle.forward import dequantize, quantize
 from compressed_tensors.quantization.utils import can_quantize
-from compressed_tensors.utils import get_nested_weight_mappings, merge_names
-from safetensors import safe_open
 from torch import Tensor
-from tqdm import tqdm
 
 
 __all__ = [
@@ -44,86 +41,85 @@ class QuantizationCompressor(Compressor):
     type to the type specified by the layer's QuantizationArgs.
     """
 
-    COMPRESSION_PARAM_NAMES = ["weight", "weight_scale", "weight_zero_point"]
+    COMPRESSION_PARAM_NAMES = [
+        "weight",
+        "weight_scale",
+        "weight_zero_point",
+        "weight_g_idx",
+    ]
 
-    def compress(
+    def compression_param_info(
         self,
-        model_state: Dict[str, Tensor],
-        names_to_scheme: Dict[str, QuantizationArgs],
-        **kwargs,
-    ) -> Dict[str, Tensor]:
+        weight_shape: torch.Size,
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> Dict[str, Tuple[torch.Size, torch.dtype]]:
         """
-        Compresses a dense state dict
+        Creates a dictionary of expected shapes and dtypes for each compression
+        parameter used by the compressor
 
-        :param model_state: state dict of uncompressed model
-        :param names_to_scheme: quantization args for each quantized weight, needed for
-            quantize function to calculate bit depth
-        :return: compressed state dict
+        :param weight_shape: uncompressed weight shape
+        :param quantization_args: quantization parameters for the weight
+        :return: dictionary mapping compressed parameter names to shape and dtype
         """
-        compressed_dict = {}
-        weight_suffix = ".weight"
-        _LOGGER.debug(
-            f"Compressing model with {len(model_state)} parameterized layers..."
-        )
+        dtype = quantization_args.pytorch_dtype()
+        return {"weight": (weight_shape, dtype)}
 
-        for name, value in tqdm(model_state.items(), desc="Compressing model"):
-            if name.endswith(weight_suffix):
-                prefix = name[: -(len(weight_suffix))]
-                scale = model_state.get(merge_names(prefix, "weight_scale"), None)
-                zp = model_state.get(merge_names(prefix, "weight_zero_point"), None)
-                if scale is not None and zp is not None:
-                    # weight is quantized, compress it
-                    quant_args = names_to_scheme[prefix]
-                    if can_quantize(value, quant_args):
-                        # only quantize if not already quantized
-                        value = quantize(
-                            x=value,
-                            scale=scale,
-                            zero_point=zp,
-                            args=quant_args,
-                            dtype=quant_args.pytorch_dtype(),
-                        )
-            elif name.endswith("zero_point"):
-                if torch.all(value == 0):
-                    # all zero_points are 0, no need to include in
-                    # compressed state_dict
-                    continue
-            compressed_dict[name] = value.to("cpu")
-
-        return compressed_dict
-
-    def decompress(
-        self, path_to_model_or_tensors: str, device: str = "cpu", **kwargs
-    ) -> Generator[Tuple[str, Tensor], None, None]:
+    def compress_weight(
+        self,
+        weight: Tensor,
+        scale: Tensor,
+        zero_point: Optional[Tensor] = None,
+        g_idx: Optional[torch.Tensor] = None,
+        quantization_args: Optional[QuantizationArgs] = None,
+        device: Optional[torch.device] = None,
+    ) -> Dict[str, torch.Tensor]:
         """
-        Reads a compressed state dict located at path_to_model_or_tensors
-        and returns a generator for sequentially decompressing back to a
-        dense state dict
-
-        :param model_path: path to compressed safetensors model (directory with
-            one or more safetensors files) or compressed tensors file
-        :param device: optional device to load intermediate weights into
-        :return: compressed state dict
+        Compresses a single uncompressed weight
+
+        :param weight: uncompressed weight tensor
+        :param scale: quantization scale for weight
+        :param zero_point: quantization zero point for weight
+        :param g_idx: optional mapping from column index to group index
+        :param quantization_args: quantization parameters for weight
+        :param device: optional device to move compressed output to
+        :return: dictionary of compressed weight data
+        """
+        if can_quantize(weight, quantization_args):
+            quantized_weight = quantize(
+                x=weight,
+                scale=scale,
+                zero_point=zero_point,
+                g_idx=g_idx,
+                args=quantization_args,
+                dtype=quantization_args.pytorch_dtype(),
+            )
+
+        if device is not None:
+            quantized_weight = quantized_weight.to(device)
+
+        return {"weight": quantized_weight}
+
+    def decompress_weight(
+        self,
+        compressed_data: Dict[str, Tensor],
+        quantization_args: Optional[QuantizationArgs] = None,
+    ) -> torch.Tensor:
         """
-        weight_mappings = get_nested_weight_mappings(
-            path_to_model_or_tensors, self.COMPRESSION_PARAM_NAMES
+        Decompresses a single compressed weight
+
+        :param compressed_data: dictionary of data needed for decompression
+        :param quantization_args: quantization parameters for the weight
+        :return: tensor of the decompressed weight
+        """
+        weight = compressed_data["weight"]
+        scale = compressed_data["weight_scale"]
+        zero_point = compressed_data.get("weight_zero_point", None)
+        g_idx = compressed_data.get("weight_g_idx", None)
+        decompressed_weight = dequantize(
+            x_q=weight, scale=scale, zero_point=zero_point, g_idx=g_idx
         )
-        for weight_name in weight_mappings.keys():
-            weight_data = {}
-            for param_name, safe_path in weight_mappings[weight_name].items():
-                full_name = merge_names(weight_name, param_name)
-                with safe_open(safe_path, framework="pt", device=device) as f:
-                    weight_data[param_name] = f.get_tensor(full_name)
-
-            if "weight_scale" in weight_data:
-                zero_point = weight_data.get("weight_zero_point", None)
-                scale = weight_data["weight_scale"]
-                decompressed = dequantize(
-                    x_q=weight_data["weight"],
-                    scale=scale,
-                    zero_point=zero_point,
-                )
-                yield merge_names(weight_name, "weight"), decompressed
+
+        return decompressed_weight
 
 
 @Compressor.register(name=CompressionFormat.int_quantized.value)
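
Putting the new per-weight API together, the sketch below round-trips a single weight through QuantizationCompressor.compress_weight and decompress_weight. It is an illustration under assumptions rather than a definitive usage: the QuantizationArgs field values, the toy per-tensor scale computed inline, and the direct import from naive_quantized are not taken from this diff.

    # Hedged sketch: round-trip one weight through the per-weight API shown above.
    # QuantizationArgs fields, the import path, and the toy scale are assumptions.
    import torch
    from compressed_tensors.compressors.naive_quantized import QuantizationCompressor
    from compressed_tensors.quantization import QuantizationArgs

    args = QuantizationArgs(num_bits=8, type="int", symmetric=True, strategy="tensor")
    compressor = QuantizationCompressor(config=None)

    weight = torch.randn(64, 64)
    scale = (weight.abs().max() / 127).reshape(1)   # toy per-tensor scale
    zero_point = torch.zeros(1, dtype=torch.int8)   # symmetric -> zero point of 0

    compressed = compressor.compress_weight(
        weight=weight, scale=scale, zero_point=zero_point, quantization_args=args
    )  # expected to return {"weight": <int8 tensor>}

    restored = compressor.decompress_weight(
        compressed_data={
            **compressed,
            "weight_scale": scale,
            "weight_zero_point": zero_point,
        },
        quantization_args=args,
    )
    # quantization is lossy; values should agree to within one quantization step
    assert torch.allclose(weight, restored, atol=float(scale))

Note that the base Compressor.compress() shown earlier drives the same two hooks over a full state dict, supplying scales and zero points by the weight_scale / weight_zero_point naming convention.
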