mct-nightly 2.1.0.20240731.414__py3-none-any.whl → 2.1.0.20240802.429__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/RECORD +51 -47
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +2 -1
  5. model_compression_toolkit/core/common/framework_implementation.py +5 -9
  6. model_compression_toolkit/core/common/graph/base_graph.py +1 -23
  7. model_compression_toolkit/core/common/graph/base_node.py +52 -33
  8. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +6 -6
  9. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +12 -12
  10. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +2 -2
  11. model_compression_toolkit/core/common/quantization/node_quantization_config.py +17 -38
  12. model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +6 -4
  13. model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +19 -12
  14. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +14 -14
  15. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py +14 -9
  16. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +5 -27
  17. model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +25 -17
  18. model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +10 -6
  19. model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +1 -65
  20. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +12 -5
  21. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +7 -5
  22. model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +3 -3
  23. model_compression_toolkit/core/keras/keras_implementation.py +21 -17
  24. model_compression_toolkit/core/keras/tf_tensor_numpy.py +2 -2
  25. model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +3 -3
  26. model_compression_toolkit/core/pytorch/pytorch_implementation.py +15 -14
  27. model_compression_toolkit/core/pytorch/reader/node_holders.py +1 -1
  28. model_compression_toolkit/core/runner.py +1 -0
  29. model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +2 -2
  30. model_compression_toolkit/gptq/common/gptq_training.py +0 -35
  31. model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +1 -1
  32. model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +1 -1
  33. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +32 -8
  34. model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -2
  35. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +5 -0
  36. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +2 -0
  37. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +2 -0
  38. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +2 -0
  39. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +2 -0
  40. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +2 -0
  41. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +2 -0
  42. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +2 -0
  43. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/__init__.py +16 -0
  44. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +235 -0
  45. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py +132 -0
  46. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +112 -0
  47. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +2 -0
  48. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +2 -0
  49. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/LICENSE.md +0 -0
  50. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/WHEEL +0 -0
  51. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/top_level.txt +0 -0
@@ -58,7 +58,7 @@ class CandidateNodeQuantizationConfig(BaseNodeQuantizationConfig):
58
58
  if activation_quantization_cfg is not None:
59
59
  self.activation_quantization_cfg = activation_quantization_cfg
60
60
  else:
61
- if any(v is None for v in (qc, op_cfg, activation_quantization_fn, activation_quantization_params_fn)):
61
+ if any(v is None for v in (qc, op_cfg, activation_quantization_fn, activation_quantization_params_fn)): # pragma: no cover
62
62
  Logger.critical(
63
63
  "Missing required arguments to initialize a node activation quantization configuration. "
64
64
  "Ensure QuantizationConfig, OpQuantizationConfig, activation quantization function, "
@@ -72,7 +72,7 @@ class CandidateNodeQuantizationConfig(BaseNodeQuantizationConfig):
72
72
  if weights_quantization_cfg is not None:
73
73
  self.weights_quantization_cfg = weights_quantization_cfg
74
74
  else:
75
- if any(v is None for v in (qc, op_cfg, node_attrs_list)):
75
+ if any(v is None for v in (qc, op_cfg, node_attrs_list)): # pragma: no cover
76
76
  Logger.critical("Missing required arguments to initialize a node weights quantization configuration. "
77
77
  "Ensure QuantizationConfig, OpQuantizationConfig, weights quantization function, "
78
78
  "parameters function, and weights attribute quantization config are provided.")
@@ -96,6 +96,7 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
96
96
  self.activation_n_bits = op_cfg.activation_n_bits
97
97
  self.relu_bound_to_power_of_2 = qc.relu_bound_to_power_of_2
98
98
  self.enable_activation_quantization = op_cfg.enable_activation_quantization
99
+ self.is_signed = op_cfg.is_signed
99
100
  self.activation_channel_equalization = qc.activation_channel_equalization
100
101
  self.input_scaling = qc.input_scaling
101
102
  self.min_threshold = qc.min_threshold
@@ -178,20 +179,6 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
178
179
  for param_name, param_value in activation_params.items():
179
180
  self.activation_quantization_params[param_name] = param_value
180
181
 
181
- def has_activation_quantization_params(self) -> bool:
182
- """
183
-
184
- Returns: Whether NodeQuantizationConfig has a activation quantization params or not.
185
-
186
- """
187
- return len(self.activation_quantization_params) > 0
188
-
189
- def no_quantization(self) -> bool:
190
- """
191
- Returns: Whether NodeQuantizationConfig does not have activation params.
192
- """
193
- return (not self.has_activation_quantization_params())
194
-
195
182
  def __eq__(self, other: Any) -> bool:
196
183
  """
197
184
  Compares the object to another object to find if they are equal.
@@ -203,7 +190,7 @@ class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
203
190
 
204
191
  """
205
192
  if not isinstance(other, NodeActivationQuantizationConfig):
206
- return False
193
+ return False # pragma: no cover
207
194
 
208
195
  return self.activation_quantization_fn == other.activation_quantization_fn and \
209
196
  self.activation_quantization_params_fn == other.activation_quantization_params_fn and \
@@ -340,14 +327,6 @@ class WeightsAttrQuantizationConfig:
340
327
  else:
341
328
  self.set_weights_quantization_param({})
342
329
 
343
- def has_weights_quantization_params(self) -> bool:
344
- """
345
-
346
- Returns: Whether NodeQuantizationConfig has weights quantization params or not.
347
-
348
- """
349
- return len(self.weights_quantization_params) > 0
350
-
351
330
  def __eq__(self, other: Any) -> bool:
352
331
  """
353
332
  Compares the object to another object to find if they are equal.
@@ -359,7 +338,7 @@ class WeightsAttrQuantizationConfig:
359
338
 
360
339
  """
361
340
  if not isinstance(other, WeightsAttrQuantizationConfig):
362
- return False
341
+ return False # pragma: no cover
363
342
 
364
343
  return self.weights_quantization_fn == other.weights_quantization_fn and \
365
344
  self.weights_quantization_params_fn == other.weights_quantization_params_fn and \
@@ -419,11 +398,11 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
419
398
  # In Tensorflow, the attribute name is composed of the framework attribute name and the layer name,
420
399
  # therefore, we need to look for the attribute in the op_cfg that is contained in the node attribute's name.
421
400
  attrs_included_in_name = {k: v for k, v in op_cfg.attr_weights_configs_mapping.items() if k in attr}
422
- if len(attrs_included_in_name) > 1:
423
- Logger.error(f"Found multiple attribute in TPC OpConfig that are contained "
424
- f"in the attribute name '{attr}'."
425
- f"Please fix the TPC attribute names mapping such that each operator's attribute would "
426
- f"have a unique matching name.")
401
+ if len(attrs_included_in_name) > 1: # pragma: no cover
402
+ Logger.critical(f"Found multiple attribute in TPC OpConfig that are contained "
403
+ f"in the attribute name '{attr}'."
404
+ f"Please fix the TPC attribute names mapping such that each operator's attribute would "
405
+ f"have a unique matching name.")
427
406
  if len(attrs_included_in_name) == 0:
428
407
  attr_cfg = op_cfg.default_weight_attr_config
429
408
  else:
@@ -446,8 +425,8 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
446
425
  Returns: An attribute quantization configuration.
447
426
 
448
427
  """
449
- if attr_name is None:
450
- Logger.error("Got 'None' attribute name for retrieving weights attribute quantization configuration.")
428
+ if attr_name is None: # pragma: no cover
429
+ Logger.critical("Got 'None' attribute name for retrieving weights attribute quantization configuration.")
451
430
 
452
431
  if isinstance(attr_name, int):
453
432
  # this is a positional attribute
@@ -463,8 +442,8 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
463
442
  # If no attribute with the exact name then an error would be thrown
464
443
  attr_cfg = self.attributes_config_mapping.get(attr_name)
465
444
 
466
- if attr_cfg is None:
467
- Logger.error(f"Weight attribute '{attr_name}' config could not be found.")
445
+ if attr_cfg is None: # pragma: no cover
446
+ Logger.critical(f"Weight attribute '{attr_name}' config could not be found.")
468
447
 
469
448
  return attr_cfg
470
449
 
@@ -519,8 +498,8 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
519
498
  f"{list(attrs_with_name.keys())}.")
520
499
  return attrs_with_name
521
500
 
522
- def set_quant_config_attr(self, config_parameter_name: str, config_parameter_value: Any, attr_name: str = None,
523
- *args: List[Any], **kwargs: Dict[str, Any]):
501
+ def set_quant_config_attr(self, config_parameter_name: str, config_parameter_value: Any,
502
+ attr_name: Union[str, int] = None, *args: List[Any], **kwargs: Dict[str, Any]):
524
503
  """
525
504
  This method overrides the parent class set_quant_config_attr to enable setting a specific weights
526
505
  attribute config parameter.
@@ -546,8 +525,8 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
546
525
  else:
547
526
  Logger.warning(f"Parameter {config_parameter_name} could not be found in the node quantization config of "
548
527
  f"weights attribute {attr_name} and was not updated!")
549
- else:
550
- Logger.error(f"Weights attribute {attr_name} could not be found to set parameter {config_parameter_name}.")
528
+ else: # pragma: no cover
529
+ Logger.critical(f"Weights attribute {attr_name} could not be found to set parameter {config_parameter_name}.")
551
530
 
552
531
  def __eq__(self, other: Any) -> bool:
553
532
  """
@@ -560,7 +539,7 @@ class NodeWeightsQuantizationConfig(BaseNodeQuantizationConfig):
560
539
 
561
540
  """
562
541
  if not isinstance(other, NodeWeightsQuantizationConfig):
563
- return False
542
+ return False # pragma: no cover
564
543
 
565
544
  return self.min_threshold == other.min_threshold and \
566
545
  self.simd_size == other.simd_size and \
@@ -19,7 +19,7 @@ from sklearn.cluster import KMeans
19
19
 
20
20
  import model_compression_toolkit.core.common.quantization.quantization_config as qc
21
21
  from model_compression_toolkit.constants import LUT_VALUES, MIN_THRESHOLD, SCALE_PER_CHANNEL, \
22
- LUT_VALUES_BITWIDTH, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
22
+ LUT_VALUES_BITWIDTH, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES, SIGNED
23
23
  from model_compression_toolkit.core.common.hessian import HessianInfoService
24
24
  from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import \
25
25
  max_power_of_two, int_quantization_with_threshold
@@ -110,7 +110,8 @@ def lut_kmeans_histogram(bins: np.ndarray,
110
110
  constrained: bool = True,
111
111
  n_iter: int = 20,
112
112
  min_threshold: float = MIN_THRESHOLD,
113
- quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> Dict:
113
+ quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
114
+ is_signed: bool = None) -> Dict:
114
115
  """
115
116
  Finds quantization cluster points for non-uniform activation quantization.
116
117
  The quantizer first finds the closest power-of-two number to the max value of the given histogram,
@@ -129,6 +130,7 @@ def lut_kmeans_histogram(bins: np.ndarray,
129
130
  n_iter: Number of iterations to search for the threshold (not used for this method).
130
131
  min_threshold: Minimal threshold to use if threshold is too small.
131
132
  quant_error_method: an error function to optimize the parameters' selection accordingly (not used for this method).
133
+ is_signed: Whether the quantization is signed or not. If None then compute SIGNED value.
132
134
 
133
135
  Returns:
134
136
  A dictionary containing the cluster assignments according to the k-means algorithm and
@@ -148,9 +150,9 @@ def lut_kmeans_histogram(bins: np.ndarray,
148
150
  tensor_max = np.max(bins_with_values)
149
151
  threshold = max_power_of_two(tensor_max, min_threshold)
150
152
 
151
- signed = np.any(bins[:-1][counts != 0] < 0) # Whether histogram contains negative values or not.
153
+ signed = np.any(bins[:-1][counts != 0] < 0) if is_signed is None else is_signed # Whether histogram contains negative values or not.
152
154
  tensor_for_kmeans = int_quantization_with_threshold(data=bins, threshold=threshold, n_bits=LUT_VALUES_BITWIDTH, signed=signed)
153
155
  kmeans.fit(tensor_for_kmeans.reshape(-1, 1), sample_weight=np.insert(counts, 0, 0))
154
156
 
155
157
  return {LUT_VALUES: np.float32(np.round(kmeans.cluster_centers_)),
156
- THRESHOLD: threshold}
158
+ THRESHOLD: threshold, SIGNED: signed}
@@ -16,7 +16,7 @@ import numpy as np
16
16
  from typing import Union, Tuple, Dict
17
17
 
18
18
  import model_compression_toolkit.core.common.quantization.quantization_config as qc
19
- from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
19
+ from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES, SIGNED
20
20
  from model_compression_toolkit.core.common.hessian import HessianInfoService
21
21
  from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_search import \
22
22
  qparams_selection_tensor_search, qparams_selection_histogram_search
@@ -105,7 +105,8 @@ def power_of_two_selection_histogram(bins: np.ndarray,
105
105
  constrained: bool = True,
106
106
  n_iter: int = 20,
107
107
  min_threshold: float = MIN_THRESHOLD,
108
- quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
108
+ quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
109
+ is_signed: bool = None) -> Dict:
109
110
  """
110
111
  Compute the power of two threshold based on the provided QuantizationErrorMethod to quantize a histogram.
111
112
  Different search is applied, depends on the value of the selected QuantizationErrorMethod.
@@ -121,6 +122,7 @@ def power_of_two_selection_histogram(bins: np.ndarray,
121
122
  n_iter: Number of iterations to search for the threshold (not used for this method).
122
123
  min_threshold: Minimal threshold to use if threshold is too small (used only for kl threshold selection).
123
124
  quant_error_method: an error function to optimize the parameters' selection accordingly.
125
+ is_signed: Whether the quantization is signed or not. If None then compute SIGNED value.
124
126
 
125
127
  Returns:
126
128
  Power of two threshold to quantize the histogram in a power of 2 manner.
@@ -128,17 +130,20 @@ def power_of_two_selection_histogram(bins: np.ndarray,
128
130
  if quant_error_method == qc.QuantizationErrorMethod.NOCLIPPING:
129
131
  tensor_max = np.max(np.abs(bins)[1:][counts > 0])
130
132
  threshold = max_power_of_two(tensor_max, min_threshold)
133
+ # Resolve is_signed in case it is None.
134
+ signed = (bins<0).any() if is_signed is None else is_signed
131
135
  else:
132
136
  error_function = get_threshold_selection_histogram_error_function(QuantizationMethod.POWER_OF_TWO,
133
137
  quant_error_method, p)
134
- threshold = qparams_selection_histogram_search(error_function,
135
- bins,
136
- counts,
137
- n_bits,
138
- constrained=constrained,
139
- n_iter=n_iter,
140
- min_threshold=min_threshold)
141
- return {THRESHOLD: threshold}
138
+ threshold, signed = qparams_selection_histogram_search(error_function,
139
+ bins,
140
+ counts,
141
+ n_bits,
142
+ constrained=constrained,
143
+ n_iter=n_iter,
144
+ min_threshold=min_threshold,
145
+ is_signed=is_signed)
146
+ return {THRESHOLD: threshold, SIGNED: signed}
142
147
 
143
148
 
144
149
  def power_of_two_no_clipping_selection_min_max(bins: np.ndarray,
@@ -151,7 +156,8 @@ def power_of_two_no_clipping_selection_min_max(bins: np.ndarray,
151
156
  n_iter: int = 20,
152
157
  min_threshold: float = MIN_THRESHOLD,
153
158
  quant_error_method: qc.QuantizationErrorMethod =
154
- qc.QuantizationErrorMethod.NOCLIPPING) -> dict:
159
+ qc.QuantizationErrorMethod.NOCLIPPING,
160
+ is_signed: bool = None) -> Dict:
155
161
  """
156
162
  Gets a threshold between min and max numbers.
157
163
  If computed threshold is less than min_threshold, min_threshold is returned.
@@ -168,4 +174,5 @@ def power_of_two_no_clipping_selection_min_max(bins: np.ndarray,
168
174
  constrained,
169
175
  n_iter,
170
176
  min_threshold=min_threshold,
171
- quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING)
177
+ quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING,
178
+ is_signed=is_signed)
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
  # ==============================================================================
15
15
  import numpy as np
16
- from typing import Dict
16
+ from typing import Dict, Union
17
17
 
18
18
  from model_compression_toolkit.target_platform_capabilities.target_platform import QuantizationMethod
19
19
  from model_compression_toolkit.core.common.collectors.statistics_collector import BaseStatsCollector
@@ -25,7 +25,7 @@ from model_compression_toolkit.core.common.quantization.node_quantization_config
25
25
 
26
26
  def get_activations_qparams(activation_quant_cfg: NodeActivationQuantizationConfig,
27
27
  nodes_prior_info: NodePriorInfo,
28
- out_stats_container: BaseStatsCollector) -> Dict[str, float]:
28
+ out_stats_container: BaseStatsCollector) -> Dict[str, Union[np.ndarray, float, bool]]:
29
29
  """
30
30
  Compute the activations params for a given node in a graph according to a params function.
31
31
 
@@ -49,7 +49,9 @@ def get_activations_qparams(activation_quant_cfg: NodeActivationQuantizationConf
49
49
  bins_counts)
50
50
  min_value, max_value = out_stats_container.get_min_max_values()
51
51
 
52
- if nodes_prior_info.is_output_bounded():
52
+ if activation_quant_cfg.is_signed is not None:
53
+ signed = activation_quant_cfg.is_signed
54
+ elif nodes_prior_info.is_output_bounded():
53
55
  signed = min_value < 0
54
56
  else:
55
57
  signed = np.any(bins_values[:-1][bins_counts > 0] < 0)
@@ -65,14 +67,12 @@ def get_activations_qparams(activation_quant_cfg: NodeActivationQuantizationConf
65
67
  activation_quant_cfg.activation_quantization_params_fn = \
66
68
  quantization_params_generation.uniform_no_clipping_selection_min_max
67
69
 
68
- activation_params = activation_quant_cfg.activation_quantization_params_fn(bins_values,
69
- bins_counts,
70
- activation_quant_cfg.l_p_value,
71
- activation_quant_cfg.activation_n_bits,
72
- min_value,
73
- max_value,
74
- min_threshold=activation_quant_cfg.min_threshold,
75
- quant_error_method=activation_quant_cfg.activation_error_method)
76
- activation_params.update({SIGNED: signed})
77
-
78
- return activation_params
70
+ return activation_quant_cfg.activation_quantization_params_fn(bins_values,
71
+ bins_counts,
72
+ activation_quant_cfg.l_p_value,
73
+ activation_quant_cfg.activation_n_bits,
74
+ min_value,
75
+ max_value,
76
+ min_threshold=activation_quant_cfg.min_threshold,
77
+ quant_error_method=activation_quant_cfg.activation_error_method,
78
+ is_signed=signed)
@@ -107,7 +107,8 @@ def qparams_selection_histogram_search(error_function: Callable,
107
107
  n_bits: int,
108
108
  constrained: bool = True,
109
109
  n_iter: int = 10,
110
- min_threshold: float = MIN_THRESHOLD):
110
+ min_threshold: float = MIN_THRESHOLD,
111
+ is_signed: bool = None) -> Tuple[np.ndarray, bool]:
111
112
  """
112
113
  Search for an optimal threshold to quantize a histogram of collected float values.
113
114
  The search_methods starts with the constrained no-clipping threshold by the bins' maximal value, and continues with
@@ -123,13 +124,14 @@ def qparams_selection_histogram_search(error_function: Callable,
123
124
  constrained: Whether the threshold should be constrained or not.
124
125
  n_iter: Number of searching iterations.
125
126
  min_threshold: Threshold to return if the computed threshold is smaller than min_threshold.
127
+ is_signed: Whether the quantization is signed or not. If None then compute SIGNED value.
126
128
 
127
129
  Returns:
128
130
  Optimal constrained threshold to quantize the tensor.
129
131
 
130
132
  """
131
133
 
132
- signed = np.any(bins < 0) # Whether histogram contains negative values or not.
134
+ signed = (bins < 0).any() if is_signed is None else is_signed # Whether histogram contains negative values or not.
133
135
  tensor_data = np.abs(bins)
134
136
  tensor_max = np.max(tensor_data)
135
137
  if not constrained:
@@ -150,7 +152,7 @@ def qparams_selection_histogram_search(error_function: Callable,
150
152
  error_list.append(error)
151
153
 
152
154
  # Return the threshold with the minimal error.
153
- return np.maximum(threshold_list[np.argmin(error_list)], min_threshold)
155
+ return np.maximum(threshold_list[np.argmin(error_list)], min_threshold), signed
154
156
 
155
157
 
156
158
  def qparams_symmetric_iterative_minimization(x0: np.ndarray,
@@ -537,7 +539,8 @@ def qparams_symmetric_selection_histogram_search(error_function: Callable,
537
539
  counts: np.ndarray,
538
540
  n_bits: int,
539
541
  n_iter: int = SYMMETRIC_HISTOGRAM_N_ITER,
540
- min_threshold: float = MIN_THRESHOLD):
542
+ min_threshold: float = MIN_THRESHOLD,
543
+ is_signed: bool = None) -> Tuple[np.ndarray, bool]:
541
544
  """
542
545
  search for optimal threshold (per-channel or per-tensor) for symmetric quantization of a histogram,
543
546
  using the iterative optimizer method.
@@ -550,12 +553,13 @@ def qparams_symmetric_selection_histogram_search(error_function: Callable,
550
553
  n_bits: Number of bits to quantize the tensor.
551
554
  n_iter: Number of searching iterations.
552
555
  min_threshold: Threshold to return if the computed threshold is smaller than min_threshold.
556
+ is_signed: Whether the quantization is signed or not. If None then compute SIGNED value.
553
557
 
554
558
  Returns:
555
559
  Optimized threshold for quantifying the histogram.
556
560
 
557
561
  """
558
- signed = np.any(bins[:-1][counts != 0] < 0) # Whether histogram contains negative values or not.
562
+ signed = np.any(bins[:-1][counts != 0] < 0) if is_signed is None else is_signed # Whether histogram contains negative values or not.
559
563
 
560
564
  res = qparams_symmetric_iterative_minimization(x0=get_init_threshold(min_threshold, tensor_max),
561
565
  x=bins,
@@ -570,7 +574,7 @@ def qparams_symmetric_selection_histogram_search(error_function: Callable,
570
574
  n_iter=SYMMETRIC_HISTOGRAM_N_ITER,
571
575
  dec_freq=SYMMETRIC_HISTOGRAM_DEC_FREQ,
572
576
  per_channel=False)
573
- return max(min_threshold, res['param'])
577
+ return max(min_threshold, res['param']), signed
574
578
 
575
579
 
576
580
  def kl_qparams_symmetric_selection_histogram_search(error_function: Callable,
@@ -579,7 +583,8 @@ def kl_qparams_symmetric_selection_histogram_search(error_function: Callable,
579
583
  counts: np.ndarray,
580
584
  n_bits: int,
581
585
  n_iter: int = SYMMETRIC_HISTOGRAM_N_ITER,
582
- min_threshold: float = MIN_THRESHOLD):
586
+ min_threshold: float = MIN_THRESHOLD,
587
+ is_signed: bool = None) -> Tuple[np.ndarray, bool]:
583
588
  """
584
589
  Search for optimal threshold (per-channel or per-tensor) for symmetric quantization of a histogram,
585
590
  with KL-Divergence loss function (needs a separate search function
@@ -599,7 +604,7 @@ def kl_qparams_symmetric_selection_histogram_search(error_function: Callable,
599
604
  Optimized threshold for quantifying the histogram.
600
605
 
601
606
  """
602
- signed = np.any(bins[:-1][counts != 0] < 0) # Whether histogram contains negative values or not.
607
+ signed = np.any(bins[:-1][counts != 0] < 0) if is_signed is None else is_signed # Whether histogram contains negative values or not.
603
608
  res = qparams_symmetric_iterative_minimization(x0=get_init_threshold(min_threshold, tensor_max),
604
609
  x=bins,
605
610
  loss_fn=lambda x, q_x, t:
@@ -617,7 +622,7 @@ def kl_qparams_symmetric_selection_histogram_search(error_function: Callable,
617
622
  n_iter=SYMMETRIC_HISTOGRAM_N_ITER,
618
623
  dec_freq=SYMMETRIC_HISTOGRAM_DEC_FREQ,
619
624
  per_channel=False)
620
- return max(min_threshold, res['param'])
625
+ return max(min_threshold, res['param']), signed
621
626
 
622
627
 
623
628
  def qparams_uniform_selection_histogram_search(error_function: Callable,
@@ -22,10 +22,7 @@ from model_compression_toolkit.defaultdict import DefaultDict
22
22
  from model_compression_toolkit.core.common.framework_info import FrameworkInfo
23
23
  from model_compression_toolkit.core.common.quantization.node_quantization_config import NodeWeightsQuantizationConfig, \
24
24
  WeightsAttrQuantizationConfig
25
-
26
- # If the quantization config does not contain kernel channel mapping or the weights
27
- # quantization is not per-channel, we use a dummy channel mapping.
28
- dummy_channel_mapping = DefaultDict(default_value=(None, None))
25
+ from model_compression_toolkit.logger import Logger
29
26
 
30
27
 
31
28
  def get_weights_qparams(weights_attr_values: np.ndarray,
@@ -64,29 +61,10 @@ def get_weights_qparams(weights_attr_values: np.ndarray,
64
61
  node=node,
65
62
  hessian_info_service=hessian_info_service,
66
63
  num_hessian_samples=num_hessian_samples)
67
- else:
64
+ else: # pragma: no cover
65
+ Logger.error(f"Requested weights quantization parameters computation for node {node.name} without providing a "
66
+ f"weights_quantization_params_fn."
67
+ f"Returning an empty dictionary since no quantization parameters were computed.")
68
68
  weights_params = {}
69
69
 
70
70
  return weights_params, output_channels_axis
71
-
72
-
73
- def _get_kernel_channels_mapping(fw_info:FrameworkInfo,
74
- use_dummy: bool) -> DefaultDict:
75
- """
76
- Get a kernel channel mapping from the framework info, or use dummy mapping (which returns a
77
- tuple of Nones) if use_use_dummy is True.
78
-
79
- Args:
80
- fw_info: Framework info which contains a kernel channels mapping.
81
- use_dummy: Whether to use a dummy mapping or not.
82
-
83
- Returns:
84
- Kernel channels mapping.
85
- """
86
-
87
- # Set a kernel channels mapping
88
- if use_dummy: # If kernel mapping is missing, we use a dummy channels mapping
89
- kernel_channels_mapping = dummy_channel_mapping
90
- else:
91
- kernel_channels_mapping = fw_info.kernel_channels_mapping
92
- return kernel_channels_mapping
@@ -16,7 +16,7 @@ import numpy as np
16
16
  from typing import Union, Tuple, Dict
17
17
 
18
18
  import model_compression_toolkit.core.common.quantization.quantization_config as qc
19
- from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES
19
+ from model_compression_toolkit.constants import MIN_THRESHOLD, THRESHOLD, NUM_QPARAM_HESSIAN_SAMPLES, SIGNED
20
20
  from model_compression_toolkit.core.common.hessian import HessianInfoService
21
21
  from model_compression_toolkit.core.common.quantization.quantization_params_generation.error_functions import \
22
22
  get_threshold_selection_tensor_error_function, get_threshold_selection_histogram_error_function, _kl_error_histogram
@@ -106,7 +106,8 @@ def symmetric_selection_histogram(bins: np.ndarray,
106
106
  constrained: bool = True,
107
107
  n_iter: int = 20,
108
108
  min_threshold: float = MIN_THRESHOLD,
109
- quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
109
+ quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
110
+ is_signed: bool = None) -> Dict:
110
111
  """
111
112
  Compute the optimal threshold based on the provided QuantizationErrorMethod to quantize a histogram.
112
113
  Different search is applied, depends on the value of the selected QuantizationErrorMethod.
@@ -122,6 +123,7 @@ def symmetric_selection_histogram(bins: np.ndarray,
122
123
  n_iter: Number of iterations to search for the threshold (not used for this method).
123
124
  min_threshold: Minimal threshold to use if threshold is too small (used only for kl threshold selection).
124
125
  quant_error_method: an error function to optimize the parameters' selection accordingly.
126
+ is_signed: Whether the quantization is signed or not. If None then compute SIGNED value.
125
127
 
126
128
  Returns:
127
129
  Optimal threshold to quantize the histogram in a symmetric manner.
@@ -129,23 +131,27 @@ def symmetric_selection_histogram(bins: np.ndarray,
129
131
  tensor_max = np.max(np.abs(bins)[1:][counts > 0])
130
132
  if quant_error_method == qc.QuantizationErrorMethod.NOCLIPPING:
131
133
  threshold = get_init_threshold(min_threshold, tensor_max)
134
+ # Resolve is_signed in case it is None.
135
+ signed = (bins<0).any() if is_signed is None else is_signed
132
136
  elif quant_error_method == qc.QuantizationErrorMethod.KL:
133
137
  # search for KL error is separated because the error method signature is different from the other error methods.
134
- threshold = kl_qparams_symmetric_selection_histogram_search(_kl_error_histogram,
135
- tensor_max,
136
- bins,
137
- counts,
138
- n_bits,
139
- min_threshold=min_threshold)
138
+ threshold, signed = kl_qparams_symmetric_selection_histogram_search(_kl_error_histogram,
139
+ tensor_max,
140
+ bins,
141
+ counts,
142
+ n_bits,
143
+ min_threshold=min_threshold,
144
+ is_signed=is_signed)
140
145
  else:
141
146
  error_function = get_threshold_selection_histogram_error_function(QuantizationMethod.SYMMETRIC, quant_error_method, p)
142
- threshold = qparams_symmetric_selection_histogram_search(error_function,
143
- tensor_max,
144
- bins,
145
- counts,
146
- n_bits,
147
- min_threshold=min_threshold)
148
- return {THRESHOLD: threshold}
147
+ threshold, signed = qparams_symmetric_selection_histogram_search(error_function,
148
+ tensor_max,
149
+ bins,
150
+ counts,
151
+ n_bits,
152
+ min_threshold=min_threshold,
153
+ is_signed=is_signed)
154
+ return {THRESHOLD: threshold, SIGNED: signed}
149
155
 
150
156
 
151
157
  def symmetric_no_clipping_selection_min_max(bins: np.ndarray,
@@ -158,7 +164,8 @@ def symmetric_no_clipping_selection_min_max(bins: np.ndarray,
158
164
  n_iter: int = 20,
159
165
  min_threshold: float = MIN_THRESHOLD,
160
166
  quant_error_method: qc.QuantizationErrorMethod =
161
- qc.QuantizationErrorMethod.NOCLIPPING) -> dict:
167
+ qc.QuantizationErrorMethod.NOCLIPPING,
168
+ is_signed: bool = None) -> Dict:
162
169
  """
163
170
  Gets a threshold between min and max numbers.
164
171
  If computed threshold is less than min_threshold, min_threshold is returned.
@@ -175,7 +182,8 @@ def symmetric_no_clipping_selection_min_max(bins: np.ndarray,
175
182
  constrained,
176
183
  n_iter,
177
184
  min_threshold=min_threshold,
178
- quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING)
185
+ quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING,
186
+ is_signed=is_signed)
179
187
 
180
188
 
181
189
  def get_init_threshold(min_threshold: float, tensor_max: np.ndarray, per_channel: bool = False) -> np.ndarray:
@@ -16,7 +16,7 @@ import numpy as np
16
16
  from typing import Union, Tuple, Dict
17
17
 
18
18
  import model_compression_toolkit.core.common.quantization.quantization_config as qc
19
- from model_compression_toolkit.constants import MIN_THRESHOLD, RANGE_MIN, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES
19
+ from model_compression_toolkit.constants import MIN_THRESHOLD, RANGE_MIN, RANGE_MAX, NUM_QPARAM_HESSIAN_SAMPLES, SIGNED
20
20
  from model_compression_toolkit.core.common.hessian import HessianInfoService
21
21
  from model_compression_toolkit.core.common.quantization.quantization_params_generation.qparams_search import \
22
22
  qparams_uniform_selection_tensor_search, qparams_uniform_selection_histogram_search
@@ -114,7 +114,8 @@ def uniform_selection_histogram(bins: np.ndarray,
114
114
  constrained: bool = True,
115
115
  n_iter: int = 20,
116
116
  min_threshold: float = MIN_THRESHOLD,
117
- quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE) -> dict:
117
+ quant_error_method: qc.QuantizationErrorMethod = qc.QuantizationErrorMethod.MSE,
118
+ is_signed: bool = None) -> Dict:
118
119
  """
119
120
  Compute the optimal quantization range based on the provided QuantizationErrorMethod
120
121
  to uniformly quantize the histogram.
@@ -131,6 +132,7 @@ def uniform_selection_histogram(bins: np.ndarray,
131
132
  n_iter: Number of iterations to search for the threshold (not used for this method).
132
133
  min_threshold: Minimal threshold to use if threshold is too small (not used for this method).
133
134
  quant_error_method: an error function to optimize the range parameters selection accordingly.
135
+ is_signed: Whether the quantization is signed or not. If None then compute SIGNED value.
134
136
 
135
137
  Returns:
136
138
  Optimal quantization range to quantize the histogram uniformly.
@@ -139,6 +141,7 @@ def uniform_selection_histogram(bins: np.ndarray,
139
141
  tensor_max = np.max(bins[1:][counts > 0])
140
142
  tensor_min_max = np.array([tensor_min, tensor_max])
141
143
 
144
+ signed = tensor_min < 0 if is_signed is None else is_signed
142
145
  if quant_error_method == qc.QuantizationErrorMethod.NOCLIPPING:
143
146
  mm = tensor_min_max
144
147
  else:
@@ -150,7 +153,7 @@ def uniform_selection_histogram(bins: np.ndarray,
150
153
  n_bits)
151
154
 
152
155
  return {RANGE_MIN: mm[0],
153
- RANGE_MAX: mm[1]}
156
+ RANGE_MAX: mm[1], SIGNED: signed}
154
157
 
155
158
 
156
159
  def uniform_no_clipping_selection_min_max(bins: np.ndarray,
@@ -163,7 +166,8 @@ def uniform_no_clipping_selection_min_max(bins: np.ndarray,
163
166
  n_iter: int = 20,
164
167
  min_threshold: float = MIN_THRESHOLD,
165
168
  quant_error_method: qc.QuantizationErrorMethod =
166
- qc.QuantizationErrorMethod.NOCLIPPING) -> dict:
169
+ qc.QuantizationErrorMethod.NOCLIPPING,
170
+ is_signed: bool = None) -> Dict:
167
171
  """
168
172
  Gets a quantization range between min and max numbers.
169
173
 
@@ -179,5 +183,5 @@ def uniform_no_clipping_selection_min_max(bins: np.ndarray,
179
183
  constrained,
180
184
  n_iter,
181
185
  min_threshold=min_threshold,
182
- quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING)
183
-
186
+ quant_error_method=qc.QuantizationErrorMethod.NOCLIPPING,
187
+ is_signed=is_signed)