mct-nightly 2.1.0.20240731.414__py3-none-any.whl → 2.1.0.20240802.429__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/RECORD +51 -47
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +2 -1
  5. model_compression_toolkit/core/common/framework_implementation.py +5 -9
  6. model_compression_toolkit/core/common/graph/base_graph.py +1 -23
  7. model_compression_toolkit/core/common/graph/base_node.py +52 -33
  8. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +6 -6
  9. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +12 -12
  10. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +2 -2
  11. model_compression_toolkit/core/common/quantization/node_quantization_config.py +17 -38
  12. model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +6 -4
  13. model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +19 -12
  14. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +14 -14
  15. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py +14 -9
  16. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +5 -27
  17. model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +25 -17
  18. model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +10 -6
  19. model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +1 -65
  20. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +12 -5
  21. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +7 -5
  22. model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +3 -3
  23. model_compression_toolkit/core/keras/keras_implementation.py +21 -17
  24. model_compression_toolkit/core/keras/tf_tensor_numpy.py +2 -2
  25. model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +3 -3
  26. model_compression_toolkit/core/pytorch/pytorch_implementation.py +15 -14
  27. model_compression_toolkit/core/pytorch/reader/node_holders.py +1 -1
  28. model_compression_toolkit/core/runner.py +1 -0
  29. model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +2 -2
  30. model_compression_toolkit/gptq/common/gptq_training.py +0 -35
  31. model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +1 -1
  32. model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +1 -1
  33. model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +32 -8
  34. model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +2 -2
  35. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +5 -0
  36. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +2 -0
  37. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +2 -0
  38. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +2 -0
  39. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +2 -0
  40. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +2 -0
  41. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +2 -0
  42. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +2 -0
  43. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/__init__.py +16 -0
  44. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +235 -0
  45. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_keras.py +132 -0
  46. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +112 -0
  47. model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +2 -0
  48. model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +2 -0
  49. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/LICENSE.md +0 -0
  50. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/WHEEL +0 -0
  51. {mct_nightly-2.1.0.20240731.414.dist-info → mct_nightly-2.1.0.20240802.429.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py

@@ -60,33 +60,6 @@ def calculate_delta(threshold: np.ndarray,
     return threshold / (2 ** (n_bits - int(signed)))
 
 
-def calculate_min_max_values(threshold: np.ndarray,
-                             n_bits: int = 8,
-                             signed: bool = False) -> Tuple[np.ndarray, np.ndarray]:
-    """
-    Compute the min/max values of a quantization range according to the threshold,
-    number of bits and whether its signed or unsigned.
-
-    Args:
-        threshold: Threshold of quantization range to compute its min/max values.
-        n_bits: Number of bits used in the quantization.
-        signed: Whether the quantization range is signed or not.
-
-    Returns:
-        Min/max values of quantization range.
-    """
-
-    delta = calculate_delta(threshold,
-                            n_bits=n_bits,
-                            signed=signed)
-
-    # If unsigned: min=0, otherwise its -threshold
-    min_value = int(signed) * -threshold
-    max_value = threshold - delta
-
-    return min_value, max_value
-
-
 def quantize_tensor(tensor_data: np.ndarray,
                     threshold: np.ndarray,
                     n_bits: int,
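For code that imported the removed calculate_min_max_values, the computation is a two-liner on top of calculate_delta (its formula appears in the context line above). A minimal, self-contained sketch of the equivalent logic; min_max_from_threshold is a hypothetical name, not an MCT API:

    import numpy as np

    def min_max_from_threshold(threshold: np.ndarray, n_bits: int = 8, signed: bool = False):
        delta = threshold / (2 ** (n_bits - int(signed)))  # one quantization step
        min_value = int(signed) * -threshold               # 0 if unsigned, -threshold if signed
        max_value = threshold - delta                      # grid tops out one step below threshold
        return min_value, max_value

    # 8-bit signed with threshold 1.0 spans [-1.0, 1.0 - 1/128] = [-1.0, 0.9921875]
    print(min_max_from_threshold(np.array([1.0]), n_bits=8, signed=True))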
@@ -238,7 +211,7 @@ def get_tensor_max(tensor_data: np.ndarray,
 
     """
     if n_bits < 1:
-        Logger.critical(f"Parameter n_bits must be positive; however 'n_bits'={n_bits} was provided.")
+        Logger.critical(f"Parameter n_bits must be positive; however 'n_bits'={n_bits} was provided.")  # pragma: no cover
     if is_uniform_quantization:
         expansion_factor = 1.0
     elif n_bits == 1:
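The # pragma: no cover markers added throughout this release tag defensive error branches, like the n_bits < 1 check above, so that coverage.py excludes them from coverage reports. The marker is honored by default; it only needs restating if a project overrides exclude_lines, e.g. in a .coveragerc:

    [report]
    exclude_lines =
        pragma: no cover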
@@ -337,40 +310,3 @@ def get_output_shape(tensor_shape, channel_axis):
 
     """
     return [-1 if i is channel_axis else 1 for i in range(len(tensor_shape))]
-
-
-def get_range_bounds(tensor_min, tensor_max):
-    """
-    Gets bounds on the quantization range limits for the minimization process.
-    Calculates the bounds in a way that would leave a gap between the possible optimized values
-    and the tensor min-max values.
-
-    Args:
-        tensor_min: min value of a tensor.
-        tensor_max: max value of a tensor.
-
-    Returns: An array with (lbound, ubound) pairs on the quantization range limit values.
-
-    """
-    # choosing bounds that have some gap from the original tensor min/max values.
-    l_bound = tensor_min / 2 if tensor_min > 0 else tensor_min * 2
-    u_bound = tensor_max * 2 if tensor_max > 0 else tensor_min / 2
-    return [(l_bound, u_bound), (l_bound, u_bound)]
-
-
-def get_threshold_bounds(min_threshold, max_threshold):
-    """
-    Gets bounds on the threshold for the minimization process.
-    Calculates the bounds in a way that would leave a gap between the possible optimized threshold
-    and the tensor max values. We use min_threshold as lower-bound to prevent the selected threshold
-    from being zero or negative.
-
-    Args:
-        min_threshold: minimal threshold to use if threshold is too small (not used for this method).
-        max_threshold: maximal threshold to be used in quantization.
-
-    Returns: An array with a pair of (lbound, ubound) on the quantization threshold limit values.
-
-    """
-    max_threshold = max(min_threshold, max_threshold)
-    return [(min_threshold, 2 * max_threshold)]
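get_range_bounds and get_threshold_bounds are no longer importable from quantizers_helpers as of this build; the quantization-params search and selection modules change in the same release (see qparams_search.py and the *_selection.py entries in the file list), so the logic presumably moved to its call sites. External code that imported the helpers can inline the removed definitions, e.g.:

    def get_threshold_bounds(min_threshold, max_threshold):
        # copied from the removed helper: keep the threshold strictly positive
        # and leave headroom above the observed maximum
        max_threshold = max(min_threshold, max_threshold)
        return [(min_threshold, 2 * max_threshold)]

    print(get_threshold_bounds(1e-8, 4.0))  # [(1e-08, 8.0)]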
model_compression_toolkit/core/common/quantization/set_node_quantization_config.py

@@ -64,6 +64,7 @@ def set_quantization_configuration_to_graph(graph: Graph,
 
     for n in graph.nodes:
         set_quantization_configs_to_node(node=n,
+                                         graph=graph,
                                          quant_config=quant_config,
                                          fw_info=graph.fw_info,
                                          tpc=graph.tpc,
@@ -72,6 +73,7 @@ def set_quantization_configuration_to_graph(graph: Graph,
 
 
 def set_quantization_configs_to_node(node: BaseNode,
+                                     graph: Graph,
                                      quant_config: QuantizationConfig,
                                      fw_info: FrameworkInfo,
                                      tpc: TargetPlatformCapabilities,
@@ -81,19 +83,22 @@ def set_quantization_configs_to_node(node: BaseNode,
 
     Args:
         node: Node to set its quantization configurations.
+        graph: Model's internal representation graph.
         quant_config: Quantization configuration to generate the node's configurations from.
         fw_info: Information needed for quantization about the specific framework.
         tpc: TargetPlatformCapabilities to get default OpQuantizationConfig.
         mixed_precision_enable: is mixed precision enabled.
     """
     node_qc_options = node.get_qco(tpc)
+    base_config, node_qc_options_list = node.filter_node_qco_by_graph(tpc, graph.get_next_nodes(node), node_qc_options)
 
     # Create QC candidates for weights and activation combined
     weight_channel_axis = fw_info.kernel_channels_mapping.get(node.type)
     node.candidates_quantization_cfg = _create_node_candidates_qc(quant_config,
                                                                   fw_info,
                                                                   weight_channel_axis,
-                                                                  node_qc_options,
+                                                                  node_qc_options_list,
+                                                                  base_config,
                                                                   node,
                                                                   mixed_precision_enable=mixed_precision_enable)
 
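The new graph parameter is what makes candidate filtering graph-aware: filter_node_qco_by_graph receives the node's successors via graph.get_next_nodes(node) and can drop candidates whose output bit-width no successor accepts, pairing with the new supported_input_activation_n_bits field on OpQuantizationConfig (see the op_quantization_config.py hunks below). It is presumably also why the shift-negative substitution below now inserts the pad node into the graph before configuring it. An illustrative, self-contained sketch of the filtering idea, with a stub Cfg class standing in for OpQuantizationConfig (this is not MCT's actual implementation):

    from dataclasses import dataclass
    from typing import List, Tuple

    @dataclass
    class Cfg:  # stand-in for OpQuantizationConfig
        activation_n_bits: int
        supported_input_activation_n_bits: Tuple[int, ...]

    def filter_by_next_nodes(candidates: List[Cfg], next_cfgs: List[Cfg]) -> List[Cfg]:
        # keep only candidates every successor can accept as input
        return [c for c in candidates
                if all(c.activation_n_bits in nxt.supported_input_activation_n_bits
                       for nxt in next_cfgs)]

    cands = [Cfg(8, (8, 16)), Cfg(16, (8, 16))]
    successor = Cfg(8, (8,))  # accepts only 8-bit inputs
    print(filter_by_next_nodes(cands, [successor]))  # keeps only the 8-bit candidate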
@@ -186,7 +191,8 @@ def _create_node_single_candidate_qc(qc: QuantizationConfig,
 def _create_node_candidates_qc(qc: QuantizationConfig,
                                fw_info: FrameworkInfo,
                                weight_channel_axis: Tuple[int, int],
-                               node_qc_options: QuantizationConfigOptions,
+                               node_qc_options_list: List[OpQuantizationConfig],
+                               base_config: OpQuantizationConfig,
                                node: BaseNode,
                                mixed_precision_enable: bool = False) -> List[CandidateNodeQuantizationConfig]:
     """
@@ -196,7 +202,8 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
         qc: Quantization configuration the quantization process should follow.
         fw_info: Framework information (e.g., which layers should have their kernels' quantized).
         weight_channel_axis: (Output, Input) channel index of the node's kernel.
-        node_qc_options: QuantizationConfigOptions for the node with quantization candidates information.
+        node_qc_options_list: List of quantization configs of node.
+        base_config: Base quantization config for node.
         node: A node to set quantization configuration candidates to.
         mixed_precision_enable: is mixed precision enabled
 
@@ -208,7 +215,7 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
     node_attrs_list = node.get_node_weights_attributes()
 
     if mixed_precision_enable:
-        for op_cfg in node_qc_options.quantization_config_list:
+        for op_cfg in node_qc_options_list:
            candidate_qc = copy.deepcopy(qc)
            candidates.append(_create_node_single_candidate_qc(candidate_qc,
                                                               fw_info,
@@ -220,7 +227,7 @@ def _create_node_candidates_qc(qc: QuantizationConfig,
        candidates.append(_create_node_single_candidate_qc(qc,
                                                           fw_info,
                                                           weight_channel_axis,
-                                                          node_qc_options.base_config,
+                                                          base_config,
                                                           node_attrs_list))
 
    return candidates
model_compression_toolkit/core/common/substitutions/shift_negative_activation.py

@@ -349,9 +349,15 @@ def shift_negative_function(graph: Graph,
                             add_node.output_shape,
                             pad_top, pad_btm, pad_left, pad_right)
 
+    # Insert a pad node between the add node to the op2d, and create statistics for the pad node
+    insert_node_before_node(graph,
+                            node_to_insert=pad_node,
+                            last_node=op2d_node)
+
     # Set quantization configuration to node, even though we do not quantize it:
     set_quantization_configs_to_node(fw_info=fw_info,
                                      node=pad_node,
+                                     graph=graph,
                                      quant_config=core_config.quantization_config,
                                      tpc=graph.tpc,
                                      mixed_precision_enable=core_config.mixed_precision_enable)
@@ -361,11 +367,6 @@ def shift_negative_function(graph: Graph,
     for attr in pad_node.get_node_weights_attributes():
         candidate_qc.weights_quantization_cfg.get_attr_config(attr).enable_weights_quantization = False
 
-    # Insert a pad node between the add node to the op2d, and create statistics for the pad node
-    insert_node_before_node(graph,
-                            node_to_insert=pad_node,
-                            last_node=op2d_node)
-
     graph.set_out_stats_collector_to_node(pad_node,
                                           add_node_stats_collector)  # We ignore the padding effect on statistics
 
@@ -373,6 +374,7 @@ def shift_negative_function(graph: Graph,
 
     set_quantization_configs_to_node(fw_info=fw_info,
                                      node=add_node,
+                                     graph=graph,
                                      quant_config=core_config.quantization_config,
                                      tpc=graph.tpc,
                                      mixed_precision_enable=core_config.mixed_precision_enable)
model_compression_toolkit/core/keras/back2framework/factory_model_builder.py

@@ -37,10 +37,10 @@ def get_keras_model_builder(mode: ModelBuilderMode) -> type:
         Keras model builder for the given mode.
     """
 
-    if not isinstance(mode, ModelBuilderMode):
+    if not isinstance(mode, ModelBuilderMode):  # pragma: no cover
         Logger.critical(f"Expected a ModelBuilderMode type for 'mode', but received {type(mode)} instead.")
-    if mode is None:
+    if mode is None:  # pragma: no cover
         Logger.critical(f"get_keras_model_builder received 'mode' is None")
-    if mode not in keras_model_builders.keys():
+    if mode not in keras_model_builders.keys():  # pragma: no cover
         Logger.critical(f"'mode' {mode} is not recognized in the Keras model builders factory.")
     return keras_model_builders.get(mode)
model_compression_toolkit/core/keras/keras_implementation.py

@@ -21,6 +21,7 @@ from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantization
 from tensorflow.keras.models import Model
 
 from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
+from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode, HessianInfoService
 from model_compression_toolkit.core.keras.graph_substitutions.substitutions.remove_identity import RemoveIdentity
 from model_compression_toolkit.core.keras.hessian.activation_hessian_scores_calculator_keras import \
@@ -421,44 +422,47 @@ class KerasImplementation(FrameworkImplementation):
 
         return False
 
-    def get_mp_node_distance_fn(self, layer_class: type,
-                                framework_attrs: Dict[str, Any],
-                                compute_distance_fn: Callable = None,
-                                axis: int = None,
-                                norm_mse: bool = False) -> Callable:
+    def get_mp_node_distance_fn(self, n: BaseNode,
+                                compute_distance_fn: Callable = None,
+                                norm_mse: bool = False) -> Tuple[Callable, int]:
         """
         A mapping between layers' types and a distance function for computing the distance between
         two tensors in mixed precision (for loss computation purposes). Returns a specific function if node of specific types is
         given, or a default (normalized MSE) function otherwise.
 
         Args:
-            layer_class: Class path of a model's layer.
-            framework_attrs: Framework attributes the layer had which the graph node holds.
+            n: Node to choose distance function for.
             compute_distance_fn: An optional distance function to use globally for all nodes.
-            axis: The axis on which the operation is preformed (if specified).
             norm_mse: whether to normalize mse distance function.
 
-        Returns: A distance function between two tensors.
+        Returns: A distance function between two tensors and a axis on which the distance is computed (if exists).
         """
 
+        axis = n.framework_attr.get(keras_constants.AXIS) \
+            if not isinstance(n, FunctionalNode) else n.op_call_kwargs.get(keras_constants.AXIS)
+
+        layer_class = n.layer_class
+        framework_attrs = n.framework_attr
+
         if compute_distance_fn is not None:
-            return compute_distance_fn
+            return compute_distance_fn, axis
 
         if layer_class == Activation:
             node_type_name = framework_attrs[ACTIVATION]
             if node_type_name == SOFTMAX and axis is not None:
-                return compute_kl_divergence
+                return compute_kl_divergence, axis
             elif node_type_name == SIGMOID:
-                return compute_cs
+                return compute_cs, axis
         elif axis is not None and (layer_class == tf.nn.softmax or layer_class == tf.keras.layers.Softmax
                                    or (layer_class == TFOpLambda and
                                        SOFTMAX in framework_attrs[keras_constants.FUNCTION])):
-            return compute_kl_divergence
-        elif layer_class == tf.nn.sigmoid:
-            return compute_cs
+            return compute_kl_divergence, axis
+        elif layer_class == tf.nn.sigmoid or (layer_class == TFOpLambda and
+                                              SIGMOID in framework_attrs[keras_constants.FUNCTION]):
+            return compute_cs, axis
        elif layer_class == Dense:
-            return compute_cs
-        return partial(compute_mse, norm=norm_mse)
+            return compute_cs, axis
+        return partial(compute_mse, norm=norm_mse), axis
 
     def get_hessian_scores_calculator(self,
                                       graph: Graph,
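Callers of get_mp_node_distance_fn now pass the node itself and unpack a (distance_fn, axis) pair; the axis lookup moved inside the framework implementation (the sensitivity_evaluation.py changes in this build are the matching call-site update). A hedged stub of the new return shape, with placeholder names rather than MCT APIs:

    from functools import partial

    def compute_mse(a, b, norm=False):  # stand-in for MCT's compute_mse
        mse = sum((x - y) ** 2 for x, y in zip(a, b)) / len(a)
        return mse / (sum(x * x for x in a) / len(a)) if norm else mse

    def get_mp_node_distance_fn_stub():
        # returns (fn, axis); axis may be None when the op has no axis attribute
        return partial(compute_mse, norm=True), None

    distance_fn, axis = get_mp_node_distance_fn_stub()  # new: unpack both values
    print(distance_fn([1.0, 2.0], [1.5, 2.5]), axis)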
model_compression_toolkit/core/keras/tf_tensor_numpy.py

@@ -36,7 +36,7 @@ def to_tf_tensor(tensor):
         return (to_tf_tensor(t) for t in tensor)
     elif isinstance(tensor, np.ndarray):
         return tf.convert_to_tensor(tensor.astype(np.float32))
-    else:
+    else:  # pragma: no cover
         Logger.critical(f'Unsupported type for conversion to TF tensor: {type(tensor)}.')
 
 
@@ -69,5 +69,5 @@ def tf_tensor_to_numpy(tensor: Union[List, Tuple, np.ndarray, tf.Tensor, float],
         return np.array([np_tensor]) if np.isscalar(np_tensor) else np_tensor
     elif isinstance(tensor, float):
         return np.array([tensor])
-    else:
+    else:  # pragma: no cover
         Logger.critical(f'Unsupported type for conversion to Numpy array: {type(tensor)}.')
model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py

@@ -37,10 +37,10 @@ def get_pytorch_model_builder(mode: ModelBuilderMode) -> type:
         PyTorch model builder for the given mode.
     """
 
-    if not isinstance(mode, ModelBuilderMode):
+    if not isinstance(mode, ModelBuilderMode):  # pragma: no cover
         Logger.critical(f"Expected a ModelBuilderMode type for 'mode' parameter; received {type(mode)} instead.")
-    if mode is None:
+    if mode is None:  # pragma: no cover
         Logger.critical(f"Received 'mode' parameter is None.")
-    if mode not in pytorch_model_builders.keys():
+    if mode not in pytorch_model_builders.keys():  # pragma: no cover
         Logger.critical(f"'mode' parameter {mode} is not supported by the PyTorch model builders factory.")
     return pytorch_model_builders.get(mode)
model_compression_toolkit/core/pytorch/pytorch_implementation.py

@@ -30,6 +30,7 @@ from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, Co
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
+from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
 from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode, HessianInfoService
 from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
 from model_compression_toolkit.core.common.mixed_precision.set_layer_to_bitwidth import set_layer_to_bitwidth
@@ -403,36 +404,36 @@ class PytorchImplementation(FrameworkImplementation):
             return True
         return False
 
-    def get_mp_node_distance_fn(self, layer_class: type,
-                                framework_attrs: Dict[str, Any],
-                                compute_distance_fn: Callable = None,
-                                axis: int = None,
-                                norm_mse: bool = False) -> Callable:
+    def get_mp_node_distance_fn(self, n: BaseNode,
+                                compute_distance_fn: Callable = None,
+                                norm_mse: bool = False) -> Tuple[Callable, int]:
         """
         A mapping between layers' types and a distance function for computing the distance between
         two tensors in mixed precision (for loss computation purposes). Returns a specific function if node of specific types is
         given, or a default (normalized MSE) function otherwise.
 
         Args:
-            layer_class: Class path of a model's layer.
-            framework_attrs: Framework attributes the layer had which the graph node holds.
+            n: Node to choose distance function for.
             compute_distance_fn: An optional distance function to use globally for all nodes.
-            axis: The axis on which the operation is preformed (if specified).
             norm_mse: whether to normalize mse distance function.
 
-        Returns: A distance function between two tensors.
+        Returns: A distance function between two tensors and a axis on which the distance is computed (if exists).
         """
+        axis = n.framework_attr.get(pytorch_constants.DIM) if not (
+            isinstance(n, FunctionalNode)) else n.op_call_kwargs.get(pytorch_constants.DIM)
+
+        layer_class = n.layer_class
 
         if compute_distance_fn is not None:
-            return compute_distance_fn
+            return compute_distance_fn, axis
 
         elif layer_class in [Softmax, softmax] and axis is not None:
-            return compute_kl_divergence
+            return compute_kl_divergence, axis
         elif layer_class in [Sigmoid, sigmoid]:
-            return compute_cs
+            return compute_cs, axis
         elif layer_class == Linear:
-            return compute_cs
-        return partial(compute_mse, norm=norm_mse)
+            return compute_cs, axis
+        return partial(compute_mse, norm=norm_mse), axis
 
     def is_output_node_compatible_for_hessian_score_computation(self,
                                                                 node: BaseNode) -> bool:
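The PyTorch side mirrors the Keras change, with one detail worth calling out: the axis ('dim') is read from framework_attr for module nodes but from op_call_kwargs for functional nodes. A runnable sketch of that dispatch, using plain dicts in place of MCT node objects (resolve_dim is a hypothetical helper, not an MCT API):

    def resolve_dim(framework_attr, op_call_kwargs, is_functional):
        # functional ops (e.g. F.softmax(x, dim=-1)) carry kwargs on the call;
        # module ops (e.g. torch.nn.Softmax(dim=1)) carry them as attributes
        return op_call_kwargs.get("dim") if is_functional else framework_attr.get("dim")

    print(resolve_dim({"dim": 1}, {}, is_functional=False))   # 1
    print(resolve_dim({}, {"dim": -1}, is_functional=True))   # -1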
model_compression_toolkit/core/pytorch/reader/node_holders.py

@@ -25,7 +25,7 @@ class DummyPlaceHolder(torch.nn.Module):
     """
 
     def __name__(self):
-        return PLACEHOLDER
+        return PLACEHOLDER  # pragma: no cover
 
     def forward(self, x):
         return x
model_compression_toolkit/core/runner.py

@@ -222,5 +222,6 @@ def _set_final_resource_utilization(graph: Graph,
 
     final_ru = ResourceUtilization()
     final_ru.set_resource_utilization_by_target(final_ru_dict)
+    print(final_ru)
     graph.user_info.final_resource_utilization = final_ru
     graph.user_info.mixed_precision_cfg = final_bit_widths_config
model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py

@@ -24,7 +24,7 @@ if FOUND_TF:
     if version.parse(tf.__version__) >= version.parse("2.13"):
         from keras.src.engine.base_layer import Layer
         from keras.src.engine.input_layer import InputLayer
-    else:
+    else:  # pragma: no cover
         from keras.engine.base_layer import Layer
         from keras.engine.input_layer import InputLayer
 
@@ -57,7 +57,7 @@ if FOUND_TF:
                             f'KerasQuantizationWrapper must have a weights_quantizers but has a '
                             f'{type(layer.weights_quantizers)} object')  # pragma: no cover
 
-        if len(layer.weights_quantizers) == 0:
+        if len(layer.weights_quantizers) == 0:  # pragma: no cover
             Logger.critical(f'KerasQuantizationWrapper must have at least one weight quantizer, but found {len(layer.weights_quantizers)} quantizers. If layer is not quantized it should be a Keras layer.')
 
         for _, weights_quantizer in layer.weights_quantizers.items():
model_compression_toolkit/gptq/common/gptq_training.py

@@ -219,41 +219,6 @@ class GPTQTrainer(ABC):
 
         return hessian_approx_score_by_image
 
-    def _get_approximations_by_interest_point(self, approximations: Dict, image_idx: int) -> List:
-        """
-        Retrieves hessian approximations for a specific image index.
-
-        Args:
-            approximations (Dict): Hessian approximations.
-            image_idx (int): Image index.
-
-        Returns:
-            List: Hessian approximations for the given image index.
-        """
-        approx_by_interest_point = []
-        for target_node in self.compare_points:
-            hessian_approx_scores = approximations[target_node][image_idx]
-            self._validate_scores_approximation(hessian_approx_scores)
-            approx_by_interest_point.append(hessian_approx_scores[0])
-        return approx_by_interest_point
-
-    @staticmethod
-    def _validate_scores_approximation(hessian_approx_scores: List):
-        """
-        Validates the structure and length of the Hessian-approximation scores.
-
-        Args:
-            hessian_approx_scores: Scores to validate.
-        """
-        if not isinstance(hessian_approx_scores, list):
-            Logger.critical(f"Scores approximation was expected to be a list but is of type: {type(hessian_approx_scores)}.")  # pragma: no cover
-        if len(hessian_approx_scores) != 1:
-            Logger.critical(f"Scores approximation was expected to have a length of 1 "
-                            f"(for computations with granularity set to 'HessianInfoGranularity.PER_TENSOR') "
-                            f"but has a length of {len(hessian_approx_scores)}."
-                            )  # pragma: no cover
-
-
     @abstractmethod
     def build_gptq_model(self):
         """
model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py

@@ -38,7 +38,7 @@ if FOUND_TF:
 
             super().__init__(quantization_config)
 
-else:
+else:  # pragma: no cover
     class BaseKerasQATTrainableQuantizer(BaseKerasTrainableQuantizer):
         def __init__(self,
                      quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py

@@ -39,7 +39,7 @@ if FOUND_TORCH:
         """
         super().__init__(quantization_config)
 
-else:
+else:  # pragma: no cover
     class BasePytorchQATTrainableQuantizer(BasePytorchTrainableQuantizer):
         def __init__(self,
                      quantization_config: Union[TrainableQuantizerWeightsConfig, TrainableQuantizerActivationConfig]):
model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py

@@ -14,7 +14,7 @@
 # ==============================================================================
 
 import copy
-from typing import List, Dict, Union, Any
+from typing import List, Dict, Union, Any, Tuple
 
 from mct_quantizers import QuantizationMethod
 from model_compression_toolkit.constants import FLOAT_BITWIDTH
@@ -96,7 +96,7 @@ class AttributeQuantizationConfig:
             Whether this configuration is equal to another object or not.
         """
         if not isinstance(other, AttributeQuantizationConfig):
-            return False
+            return False  # pragma: no cover
         return self.weights_quantization_method == other.weights_quantization_method and \
                self.weights_n_bits == other.weights_n_bits and \
                self.weights_per_channel_threshold == other.weights_per_channel_threshold and \
@@ -114,11 +114,13 @@ class OpQuantizationConfig:
                  attr_weights_configs_mapping: Dict[str, AttributeQuantizationConfig],
                  activation_quantization_method: QuantizationMethod,
                  activation_n_bits: int,
+                 supported_input_activation_n_bits: Union[int, Tuple[int]],
                  enable_activation_quantization: bool,
                  quantization_preserving: bool,
                  fixed_scale: float,
                  fixed_zero_point: int,
-                 simd_size: int
+                 simd_size: int,
+                 is_signed: bool = None
                  ):
         """
 
@@ -127,10 +129,12 @@ class OpQuantizationConfig:
             attr_weights_configs_mapping (Dict[str, AttributeQuantizationConfig]): A mapping between an op attribute name and its quantization configuration.
             activation_quantization_method (QuantizationMethod): Which method to use from QuantizationMethod for activation quantization.
             activation_n_bits (int): Number of bits to quantize the activations.
+            supported_input_activation_n_bits (int or Tuple[int]): Number of bits that operator accepts as input.
             enable_activation_quantization (bool): Whether to quantize the model activations or not.
             quantization_preserving (bool): Whether quantization parameters should be the same for an operator's input and output.
             fixed_scale (float): Scale to use for an operator quantization parameters.
             fixed_zero_point (int): Zero-point to use for an operator quantization parameters.
+            is_signed (bool): Force activation quantization signedness (None means don't force).
             simd_size (int): Per op integer representing the Single Instruction, Multiple Data (SIMD) width of an operator. It indicates the number of data elements that can be fetched and processed simultaneously in a single instruction.
 
         """
@@ -140,10 +144,17 @@ class OpQuantizationConfig:
 
         self.activation_quantization_method = activation_quantization_method
         self.activation_n_bits = activation_n_bits
+        if isinstance(supported_input_activation_n_bits, tuple):
+            self.supported_input_activation_n_bits = supported_input_activation_n_bits
+        elif isinstance(supported_input_activation_n_bits, int):
+            self.supported_input_activation_n_bits = (supported_input_activation_n_bits,)
+        else:
+            Logger.critical(f"Supported_input_activation_n_bits only accepts int or tuple of ints, but got {type(supported_input_activation_n_bits)}")  # pragma: no cover
         self.enable_activation_quantization = enable_activation_quantization
         self.quantization_preserving = quantization_preserving
         self.fixed_scale = fixed_scale
         self.fixed_zero_point = fixed_zero_point
+        self.is_signed = is_signed
         self.simd_size = simd_size
 
     def get_info(self):
@@ -152,7 +163,7 @@ class OpQuantizationConfig:
         Returns: Info about the quantization configuration as a dictionary.
 
         """
-        return self.__dict__
+        return self.__dict__  # pragma: no cover
 
     def clone_and_edit(self, attr_to_edit: Dict[str, Dict[str, Any]] = {}, **kwargs):
         """
@@ -188,14 +199,26 @@ class OpQuantizationConfig:
             Whether this configuration is equal to another object or not.
         """
         if not isinstance(other, OpQuantizationConfig):
-            return False
+            return False  # pragma: no cover
         return self.default_weight_attr_config == other.default_weight_attr_config and \
                self.attr_weights_configs_mapping == other.attr_weights_configs_mapping and \
                self.activation_quantization_method == other.activation_quantization_method and \
                self.activation_n_bits == other.activation_n_bits and \
+               self.supported_input_activation_n_bits == other.supported_input_activation_n_bits and \
               self.enable_activation_quantization == other.enable_activation_quantization and \
+               self.is_signed == other.is_signed and \
               self.simd_size == other.simd_size
 
+    @property
+    def max_input_activation_n_bits(self) -> int:
+        """
+        Get maximum supported input bit-width.
+
+        Returns: Maximum supported input bit-width.
+
+        """
+        return max(self.supported_input_activation_n_bits)
+
 
 class QuantizationConfigOptions:
     """
279
302
  if attrs is None:
280
303
  attrs_to_update = list(qc.attr_weights_configs_mapping.keys())
281
304
  else:
282
- if not isinstance(attrs, List):
305
+ if not isinstance(attrs, List): # pragma: no cover
283
306
  Logger.critical(f"Expected a list of attributes but received {type(attrs)}.")
284
307
  attrs_to_update = attrs
285
308
 
286
309
  for attr in attrs_to_update:
287
- if qc.attr_weights_configs_mapping.get(attr) is None:
310
+ if qc.attr_weights_configs_mapping.get(attr) is None: # pragma: no cover
288
311
  Logger.critical(f'Editing attributes is only possible for existing attributes in the configuration\'s '
289
312
  f'weights config mapping; {attr} does not exist in {qc}.')
290
313
  self.__edit_quantization_configuration(qc.attr_weights_configs_mapping[attr], kwargs)
@@ -310,6 +333,7 @@ class QuantizationConfigOptions:
310
333
  # If not, add base_config to the list of configurations to update
311
334
  cfgs_to_update = [cfg for cfg in qc_options.quantization_config_list]
312
335
  if not any(qc_options.base_config is cfg for cfg in cfgs_to_update):
336
+ # TODO: add test for this case
313
337
  cfgs_to_update.append(qc_options.base_config)
314
338
 
315
339
  for qc in cfgs_to_update:
@@ -319,7 +343,7 @@ class QuantizationConfigOptions:
319
343
  new_attr_mapping = {}
320
344
  for attr in list(qc.attr_weights_configs_mapping.keys()):
321
345
  new_key = layer_attrs_mapping.get(attr)
322
- if new_key is None:
346
+ if new_key is None: # pragma: no cover
323
347
  Logger.critical(f"Attribute \'{attr}\' does not exist in the provided attribute mapping.")
324
348
 
325
349
  new_attr_mapping[new_key] = qc.attr_weights_configs_mapping.pop(attr)
model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py

@@ -96,7 +96,7 @@ class TargetPlatformModel(ImmutableClass):
         for op_set in self.operator_set:
             if operators_set_name == op_set.name:
                 return op_set.qc_options
-        return get_default_quantization_config_options()
+        return self.default_qco
 
     def get_default_op_quantization_config(self) -> OpQuantizationConfig:
         """
@@ -158,7 +158,7 @@ class TargetPlatformModel(ImmutableClass):
             self.fusing_patterns.append(tp_model_component)
         elif isinstance(tp_model_component, OperatorsSetBase):
             self.operator_set.append(tp_model_component)
-        else:
+        else:  # pragma: no cover
             Logger.critical(f'Attempted to append an unrecognized TargetPlatformModelComponent of type: {type(tp_model_component)}.')
 
     def __enter__(self):