mct-nightly 2.0.0.20240417.406__py3-none-any.whl → 2.0.0.20240419.358__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/RECORD +60 -57
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/constants.py +2 -0
  5. model_compression_toolkit/core/common/graph/base_graph.py +2 -2
  6. model_compression_toolkit/core/common/graph/base_node.py +26 -9
  7. model_compression_toolkit/core/common/graph/functional_node.py +18 -1
  8. model_compression_toolkit/core/common/hessian/hessian_info_service.py +2 -3
  9. model_compression_toolkit/core/common/hessian/trace_hessian_request.py +1 -3
  10. model_compression_toolkit/core/common/network_editors/node_filters.py +4 -3
  11. model_compression_toolkit/core/common/quantization/node_quantization_config.py +0 -5
  12. model_compression_toolkit/core/common/quantization/quantization_config.py +5 -2
  13. model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +67 -4
  14. model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +12 -4
  15. model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +14 -4
  16. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +30 -3
  17. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +17 -7
  18. model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +14 -3
  19. model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +13 -3
  20. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +16 -3
  21. model_compression_toolkit/core/common/similarity_analyzer.py +16 -4
  22. model_compression_toolkit/core/common/substitutions/remove_identity.py +48 -0
  23. model_compression_toolkit/core/graph_prep_runner.py +10 -4
  24. model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +4 -1
  25. model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -7
  26. model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +1 -1
  27. model_compression_toolkit/core/keras/graph_substitutions/substitutions/remove_identity.py +51 -0
  28. model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
  29. model_compression_toolkit/core/keras/keras_implementation.py +13 -11
  30. model_compression_toolkit/core/keras/keras_node_prior_info.py +4 -4
  31. model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +4 -5
  32. model_compression_toolkit/core/keras/reader/common.py +2 -2
  33. model_compression_toolkit/core/keras/reader/node_builder.py +28 -9
  34. model_compression_toolkit/core/keras/tf_tensor_numpy.py +5 -2
  35. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +34 -21
  36. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_folding.py +8 -8
  37. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +2 -2
  38. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
  39. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +4 -4
  40. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py +50 -0
  41. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
  42. model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +8 -8
  43. model_compression_toolkit/core/pytorch/pytorch_implementation.py +7 -6
  44. model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +2 -2
  45. model_compression_toolkit/core/quantization_prep_runner.py +6 -2
  46. model_compression_toolkit/core/runner.py +5 -2
  47. model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +5 -1
  48. model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +9 -2
  49. model_compression_toolkit/gptq/keras/quantization_facade.py +2 -1
  50. model_compression_toolkit/gptq/pytorch/quantization_facade.py +3 -1
  51. model_compression_toolkit/gptq/runner.py +1 -0
  52. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +5 -5
  53. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +1 -1
  54. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +20 -6
  55. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py +1 -1
  56. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +22 -8
  57. model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py +1 -1
  58. {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/LICENSE.md +0 -0
  59. {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/WHEEL +0 -0
  60. {mct_nightly-2.0.0.20240417.406.dist-info → mct_nightly-2.0.0.20240419.358.dist-info}/top_level.txt +0 -0
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py
@@ -0,0 +1,50 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import torch
+
+from model_compression_toolkit.core.common.substitutions.remove_identity import remove_identity_node
+from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
+from model_compression_toolkit.core import common
+from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.graph.base_node import BaseNode
+
+
+class RemoveIdentity(common.BaseSubstitution):
+    """
+    Remove `torch.nn.Identity` layers from the graph.
+    """
+
+    def __init__(self):
+        nodes = NodeOperationMatcher(torch.nn.Identity)
+        super().__init__(matcher_instance=nodes)
+
+    def substitute(self,
+                   graph: Graph,
+                   node: BaseNode) -> Graph:
+        """
+        Perform the substitution of a `torch.nn.Identity` node by reconnecting
+        its input directly to its output, effectively removing the node from
+        the graph.
+
+        Args:
+            graph: The current graph of operations where the node resides.
+            node: The specific `BaseNode` that is matched to be an Identity operation.
+
+        Returns:
+            Graph: The updated graph after removing the identity node.
+        """
+        return remove_identity_node(graph, node)
+
+
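Both this PyTorch substitution and its new Keras counterpart (remove_identity.py under core/keras, +51 in the file list) delegate the actual graph surgery to the shared remove_identity_node helper added in core/common/substitutions/remove_identity.py. As a rough standalone sketch of what removing an identity node entails (an illustration, not MCT's implementation), the operation reconnects the node's single producer to all of its consumers and drops the node. networkx is a reasonable stand-in here, since MCT's Graph builds on it:

import networkx as nx

def remove_identity_sketch(g: nx.DiGraph, node):
    # Toy version: assumes the identity node has exactly one producer.
    (src,) = g.predecessors(node)
    for dst in list(g.successors(node)):
        g.add_edge(src, dst)  # bypass the identity node
    g.remove_node(node)       # drop it from the graph
    return g

g = nx.DiGraph([("conv", "identity"), ("identity", "relu")])
remove_identity_sketch(g, "identity")
assert list(g.edges) == [("conv", "relu")]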
model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py
@@ -46,7 +46,7 @@ def residual_collapsing_fn(first_node: BaseNode,
     Returns:
         The modified layer node's weights: kernel
     """
-    if first_node.type == Conv2d:
+    if first_node.is_match_type(Conv2d):
         # Get nodes attributes
         kernel = first_node.get_weights_by_keys(kernel_str)
         (Cout, Cin, kH, kW) = kernel.shape
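The node.type == X checks replaced by node.is_match_type(X) here recur across most of the touched files; the file list shows base_node.py (+26 -9) and functional_node.py (+18 -1) changing in step, which suggests the method lives on BaseNode and is specialized for functional nodes. A speculative minimal sketch of that shape, under the assumption that a functional node should also match the callable it wraps:

class BaseNodeSketch:
    def __init__(self, node_type):
        self.type = node_type

    def is_match_type(self, _type) -> bool:
        # Default behavior is the old direct comparison.
        return self.type == _type


class FunctionalNodeSketch(BaseNodeSketch):
    def __init__(self, node_type, functional_op):
        super().__init__(node_type)
        self.functional_op = functional_op

    def is_match_type(self, _type) -> bool:
        # Also match the wrapped op, so a node created from torch.add
        # matches `add` even when its recorded type is a wrapper.
        return super().is_match_type(_type) or self.functional_op == _type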
model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py
@@ -76,9 +76,9 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
         pruned_parameters = {}
         mask_bool = output_mask.astype(bool)
         node.weights = pruned_parameters
-        if node.type == torch.nn.BatchNorm2d:
+        if node.is_match_type(torch.nn.BatchNorm2d):
             node.framework_attr[NUM_FEATURES] = int(np.sum(input_mask))
-        elif node.type == torch.nn.PReLU:
+        elif node.is_match_type(torch.nn.PReLU):
             if node.framework_attr[NUM_PARAMETERS] > 1:
                 node.framework_attr[NUM_PARAMETERS] = int(np.sum(input_mask))
             else:
@@ -227,9 +227,9 @@ def _is_pytorch_node_pruning_section_edge(node: BaseNode) -> bool:
    """

    # Check if the node is a Conv2D or Conv2DTranspose layer with groups set to 1.
-    if node.type in [torch.nn.Conv2d, torch.nn.ConvTranspose2d]:
+    if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
        return node.framework_attr[GROUPS] == 1
-    return node.type == torch.nn.Linear
+    return node.is_match_type(torch.nn.Linear)


def _prune_pytorch_edge_node(node: BaseNode,
@@ -268,18 +268,18 @@ def _prune_pytorch_edge_node(node: BaseNode,
    if not is_exit_node:
        # Update 'out_channels' or 'out_features' attributes for entry nodes
        # Conv2d,ConvTranspose2d / Linear layers.
-       if node.type in [torch.nn.Conv2d, torch.nn.ConvTranspose2d]:
+       if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
            node.framework_attr[OUT_CHANNELS] = int(np.sum(mask))
-       elif node.type == torch.nn.Linear:
+       elif node.is_match_type(torch.nn.Linear):
            node.framework_attr[OUT_FEATURES] = int(np.sum(mask))
        else:
            Logger.critical(f"{node.type} is currently not supported "
                            f"as an edge node in a pruning section")

    if is_exit_node:
-       if node.type in [torch.nn.Conv2d, torch.nn.ConvTranspose2d]:
+       if node.is_match_type(torch.nn.Conv2d) or node.is_match_type(torch.nn.ConvTranspose2d):
            node.framework_attr[IN_CHANNELS] = int(np.sum(mask))
-       elif node.type == torch.nn.Linear:
+       elif node.is_match_type(torch.nn.Linear):
            node.framework_attr[IN_FEATURES] = int(np.sum(mask))
        else:
            Logger.critical(f"{node.type} is currently not supported "
model_compression_toolkit/core/pytorch/pytorch_implementation.py
@@ -58,6 +58,7 @@ from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.co
     FunctionalConvSubstitution
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.relu_bound_to_power_of_2 import \
     ReLUBoundToPowerOfTwo
+from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.remove_identity import RemoveIdentity
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.reshape_with_static_shapes import \
     ReshapeWithStaticShapes
 from model_compression_toolkit.core.pytorch.graph_substitutions.substitutions.residual_collapsing import \
@@ -238,7 +239,8 @@ class PytorchImplementation(FrameworkImplementation):
                 PermuteCallMethod(),
                 FunctionalConvSubstitution(fw_info),
                 FunctionalBatchNorm(),
-                FunctionalLayerNorm()]
+                FunctionalLayerNorm(),
+                RemoveIdentity()]

    def get_substitutions_pre_statistics_collection(self,
                                                    quant_config: QuantizationConfig
@@ -396,8 +398,8 @@ class PytorchImplementation(FrameworkImplementation):
        Returns: True if the node should be considered an interest point, False otherwise.
        """

-       if node.type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax, softmax, operator.add, add, cat,
-                        operator.concat]:
+       if any([node.is_match_type(_type) for _type in [Conv2d, Linear, ConvTranspose2d, Sigmoid, sigmoid, Softmax,
+                                                       softmax, operator.add, add, cat, operator.concat]]):
            return True
        return False

@@ -462,12 +464,12 @@ class PytorchImplementation(FrameworkImplementation):
        kernel_shape = node.get_weights_by_keys(fw_info.get_kernel_op_attributes(node.type)[0]).shape
        output_channel_axis, input_channel_axis = fw_info.kernel_channels_mapping.get(node.type)

-       if node.type is Conv2d or node.type is ConvTranspose2d:
+       if node.is_match_type(Conv2d) or node.is_match_type(ConvTranspose2d):
            # (C_out * W_out * H_out) * C_in * (W_kernel * H_kernel)
            return np.prod([x for x in output_shape if x is not None]) * \
                   kernel_shape[input_channel_axis] * \
                   (kernel_shape[0] * kernel_shape[1])
-       elif node.type is Linear:
+       elif node.is_match_type(Linear):
            # IN * OUT
            return kernel_shape[0] * kernel_shape[1]
        else:
@@ -550,7 +552,6 @@ class PytorchImplementation(FrameworkImplementation):
        Returns:
            weight_quantizers: A dictionary between a weight's name to its quantizer.
            activation_quantizers: A list of activations quantization, one for each layer output.
-
        """

        return get_inferable_quantizers(node,
model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py
@@ -62,7 +62,7 @@ def _get_mean_std_outputs(node: BaseNode,
    """
    mean_output, std_output = None, None

-   if node.type == BatchNorm2d:
+   if node.is_match_type(BatchNorm2d):
        mean_output = node.get_weights_by_keys(BETA)
        if node.get_weights_by_keys(GAMMA) is None:
            std_output = 1.0
@@ -72,7 +72,7 @@ def _get_mean_std_outputs(node: BaseNode,
        mean_output = 0.0
    else:
        next_node_list = graph.get_next_nodes(node)
-       bn_nodes = [bn_node for bn_node in next_node_list if bn_node.type == BatchNorm2d]
+       bn_nodes = [bn_node for bn_node in next_node_list if bn_node.is_match_type(BatchNorm2d)]
        if len(bn_nodes) != 0:
            bn_node = bn_nodes[0]
            moving_variance = bn_node.get_weights_by_keys(MOVING_VARIANCE)
model_compression_toolkit/core/quantization_prep_runner.py
@@ -21,6 +21,7 @@ from tqdm import tqdm
 from model_compression_toolkit.core.common import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.graph.base_graph import Graph
+from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.model_collector import ModelCollector
 from model_compression_toolkit.core.common.network_editors.edit_network import edit_network_graph
 from model_compression_toolkit.core.common.quantization.core_config import CoreConfig
@@ -38,7 +39,8 @@ def quantization_preparation_runner(graph: Graph,
                                     core_config: CoreConfig,
                                     fw_info: FrameworkInfo,
                                     fw_impl: FrameworkImplementation,
-                                    tb_w: TensorboardWriter = None) -> Graph:
+                                    tb_w: TensorboardWriter = None,
+                                    hessian_info_service: HessianInfoService = None) -> Graph:
    """
    Prepares a trained model for post-training quantization.
    First, the model graph is optimized using several transformations (e.g. folding BatchNormalization to preceding layers).
@@ -55,6 +57,7 @@ def quantization_preparation_runner(graph: Graph,
            groups of layers by how they should be quantized, etc.).
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
        tb_w: TensorboardWriter object for logging
+       hessian_info_service: HessianInfoService object for retrieving Hessian-based scores.

    Returns:
        Graph object that represents the model, contains thresholds, and is ready for quantization.
@@ -86,7 +89,8 @@ def quantization_preparation_runner(graph: Graph,
    ######################################
    # Calculate quantization params
    ######################################
-   calculate_quantization_params(graph)
+
+   calculate_quantization_params(graph, hessian_info_service=hessian_info_service)

    if tb_w is not None:
        tb_w.add_graph(graph, 'thresholds_selection')
model_compression_toolkit/core/runner.py
@@ -48,6 +48,7 @@ def core_runner(in_model: Any,
                fw_impl: FrameworkImplementation,
                tpc: TargetPlatformCapabilities,
                target_resource_utilization: ResourceUtilization = None,
+               running_gptq: bool = False,
                tb_w: TensorboardWriter = None):
    """
    Quantize a trained model using post-training quantization.
@@ -97,7 +98,8 @@ def core_runner(in_model: Any,
                                            fw_impl,
                                            tpc,
                                            tb_w,
-                                           mixed_precision_enable=core_config.mixed_precision_enable)
+                                           mixed_precision_enable=core_config.mixed_precision_enable,
+                                           running_gptq=running_gptq)

    hessian_info_service = HessianInfoService(graph=graph,
                                              representative_dataset=representative_data_gen,
@@ -108,7 +110,8 @@ def core_runner(in_model: Any,
                                             core_config=core_config,
                                             fw_info=fw_info,
                                             fw_impl=fw_impl,
-                                            tb_w=tb_w)
+                                            tb_w=tb_w,
+                                            hessian_info_service=hessian_info_service)

    ######################################
    # Finalize bit widths
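Taken together, core_runner now builds a single HessianInfoService and threads it into quantization_preparation_runner, while running_gptq tells the preparation stage that GPTQ will follow. Given the cache-related edits to hessian_info_service.py in the file list, a plausible motivation is that one shared service lets Hessian-based scores computed during threshold selection be reused later (for example by GPTQ) instead of recomputed. A generic sketch of that request-keyed caching pattern, not MCT's actual API:

class HessianServiceSketch:
    def __init__(self, compute_fn):
        self._compute = compute_fn  # expensive per-request computation
        self._cache = {}

    def fetch(self, request):
        # Compute each distinct request once; later phases reuse the result.
        if request not in self._cache:
            self._cache[request] = self._compute(request)
        return self._cache[request]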
model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py
@@ -42,8 +42,12 @@ if FOUND_TF:
        """
        weights_quantizers, _ = fw_impl.get_inferable_quantizers(node)
        if len(weights_quantizers) > 0:
+           # for positional weights we need to extract the weight's value.
+           weights_values = {attr: node.get_weights_by_keys(attr)
+                             for attr in weights_quantizers if isinstance(attr, int)}
            return KerasQuantizationWrapper(layer,
-                                           weights_quantizers)
+                                           weights_quantizers,
+                                           weights_values)
        return layer


model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py
@@ -29,7 +29,7 @@ if FOUND_TORCH:

    def fully_quantized_wrapper(node: common.BaseNode,
                                module: torch.nn.Module,
-                               fw_impl) -> Union[torch.nn.Module,PytorchQuantizationWrapper]:
+                               fw_impl) -> Union[torch.nn.Module, PytorchQuantizationWrapper]:
        """
        A function which takes a computational graph node and a pytorch module and
        performs the quantization wrapping.
@@ -37,20 +37,26 @@ if FOUND_TORCH:
        Args:
            node: A node of mct graph.
            module: A Pytorch module
+           fw_impl: FrameworkImplementation object with a specific framework methods implementation.
        Returns: Wrapped layer

        """
        weight_quantizers, _ = fw_impl.get_inferable_quantizers(node)
        if len(weight_quantizers) > 0:
-           return PytorchQuantizationWrapper(module, weight_quantizers)
+           # for positional weights we need to extract the weight's value.
+           weights_values = {attr: fw_impl.to_tensor(node.get_weights_by_keys(attr))
+                             for attr in weight_quantizers if isinstance(attr, int)}
+           return PytorchQuantizationWrapper(module, weight_quantizers, weights_values)
        return module

+
    def get_activation_quantizer_holder(node: BaseNode, fw_impl) -> Callable:
        """
        Retrieve a PytorchActivationQuantizationHolder layer to use for activation quantization of a node.
        If the layer is not supposed to be wrapped with an activation quantizer - return None.
        Args:
            node: Node to attach a PytorchActivationQuantizationHolder to its output.
+           fw_impl: FrameworkImplementation object with a specific framework methods implementation.
        Returns:
            A PytorchActivationQuantizationHolder module for the node's activation quantization.
        """
@@ -64,6 +70,7 @@ if FOUND_TORCH:
                f'PytorchActivationQuantizationHolder supports a single quantizer but {len(activation_quantizers)} quantizers '
                f'were found for node {node}')

+
    def get_exportable_pytorch_model(graph: Graph):
        """
        Convert graph to fully quantized PyTorch model.
model_compression_toolkit/gptq/keras/quantization_facade.py
@@ -212,7 +212,8 @@ if FOUND_TF:
                                                   fw_impl=fw_impl,
                                                   tpc=target_platform_capabilities,
                                                   target_resource_utilization=target_resource_utilization,
-                                                  tb_w=tb_w)
+                                                  tb_w=tb_w,
+                                                  running_gptq=True)

        float_graph = copy.deepcopy(tg)

model_compression_toolkit/gptq/pytorch/quantization_facade.py
@@ -180,7 +180,9 @@ if FOUND_TORCH:
                                                fw_impl=fw_impl,
                                                tpc=target_platform_capabilities,
                                                target_resource_utilization=target_resource_utilization,
-                                               tb_w=tb_w)
+                                               tb_w=tb_w,
+                                               running_gptq=True)
+
        float_graph = copy.deepcopy(graph)

        # ---------------------- #
model_compression_toolkit/gptq/runner.py
@@ -111,6 +111,7 @@ def gptq_runner(tg: Graph,
    #############################################
    # Gradient Based Post Training Quantization
    #############################################
+   Logger.info("Running GPTQ optimization.")
    tg_gptq = _apply_gptq(gptq_config,
                          gptq_representative_data_gen,
                          tb_w,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py
@@ -13,12 +13,12 @@
 # limitations under the License.
 # ==============================================================================
 from model_compression_toolkit.constants import FOUND_TF, FOUND_TORCH
-from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tp_model import get_tp_model, generate_tp_model, \
+from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tp_model import get_tp_model, generate_tp_model, \
     get_op_quantization_configs
 if FOUND_TF:
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_keras import get_keras_tpc as get_keras_tpc_latest
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_keras import generate_keras_tpc
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_keras import get_keras_tpc as get_keras_tpc_latest
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_keras import generate_keras_tpc
 if FOUND_TORCH:
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_pytorch import get_pytorch_tpc as \
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_pytorch import get_pytorch_tpc as \
         get_pytorch_tpc_latest
-    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v2.tpc_pytorch import generate_pytorch_tpc
+    from model_compression_toolkit.target_platform_capabilities.tpc_models.imx500_tpc.v1.tpc_pytorch import generate_pytorch_tpc
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py
@@ -56,7 +56,7 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza

    # We define a default quantization config for all non-specified weights attributes.
    default_weight_attr_config = AttributeQuantizationConfig(
-       weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+       weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
        weights_n_bits=8,
        weights_per_channel_threshold=False,
        enable_weights_quantization=False,
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py
@@ -32,7 +32,7 @@ def get_tp_model() -> TargetPlatformModel:
    NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
    (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
    'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
-   This version enables metadata by default
+   This version enables metadata by default.

    Returns: A TargetPlatformModel object.

@@ -44,7 +44,8 @@ def get_tp_model() -> TargetPlatformModel:
                             name='imx500_tp_model')


-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
+def get_op_quantization_configs() -> \
+       Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
    """
    Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
    In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
@@ -151,6 +152,19 @@ def generate_tp_model(default_config: OpQuantizationConfig,
    # this configuration will be used for the operation quantization:
    default_configuration_options = tp.QuantizationConfigOptions([default_config])

+   # Create a QuantizationConfigOptions for quantizing constants in functional ops.
+   # Constant configuration is similar to the default eight bit configuration except for PoT
+   # quantization method for the constant.
+   # Since the constants are not named attributes of the layer, we use the default_weight_attr_config to
+   # define the desired quantization properties for them.
+   const_config = default_config.clone_and_edit(
+       default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
+           enable_weights_quantization=True))
+   if not (const_config.default_weight_attr_config.weights_quantization_method == tp.QuantizationMethod.POWER_OF_TWO and
+           const_config.default_weight_attr_config.weights_per_channel_threshold is False):
+       mct.logger.Logger.error('Constant quantization config should be per-tensor PoT.')
+   const_configuration_options = tp.QuantizationConfigOptions([const_config])
+
    # Create a TargetPlatformModel and set its default quantization config.
    # This default configuration will be used for all operations
    # unless specified otherwise (see OperatorsSet, for example):
@@ -184,10 +198,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
    # Define operations sets without quantization configuration
    # options (useful for creating fusing patterns, for example):
    any_relu = tp.OperatorsSet("AnyReLU")
-   add = tp.OperatorsSet("Add")
-   sub = tp.OperatorsSet("Sub")
-   mul = tp.OperatorsSet("Mul")
-   div = tp.OperatorsSet("Div")
+   add = tp.OperatorsSet("Add", const_configuration_options)
+   sub = tp.OperatorsSet("Sub", const_configuration_options)
+   mul = tp.OperatorsSet("Mul", const_configuration_options)
+   div = tp.OperatorsSet("Div", const_configuration_options)
    prelu = tp.OperatorsSet("PReLU")
    swish = tp.OperatorsSet("Swish")
    sigmoid = tp.OperatorsSet("Sigmoid")
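The sanity check above insists that constants get per-tensor power-of-two (PoT) quantization. For orientation, a per-tensor PoT threshold is the smallest power of two covering the tensor's absolute maximum, and the quantization grid is symmetric under that threshold; a textbook sketch of that scheme, not MCT's actual threshold search:

import numpy as np

def pot_threshold(x: np.ndarray) -> float:
    # Smallest power of two >= max|x|, computed once for the whole tensor.
    max_abs = float(np.max(np.abs(x)))
    return 2.0 ** int(np.ceil(np.log2(max_abs))) if max_abs > 0 else 1.0

def quantize_pot(x: np.ndarray, n_bits: int = 8) -> np.ndarray:
    t = pot_threshold(x)
    scale = t / 2 ** (n_bits - 1)  # signed symmetric grid over [-t, t)
    q = np.round(x / scale)
    return np.clip(q, -2 ** (n_bits - 1), 2 ** (n_bits - 1) - 1) * scale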
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py
@@ -122,7 +122,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
    tp.OperationsSetToLayers("Add", [tf.add, Add])
    tp.OperationsSetToLayers("Sub", [tf.subtract, Subtract])
    tp.OperationsSetToLayers("Mul", [tf.math.multiply, Multiply])
-   tp.OperationsSetToLayers("Div", [tf.math.divide])
+   tp.OperationsSetToLayers("Div", [tf.math.divide, tf.math.truediv])
    tp.OperationsSetToLayers("PReLU", [PReLU])
    tp.OperationsSetToLayers("Swish", [tf.nn.swish, tp.LayerFilterParams(Activation, activation="swish")])
    tp.OperationsSetToLayers("Sigmoid", [tf.nn.sigmoid, tp.LayerFilterParams(Activation, activation="sigmoid")])
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py
@@ -33,7 +33,7 @@ def get_tp_model() -> TargetPlatformModel:
    NOTE: in order to generate a target platform model with different configurations but with the same Operators Sets
    (for tests, experiments, etc.), use this method implementation as a test-case, i.e., override the
    'get_op_quantization_configs' method and use its output to call 'generate_tp_model' with your configurations.
-   This version enables metadata by default
+   This version enables metadata by default.

    Returns: A TargetPlatformModel object.

@@ -45,7 +45,8 @@ def get_tp_model() -> TargetPlatformModel:
                             name='imx500_lut_tp_model')


-def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
+def get_op_quantization_configs() -> \
+       Tuple[OpQuantizationConfig, List[OpQuantizationConfig], OpQuantizationConfig]:
    """
    Creates a default configuration object for 8-bit quantization, to be used to set a default TargetPlatformModel.
    In addition, creates a default configuration objects list (with 8, 4 and 2 bit quantization) to be used as
@@ -57,13 +58,13 @@ def get_op_quantization_configs() -> Tuple[OpQuantizationConfig, List[OpQuantiza

    # We define a default quantization config for all non-specified weights attributes.
    default_weight_attr_config = AttributeQuantizationConfig(
-       weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
+       weights_quantization_method=tp.QuantizationMethod.POWER_OF_TWO,
        weights_n_bits=8,
        weights_per_channel_threshold=False,
        enable_weights_quantization=False,
        lut_values_bitwidth=None)

-   # We define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
+   # define a quantization config to quantize the kernel (for layers where there is a kernel attribute).
    kernel_base_config = AttributeQuantizationConfig(
        weights_quantization_method=tp.QuantizationMethod.SYMMETRIC,
        weights_n_bits=8,
@@ -150,6 +151,19 @@ def generate_tp_model(default_config: OpQuantizationConfig,
    # this configuration will be used for the operation quantization:
    default_configuration_options = tp.QuantizationConfigOptions([default_config])

+   # Create a QuantizationConfigOptions for quantizing constants in functional ops.
+   # Constant configuration is similar to the default eight bit configuration except for PoT
+   # quantization method for the constant.
+   # Since the constants are not named attributes of the layer, we use the default_weight_attr_config to
+   # define the desired quantization properties for them.
+   const_config = default_config.clone_and_edit(
+       default_weight_attr_config=default_config.default_weight_attr_config.clone_and_edit(
+           enable_weights_quantization=True))
+   if not (const_config.default_weight_attr_config.weights_quantization_method == tp.QuantizationMethod.POWER_OF_TWO and
+           const_config.default_weight_attr_config.weights_per_channel_threshold is False):
+       mct.logger.Logger.error('Constant quantization config should be per-tensor PoT.')
+   const_configuration_options = tp.QuantizationConfigOptions([const_config])
+
    # Create a TargetPlatformModel and set its default quantization config.
    # This default configuration will be used for all operations
    # unless specified otherwise (see OperatorsSet, for example):
@@ -181,10 +195,10 @@ def generate_tp_model(default_config: OpQuantizationConfig,
    # Define operations sets without quantization configuration
    # options (useful for creating fusing patterns, for example):
    any_relu = tp.OperatorsSet("AnyReLU")
-   add = tp.OperatorsSet("Add")
-   sub = tp.OperatorsSet("Sub")
-   mul = tp.OperatorsSet("Mul")
-   div = tp.OperatorsSet("Div")
+   add = tp.OperatorsSet("Add", const_configuration_options)
+   sub = tp.OperatorsSet("Sub", const_configuration_options)
+   mul = tp.OperatorsSet("Mul", const_configuration_options)
+   div = tp.OperatorsSet("Div", const_configuration_options)
    prelu = tp.OperatorsSet("PReLU")
    swish = tp.OperatorsSet("Swish")
    sigmoid = tp.OperatorsSet("Sigmoid")
model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py
@@ -122,7 +122,7 @@ def generate_keras_tpc(name: str, tp_model: tp.TargetPlatformModel):
    tp.OperationsSetToLayers("Add", [tf.add, Add])
    tp.OperationsSetToLayers("Sub", [tf.subtract, Subtract])
    tp.OperationsSetToLayers("Mul", [tf.math.multiply, Multiply])
-   tp.OperationsSetToLayers("Div", [tf.math.divide])
+   tp.OperationsSetToLayers("Div", [tf.math.divide, tf.math.truediv])
    tp.OperationsSetToLayers("PReLU", [PReLU])
    tp.OperationsSetToLayers("Swish", [tf.nn.swish, tp.LayerFilterParams(Activation, activation="swish")])
    tp.OperationsSetToLayers("Sigmoid", [tf.nn.sigmoid, tp.LayerFilterParams(Activation, activation="sigmoid")])