mct-nightly 2.4.0.20250630.629__py3-none-any.whl → 2.4.0.20250702.605__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. {mct_nightly-2.4.0.20250630.629.dist-info → mct_nightly-2.4.0.20250702.605.dist-info}/METADATA +16 -16
  2. {mct_nightly-2.4.0.20250630.629.dist-info → mct_nightly-2.4.0.20250702.605.dist-info}/RECORD +75 -72
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/core/common/back2framework/base_model_builder.py +0 -1
  5. model_compression_toolkit/core/common/framework_info.py +5 -32
  6. model_compression_toolkit/core/common/fusion/graph_fuser.py +12 -9
  7. model_compression_toolkit/core/common/graph/base_graph.py +20 -37
  8. model_compression_toolkit/core/common/graph/base_node.py +13 -106
  9. model_compression_toolkit/core/common/graph/functional_node.py +1 -1
  10. model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +12 -10
  11. model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py +14 -9
  12. model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +9 -15
  13. model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +2 -3
  14. model_compression_toolkit/core/common/network_editors/__init__.py +8 -1
  15. model_compression_toolkit/core/common/network_editors/actions.py +4 -96
  16. model_compression_toolkit/core/common/quantization/bit_width_config.py +10 -10
  17. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +116 -56
  18. model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +1 -1
  19. model_compression_toolkit/core/common/quantization/node_quantization_config.py +55 -179
  20. model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +21 -1
  21. model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py +8 -5
  22. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +76 -70
  23. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +10 -12
  24. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +54 -30
  25. model_compression_toolkit/core/common/quantization/quantize_node.py +8 -8
  26. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +93 -398
  27. model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +2 -5
  28. model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +2 -4
  29. model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +5 -6
  30. model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +12 -6
  31. model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +1 -1
  32. model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +1 -2
  33. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +33 -33
  34. model_compression_toolkit/core/common/visualization/tensorboard_writer.py +2 -4
  35. model_compression_toolkit/core/graph_prep_runner.py +31 -20
  36. model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +5 -2
  37. model_compression_toolkit/core/keras/default_framework_info.py +0 -11
  38. model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +9 -6
  39. model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +3 -1
  40. model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +1 -1
  41. model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +2 -1
  42. model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +1 -1
  43. model_compression_toolkit/core/keras/quantization/activation_quantization_fn_factory.py +47 -0
  44. model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +3 -2
  45. model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +5 -2
  46. model_compression_toolkit/core/pytorch/default_framework_info.py +0 -12
  47. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +5 -5
  48. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +2 -0
  49. model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +1 -1
  50. model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +2 -1
  51. model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +1 -1
  52. model_compression_toolkit/core/pytorch/pytorch_implementation.py +1 -1
  53. model_compression_toolkit/core/pytorch/quantization/activation_quantization_fn_factory.py +45 -0
  54. model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +3 -2
  55. model_compression_toolkit/core/runner.py +1 -1
  56. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +7 -3
  57. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py +1 -1
  58. model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +12 -3
  59. model_compression_toolkit/pruning/keras/pruning_facade.py +5 -9
  60. model_compression_toolkit/pruning/pytorch/pruning_facade.py +2 -5
  61. model_compression_toolkit/ptq/keras/quantization_facade.py +1 -1
  62. model_compression_toolkit/qat/keras/quantization_facade.py +1 -1
  63. model_compression_toolkit/qat/pytorch/quantization_facade.py +1 -1
  64. model_compression_toolkit/quantization_preparation/__init__.py +14 -0
  65. model_compression_toolkit/quantization_preparation/load_fqc.py +223 -0
  66. model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
  67. model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +0 -78
  68. {mct_nightly-2.4.0.20250630.629.dist-info → mct_nightly-2.4.0.20250702.605.dist-info}/WHEEL +0 -0
  69. {mct_nightly-2.4.0.20250630.629.dist-info → mct_nightly-2.4.0.20250702.605.dist-info}/licenses/LICENSE.md +0 -0
  70. {mct_nightly-2.4.0.20250630.629.dist-info → mct_nightly-2.4.0.20250702.605.dist-info}/top_level.txt +0 -0
  71. /model_compression_toolkit/core/keras/{quantizer → quantization}/__init__.py +0 -0
  72. /model_compression_toolkit/core/keras/{quantizer → quantization}/fake_quant_builder.py +0 -0
  73. /model_compression_toolkit/core/keras/{quantizer → quantization}/lut_fake_quant.py +0 -0
  74. /model_compression_toolkit/core/pytorch/{quantizer → quantization}/__init__.py +0 -0
  75. /model_compression_toolkit/core/pytorch/{quantizer → quantization}/fake_quant_builder.py +0 -0
  76. /model_compression_toolkit/core/pytorch/{quantizer → quantization}/lut_fake_quant.py +0 -0
@@ -21,12 +21,8 @@ from torch.nn import Conv2d, ConvTranspose2d, Linear
  from torch import sigmoid

  from model_compression_toolkit.core.common.framework_info import FrameworkInfo, set_fw_info, ChannelAxisMapping
- from mct_quantizers import QuantizationMethod
  from model_compression_toolkit.constants import SOFTMAX_THRESHOLD
  from model_compression_toolkit.core.pytorch.constants import KERNEL
- from model_compression_toolkit.core.pytorch.quantizer.fake_quant_builder import power_of_two_quantization, \
-     symmetric_quantization, uniform_quantization
- from model_compression_toolkit.core.pytorch.quantizer.lut_fake_quant import activation_lut_kmean_quantizer


  class PyTorchInfo(FrameworkInfo):
@@ -81,14 +77,6 @@ class PyTorchInfo(FrameworkInfo):
          SiLU: (-0.279, None),
      }

-     """
-     Mapping from a QuantizationMethod to an activation quantizer function.
-     """
-     activation_quantizer_mapping = {QuantizationMethod.POWER_OF_TWO: power_of_two_quantization,
-                                     QuantizationMethod.SYMMETRIC: symmetric_quantization,
-                                     QuantizationMethod.UNIFORM: uniform_quantization,
-                                     QuantizationMethod.LUT_POT_QUANTIZER: activation_lut_kmean_quantizer}
-
      @classmethod
      def get_kernel_channels(cls, node_type: Any) -> ChannelAxisMapping:
          """
@@ -95,11 +95,11 @@ class ReLUBoundToPowerOfTwo(common.BaseSubstitution):
              else:
                  return graph
          elif non_linear_node.is_match_type(hardtanh):
-             if (non_linear_node.framework_attr[HARDTANH_MIN_VAL] == 0.0) and not \
-                     (np.log2(non_linear_node.framework_attr[HARDTANH_MAX_VAL]).astype(int) -
-                      np.log2(non_linear_node.framework_attr[HARDTANH_MAX_VAL]) == 0):
-                 scale_factor = non_linear_node.framework_attr[HARDTANH_MAX_VAL] / self.threshold
-                 non_linear_node.functional_op.__defaults__ = (0.0, self.threshold, non_linear_node.framework_attr[INPLACE])
+             kwargs = non_linear_node.op_call_kwargs
+             if (kwargs[HARDTANH_MIN_VAL] == 0.0) and not \
+                     (np.log2(kwargs[HARDTANH_MAX_VAL]).astype(int) - np.log2(kwargs[HARDTANH_MAX_VAL]) == 0):
+                 scale_factor = kwargs[HARDTANH_MAX_VAL] / self.threshold
+                 non_linear_node.functional_op.__defaults__ = (0.0, self.threshold, kwargs[INPLACE])
              else:
                  return graph
          else:
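Note: the substitution now reads the hardtanh bounds from op_call_kwargs instead of framework_attr. A minimal illustrative sketch of the power-of-two check applied above (the values are examples, not taken from this diff):

    import numpy as np

    max_val = 6.0  # e.g. hardtanh(0.0, 6.0), as used for ReLU6
    # True only when max_val is an exact power of two (e.g. 8.0); False for 6.0
    is_pow2 = (np.log2(max_val).astype(int) - np.log2(max_val)) == 0
    if not is_pow2:
        scale_factor = max_val / 8.0  # assuming a power-of-two threshold of 8.0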
@@ -29,6 +29,7 @@ from model_compression_toolkit.core.common import BaseNode, Graph
  from model_compression_toolkit.core.common.graph.graph_matchers import EdgeMatcher
  from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
  from model_compression_toolkit.core.common.substitutions.shift_negative_activation import apply_shift_negative_correction
+ from model_compression_toolkit.core.pytorch.quantization.activation_quantization_fn_factory import get_activation_quantization_fn_factory
  from model_compression_toolkit.core.pytorch.constants import PAD, VALUE, PADDING, BIAS, USE_BIAS
  from model_compression_toolkit.core.pytorch.utils import to_torch_tensor

@@ -239,4 +240,5 @@ def pytorch_apply_shift_negative_correction(graph: Graph,
                                             PADDING,
                                             BIAS,
                                             USE_BIAS,
+                                            get_activation_quantization_fn_factory,
                                             params_search_quantization_fn=params_search_quantization_fn)
@@ -91,7 +91,7 @@ class WeightsHessianScoresCalculatorPytorch(HessianScoresCalculatorPytorch):
          for i, ipt_node in enumerate(self.hessian_request.target_nodes):  # Per Interest point weights tensor

              # Check if the target node's layer type is supported.
-             if not ipt_node.is_kernel_op:
+             if not ipt_node.kernel_attr:
                  Logger.critical(f"Hessian information with respect to weights is not supported for "
                                  f"{ipt_node.type} layers.")  # pragma: no cover

@@ -20,6 +20,7 @@ from model_compression_toolkit.core.common.mixed_precision.configurable_quantize
      verify_candidates_descending_order, init_activation_quantizers
  from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
      CandidateNodeQuantizationConfig
+ from model_compression_toolkit.core.pytorch.quantization.activation_quantization_fn_factory import get_activation_quantization_fn_factory
  from model_compression_toolkit.logger import Logger
  from mct_quantizers import QuantizationMethod
  from mct_quantizers import QuantizationTarget
@@ -67,7 +68,7 @@ class ConfigurableActivationQuantizer(BasePyTorchInferableQuantizer):
              Logger.critical("Unsupported configuration: Mixing candidates with differing activation quantization states (enabled/disabled).")  # pragma: no cover

          # Setting layer's activation
-         self.activation_quantizers = init_activation_quantizers(self.node_q_cfg)
+         self.activation_quantizers = init_activation_quantizers(self.node_q_cfg, get_activation_quantization_fn_factory)
          self.active_quantization_config_index = max_candidate_idx  # initialize with first config as default

      def set_active_activation_quantizer(self, index: Optional[int]):
@@ -167,7 +167,7 @@ class PruningPytorchImplementation(PytorchImplementation, PruningFrameworkImplem
          """

          attributes_with_axis = {}
-         if node.is_kernel_op:
+         if node.kernel_attr:
              attributes_with_axis[node.kernel_attr] = (node.channel_axis.output, node.channel_axis.input)

          # Bias is a vector at the length of the number of output channels.
@@ -26,7 +26,7 @@ from torch.nn import Module, Sigmoid, Softmax

  import model_compression_toolkit.core.pytorch.constants as pytorch_constants
  from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
- from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig
+ from model_compression_toolkit.core import QuantizationConfig, CoreConfig
  from model_compression_toolkit.core import common
  from model_compression_toolkit.core.common import Graph, BaseNode
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
@@ -0,0 +1,45 @@
+ # Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+
+ from collections.abc import Callable
+
+ from mct_quantizers import QuantizationMethod
+ from model_compression_toolkit.core.pytorch.quantization.fake_quant_builder import power_of_two_quantization, \
+     symmetric_quantization, uniform_quantization
+ from model_compression_toolkit.core.pytorch.quantization.lut_fake_quant import activation_lut_kmean_quantizer
+
+
+ """
+ Mapping from a QuantizationMethod to an activation quantizer function.
+ """
+ _activation_quantizer_factory_mapping = {
+     QuantizationMethod.POWER_OF_TWO: power_of_two_quantization,
+     QuantizationMethod.SYMMETRIC: symmetric_quantization,
+     QuantizationMethod.UNIFORM: uniform_quantization,
+     QuantizationMethod.LUT_POT_QUANTIZER: activation_lut_kmean_quantizer
+ }
+
+
+ def get_activation_quantization_fn_factory(quantization_method: QuantizationMethod) -> Callable[[int, dict], Callable]:
+     """
+     Get factory for activation quantizer.
+
+     Args:
+         quantization_method: quantization method for activation.
+
+     Returns:
+         Factory that accepts activation bitwidth and a dict of quantization params, and returns the quantizer.
+     """
+     return _activation_quantizer_factory_mapping[quantization_method]
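Note: a hedged usage sketch of the new PyTorch factory added above. The keys of the quantization-params dict are an assumption for illustration only; they are not shown in this diff.

    from mct_quantizers import QuantizationMethod
    from model_compression_toolkit.core.pytorch.quantization.activation_quantization_fn_factory import \
        get_activation_quantization_fn_factory

    # Pick the quantizer builder for the requested quantization method.
    factory = get_activation_quantization_fn_factory(QuantizationMethod.POWER_OF_TWO)
    # Per the docstring above: activation bitwidth plus a params dict (key names assumed here).
    quantizer = factory(8, {'threshold': 1.0})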
@@ -18,7 +18,7 @@ from torch.nn import Conv2d, Linear, ConvTranspose2d
  from model_compression_toolkit.core import QuantizationConfig
  from model_compression_toolkit.core.common import Graph
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
- from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+ from model_compression_toolkit.core.pytorch.quantization.activation_quantization_fn_factory import get_activation_quantization_fn_factory
  from model_compression_toolkit.core.common.graph.graph_matchers import NodeOperationMatcher
  from model_compression_toolkit.core.common.statistics_correction.compute_activation_bias_correction_of_graph import \
      compute_activation_bias_correction_of_graph
@@ -50,5 +50,6 @@ def pytorch_compute_activation_bias_correction_of_graph(graph: Graph,
                                                           fw_impl=fw_impl,
                                                           activation_bias_correction_node_matchers=
                                                           activation_bias_correction_node_matchers,
-                                                          kernel_size=KERNEL_SIZE)
+                                                          kernel_size=KERNEL_SIZE,
+                                                          get_activation_quantization_fn_factory=get_activation_quantization_fn_factory)
      return graph
@@ -118,7 +118,7 @@ def core_runner(in_model: Any,
      if core_config.is_mixed_precision_enabled:
          if core_config.mixed_precision_config.configuration_overwrite is None:

-             filter_candidates_for_mixed_precision(graph, target_resource_utilization, fqc)
+             filter_candidates_for_mixed_precision(graph, target_resource_utilization)
              bit_widths_config = search_bit_width(tg,
                                                   fw_impl,
                                                   target_resource_utilization,
@@ -12,14 +12,13 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import Callable
+ from typing import Callable, Optional, List
  from io import BytesIO

  import torch.nn

  from mct_quantizers import PytorchActivationQuantizationHolder, PytorchQuantizationWrapper

- from model_compression_toolkit.core.pytorch.reader.node_holders import DummyPlaceHolder
  from model_compression_toolkit.verify_packages import FOUND_ONNX
  from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.core.pytorch.utils import to_torch_tensor
@@ -65,11 +64,14 @@ if FOUND_ONNX:
              self._use_onnx_custom_quantizer_ops = use_onnx_custom_quantizer_ops
              self._onnx_opset_version = onnx_opset_version

-         def export(self, output_names=None) -> None:
+         def export(self, output_names: Optional[List[str]] = None) -> None:
              """
              Convert an exportable (fully-quantized) PyTorch model to a fakely-quant model
              (namely, weights that are in fake-quant format) and fake-quant layers for the activations.

+             Args:
+                 output_names (Optional[List[str]]): Optional list of output node names for export compatibility.
+
              Returns:
                  Fake-quant PyTorch model.
              """
@@ -131,6 +133,8 @@ if FOUND_ONNX:
                  output_names = ['output']
                  dynamic_axes.update({'output': {0: 'batch_size'}})
              else:
+                 assert isinstance(output_names, list), \
+                     f"`output_names` must be a list, but got {type(output_names).__name__}"
                  if isinstance(model_output, (list, tuple)):
                      num_of_outputs = len(model_output)
                  else:
@@ -49,7 +49,7 @@ class FakelyQuantTorchScriptPyTorchExporter(BasePyTorchExporter):
                           save_model_path,
                           repr_dataset)

-     def export(self) -> None:
+     def export(self, output_names=None) -> None:
          """
          Convert an exportable (fully-quantized) PyTorch model to a fakely-quant model
          (namely, weights that are in fake-quant format) and fake-quant layers for the activations.
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import Callable
+ from typing import Callable, Optional, List
  from packaging import version

  from model_compression_toolkit.verify_packages import FOUND_TORCH
@@ -49,7 +49,8 @@ if FOUND_TORCH:
                               is_layer_exportable_fn: Callable = is_pytorch_layer_exportable,
                               serialization_format: PytorchExportSerializationFormat = PytorchExportSerializationFormat.ONNX,
                               quantization_format: QuantizationFormat = QuantizationFormat.MCTQ,
-                              onnx_opset_version=DEFAULT_ONNX_OPSET_VERSION) -> None:
+                              onnx_opset_version=DEFAULT_ONNX_OPSET_VERSION,
+                              output_names: Optional[List[str]] = None) -> None:
          """
          Export a PyTorch quantized model to a torchscript or onnx model.
          The model will be saved to the path in save_model_path.
@@ -67,11 +68,19 @@
                  PytorchExportSerializationFormat.ONNX).
              quantization_format: Format of how quantizers are exported (fakely-quant, int8, MCTQ quantizers).
              onnx_opset_version: ONNX opset version to use for exported ONNX model.
+             output_names (Optional[List[str]]): Optional list of output node names for export compatibility.
+                 This argument is relevant only when using PytorchExportSerializationFormat.ONNX.

          """
          # Ensure 'metadata' is available directly on the model, if present in submodules
          find_and_assign_metadata_attr(model)

+         if output_names is not None and serialization_format != PytorchExportSerializationFormat.ONNX:
+             Logger.warning(
+                 f'`output_names` is only applicable when exporting to ONNX. '
+                 f'Current serialization format is {serialization_format}, so `output_names` will be ignored.'
+             )  # pragma: no cover
+
          if serialization_format == PytorchExportSerializationFormat.TORCHSCRIPT:
              if quantization_format in supported_serialization_quantization_export_dict[serialization_format]:
                  exporter = FakelyQuantTorchScriptPyTorchExporter(model,
@@ -109,7 +118,7 @@ if FOUND_TORCH:
                                  f'Unsupported serialization {serialization_format} was used to export Pytorch model.'
                                  f' Please see API for supported formats.')  # pragma: no cover

-         exporter.export()
+         exporter.export(output_names=output_names)

  else:
      def pytorch_export_model(*args, **kwargs):
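Note: a hedged example of the new output_names argument on the export facade. The model, representative dataset, and save path below are placeholders; per the warning added above, the argument is honored only for ONNX serialization.

    import model_compression_toolkit as mct

    mct.exporter.pytorch_export_model(model=quantized_model,
                                      save_model_path='./qmodel.onnx',
                                      repr_dataset=repr_dataset,
                                      output_names=['logits'])  # forwarded to exporter.export(output_names=...)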
@@ -17,6 +17,7 @@ from typing import Callable, Tuple, Union

  from model_compression_toolkit import get_target_platform_capabilities
  from model_compression_toolkit.constants import TENSORFLOW
+ from model_compression_toolkit.quantization_preparation.load_fqc import load_fqc_configuration
  from model_compression_toolkit.target_platform_capabilities.schema.mct_current_schema import TargetPlatformCapabilities
  from model_compression_toolkit.target_platform_capabilities.tpc_io_handler import load_target_platform_capabilities
  from model_compression_toolkit.verify_packages import FOUND_TF
@@ -24,10 +25,8 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
  from model_compression_toolkit.core.common.pruning.pruner import Pruner
  from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig
  from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
- from model_compression_toolkit.core.common.quantization.set_node_quantization_config import set_quantization_configuration_to_graph
  from model_compression_toolkit.core.graph_prep_runner import read_model_to_graph
  from model_compression_toolkit.logger import Logger
- from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG
  from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL

  if FOUND_TF:
@@ -117,20 +116,17 @@

      target_platform_capabilities = load_target_platform_capabilities(target_platform_capabilities)
      # Attach tpc model to framework
-     attach2keras = AttachTpcToKeras()
-     target_platform_capabilities = attach2keras.attach(target_platform_capabilities)
+     framework_platform_capabilities = AttachTpcToKeras().attach(target_platform_capabilities)

      # Convert the original Keras model to an internal graph representation.
      float_graph = read_model_to_graph(model,
                                        representative_data_gen,
-                                       target_platform_capabilities,
+                                       framework_platform_capabilities,
                                        fw_impl)

      # Apply quantization configuration to the graph. This step is necessary even when not quantizing,
      # as it prepares the graph for the pruning process.
-     float_graph_with_compression_config = set_quantization_configuration_to_graph(float_graph,
-                                                                                   quant_config=DEFAULTCONFIG,
-                                                                                   mixed_precision_enable=False)
+     float_graph_with_compression_config = load_fqc_configuration(float_graph, framework_platform_capabilities)

      # Create a Pruner object with the graph and configuration.
      pruner = Pruner(float_graph_with_compression_config,
@@ -138,7 +134,7 @@ if FOUND_TF:
                      target_resource_utilization,
                      representative_data_gen,
                      pruning_config,
-                     target_platform_capabilities)
+                     framework_platform_capabilities)

      # Apply the pruning process.
      pruned_graph = pruner.prune_graph()
@@ -23,10 +23,9 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
  from model_compression_toolkit.core.common.pruning.pruner import Pruner
  from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig
  from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
- from model_compression_toolkit.core.common.quantization.set_node_quantization_config import set_quantization_configuration_to_graph
+ from model_compression_toolkit.quantization_preparation.load_fqc import load_fqc_configuration
  from model_compression_toolkit.core.graph_prep_runner import read_model_to_graph
  from model_compression_toolkit.logger import Logger
- from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG
  from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL


@@ -134,9 +133,7 @@ if FOUND_TORCH:

      # Apply quantization configuration to the graph. This step is necessary even when not quantizing,
      # as it prepares the graph for the pruning process.
-     float_graph_with_compression_config = set_quantization_configuration_to_graph(float_graph,
-                                                                                   quant_config=DEFAULTCONFIG,
-                                                                                   mixed_precision_enable=False)
+     float_graph_with_compression_config = load_fqc_configuration(float_graph, framework_platform_capabilities)

      # Create a Pruner object with the graph and configuration.
      pruner = Pruner(float_graph_with_compression_config,
@@ -122,7 +122,7 @@ if FOUND_TF:

      >>> quantized_model, quantization_info = mct.ptq.keras_post_training_quantization(model, repr_datagen, ru, core_config=config)

-     For more configuration options, please take a look at our `API documentation <https://sony.github.io/model_optimization/api/api_docs/modules/mixed_precision_quantization_config.html>`_.
+     For more configuration options, please take a look at our `API documentation <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/modules/mixed_precision_quantization_config.html>`_.

      """

@@ -167,7 +167,7 @@ if FOUND_TF:

      >>> quantized_model = tf.keras.models.load_model(model_file, custom_objects=custom_objects)

-     For more configuration options, please take a look at our `API documentation <https://sony.github.io/model_optimization/api/api_docs/modules/mixed_precision_quantization_config.html>`_.
+     For more configuration options, please take a look at our `API documentation <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/modules/mixed_precision_quantization_config.html>`_.

      """

@@ -136,7 +136,7 @@ if FOUND_TORCH:

      >>> quantized_model, quantization_info = mct.qat.pytorch_quantization_aware_training_init_experimental(model, repr_datagen, core_config=config)

-     For more configuration options, please take a look at our `API documentation <https://sony.github.io/model_optimization/api/api_docs/modules/mixed_precision_quantization_config.html>`_.
+     For more configuration options, please take a look at our `API documentation <https://sonysemiconductorsolutions.github.io/mct-model-optimization/api/api_docs/modules/mixed_precision_quantization_config.html>`_.

      """
      Logger.warning(
@@ -0,0 +1,14 @@
+ # Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
@@ -0,0 +1,223 @@
+ # Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ from typing import List, Optional
+
+ from model_compression_toolkit.core.common import Graph, BaseNode
+ from model_compression_toolkit.core.common.framework_info import ChannelAxisMapping
+ from model_compression_toolkit.core.common.fusion.fusing_info import FusingInfoGenerator
+ from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
+     CandidateNodeQuantizationConfig, NodeQuantizationConfig
+ from model_compression_toolkit.core.common.quantization.node_quantization_config import \
+     NodeActivationQuantizationConfig, NodeWeightsQuantizationConfig, ActivationQuantizationMode
+ from model_compression_toolkit.core.common.quantization.set_node_quantization_config import filter_node_qco_by_graph
+ from model_compression_toolkit.logger import Logger
+ from model_compression_toolkit.target_platform_capabilities import FrameworkQuantizationCapabilities, \
+     QuantizationConfigOptions, OpQuantizationConfig
+
+
+ def load_fqc_configuration(graph: Graph, fqc: FrameworkQuantizationCapabilities):
+     """
+     Set-up graph for quantization per TPC.
+     Each node will contain quantization candidates for mixed precision and the base config for single precision.
+     The graph will contain the fusing info.
+
+     Args:
+         graph: graph.
+         fqc: framework quantization capabilities object.
+
+     Returns:
+         Updated graph.
+     """
+     graph = _set_nodes_quantization_configuration(graph, fqc)
+     graph = _set_fusion_info(graph, fqc)
+
+     return graph
+
+
+ def set_quantization_configs_to_node(node: BaseNode,
+                                      graph: Graph,
+                                      fqc: FrameworkQuantizationCapabilities):
+     """
+     Create and set quantization configurations to a node (for both weights and activation).
+
+     Args:
+         node (BaseNode): Node to set its quantization configurations.
+         graph (Graph): Model's internal representation graph.
+         fqc (FrameworkQuantizationCapabilities): FrameworkQuantizationCapabilities to get default OpQuantizationConfig.
+     """
+     qc_options = fetch_qc_options_for_node(node, fqc)
+     base_config, candidates_qcs = filter_node_qco_by_graph(node, fqc, graph, qc_options)
+
+     node_attrs_list = node.get_node_weights_attributes()
+     mp_candidates = [_create_candidate(node.channel_axis, qc, node_attrs_list)
+                      for qc in candidates_qcs]
+     sp_cfg = _create_candidate(node.channel_axis, base_config, node_attrs_list)
+
+     node.quantization_cfg = NodeQuantizationConfig(base_quantization_cfg=sp_cfg,
+                                                    candidates_quantization_cfg=mp_candidates)
+
+     # TODO is not needed anymore as find min/max candidate look for a real max/min, but some tests still count on it
+     node.sort_node_candidates()
+
+     if not node.has_activation:
+         node.quantization_cfg.update_activation_quantization_mode(ActivationQuantizationMode.NO_QUANT)
+
+     _disable_unsupported_quant_preserving(node, graph)
+
+
+ def fetch_qc_options_for_node(node: BaseNode,
+                               fqc: FrameworkQuantizationCapabilities,
+                               return_default=True) -> Optional[QuantizationConfigOptions]:
+     """
+     Get quantization configuration options for the node from TPC.
+
+     Args:
+         node: node for which to fetch quantization configuration.
+         fqc: framework quantization capabilities.
+         return_default: whether to return the default qco or None if node op is not in FQC.
+
+     Returns:
+         Quantization configuration options for the node.
+     """
+     # qcos by filters
+     filter_matches = [(fl, qco) for fl, qco in fqc.filterlayer2qco.items() if node.is_match_filter_params(fl)]
+     fls, filter_qcos = zip(*filter_matches) if filter_matches else (None, None)
+     if filter_qcos and any(qco != filter_qcos[0] for qco in filter_qcos[1:]):
+         raise ValueError(f'Cannot assign quantization configuration to {node} as it matches more than one filter with '
+                          f'conflicting configs: {fls}.')
+
+     # qco by opset
+     # must use is_match_type for functional op in TF2.15
+     matches = [(op_type, qco) for op_type, qco in fqc.layer2qco.items() if node.is_match_type(op_type)]
+     op_types, qcos = zip(*matches) if matches else (None, None)
+     if qcos and any(qco != qcos[0] for qco in qcos[1:]):
+         raise ValueError(f'Cannot assign quantization configuration to {node} as it matches more than one op type with '
+                          f'conflicting configs: {op_types}.')
+
+     # if node matches by both filter and opset, filter takes priority
+     if filter_qcos:
+         return filter_qcos[0]
+
+     if qcos:
+         return qcos[0]
+
+     return fqc.tpc.default_qco if return_default else None
+
+
+ def _set_nodes_quantization_configuration(graph: Graph,
+                                           fqc: FrameworkQuantizationCapabilities) -> Graph:
+     """
+     Set quantization configuration for each graph node.
+
+     Args:
+         graph: graph to set with quantization configuration.
+         fqc: framework quantization capabilities.
+
+     Returns:
+         Graph: The graph with quantization configurations attached to each node in it.
+     """
+     _validate_custom_ops_have_qco(graph, fqc)
+
+     for n in graph.get_topo_sorted_nodes():
+         set_quantization_configs_to_node(node=n,
+                                          graph=graph,
+                                          fqc=fqc)
+     return graph
+
+
+ def _set_fusion_info(graph: Graph, fqc: FrameworkQuantizationCapabilities) -> Graph:
+     """
+
+     Args:
+         graph: graph.
+         fqc: quantization capabilities with attached framework.
+
+     Returns:
+
+     """
+     # TODO fix the dict with const keys inside get_fusing_patterns. use named tuple or class
+     # TODO irena instead of storing fusion inside graph (including tpc objects) and then let graph convert tpc op config to
+     #  node config, do it here and only store in graph whatever is relevant after this stage.
+     fusing_info = FusingInfoGenerator(fqc.get_fusing_patterns()).generate_fusing_info(graph)
+     graph.fusing_info = fusing_info
+     graph.override_fused_node_activation_quantization_candidates()
+     return graph
+
+
+ def _disable_unsupported_quant_preserving(node: BaseNode, graph: Graph):
+     """
+     Disable quantization for quantization preserving ops in cases it cannot be supported
+     (multiple inputs or un-quantized previous node).
+
+     Args:
+         node: current node.
+         graph: graph.
+     """
+     if not node.quantization_cfg.get_activation_quant_mode() == ActivationQuantizationMode.PRESERVE_QUANT:
+         return
+
+     prev_nodes = graph.get_prev_nodes(node)
+     if len(prev_nodes) != 1:
+         Logger.info(f'Disabling Quantization-Preserving for node {node.name} with {len(prev_nodes)} inputs.')
+         node.quantization_cfg.update_activation_quantization_mode(ActivationQuantizationMode.NO_QUANT)
+     elif prev_nodes[0].quantization_cfg.get_activation_quant_mode() == ActivationQuantizationMode.NO_QUANT:
+         Logger.info(f'Disabling Quantization-Preserving for node {node.name} since previous node activation '
+                     f'quantization is disabled.')
+         node.quantization_cfg.update_activation_quantization_mode(ActivationQuantizationMode.NO_QUANT)
+
+
+ # TODO irena copied from graph.set_fqc as is. Why does it have Keras errors?
+ def _validate_custom_ops_have_qco(graph, fqc):
+     custom_nodes = [n for n in graph.nodes if n.is_custom]
+     for n in custom_nodes:
+         qco = fetch_qc_options_for_node(n, fqc, return_default=False)
+         if not qco:
+             Logger.critical(f'MCT does not support optimizing Keras custom layers. Found a layer of type {n.type}. '
+                             ' Please add the custom layer to Framework Quantization Capabilities (FQC), or file a feature '
+                             'request or an issue if you believe this should be supported.')  # pragma: no cover
+         if any([qc.default_weight_attr_config.enable_weights_quantization for qc in qco.quantization_configurations]):
+             Logger.critical(f'Layer identified: {n.type}. MCT does not support weight quantization for Keras custom layers.')  # pragma: no cover
+
+
+ def _create_candidate(weight_channel_axis: ChannelAxisMapping,
+                       op_cfg: OpQuantizationConfig,
+                       node_attrs_list: List[str]) -> CandidateNodeQuantizationConfig:
+     """
+     Create quantization configuration candidate.
+
+     Args:
+         weight_channel_axis: channels axes of the node's kernel.
+         op_cfg: quantization config for the op.
+         node_attrs_list: A list of the node's weights attributes names.
+
+     Returns:
+         Candidate quantization config.
+     """
+
+     aqc = NodeActivationQuantizationConfig(op_cfg=op_cfg)
+
+     # TODO: remove this validation and warning once enabling all attributes quantization by default
+     attrs_with_enabled_quantization = [attr for attr, cfg in op_cfg.attr_weights_configs_mapping.items()
+                                        if cfg.enable_weights_quantization]
+     if len(attrs_with_enabled_quantization) > 1:
+         Logger.warning(f"Multiple weights attributes quantization is enabled via the provided FQC."
+                        f"Quantizing any attribute other than the kernel is experimental "
+                        f"and may be subject to unstable behavior."
+                        f"Attributes with enabled weights quantization: {attrs_with_enabled_quantization}.")
+     wqc = NodeWeightsQuantizationConfig(op_cfg=op_cfg,
+                                         weights_channels_axis=weight_channel_axis,
+                                         node_attrs_list=node_attrs_list)
+
+     return CandidateNodeQuantizationConfig(activation_quantization_cfg=aqc, weights_quantization_cfg=wqc)
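Note: a hedged sketch of the preparation flow the pruning facades switch to in this release, mirroring the Keras pruning_facade hunk above; the variable names are placeholders.

    from model_compression_toolkit.quantization_preparation.load_fqc import load_fqc_configuration

    # Attach the TPC to the framework, read the model into MCT's internal graph,
    # then load the FQC-based quantization configuration onto the graph.
    fqc = AttachTpcToKeras().attach(target_platform_capabilities)
    float_graph = read_model_to_graph(model, representative_data_gen, fqc, fw_impl)
    # Replaces set_quantization_configuration_to_graph(...) used in previous nightlies.
    graph_with_cfg = load_fqc_configuration(float_graph, fqc)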
@@ -29,7 +29,7 @@ QNNPACK_TP_MODEL = 'qnnpack'
  # TP Attributes
  KERNEL_ATTR = "kernel_attr"
  BIAS_ATTR = "bias_attr"
- POS_ATTR = "pos_attr"
+ POSITIONAL_ATTR = "pos_attr"

  # TODO: this is duplicated from the core frameworks constants files, because the original consts can't be used here
  # duo to circular dependency. It might be best to extract the constants from the core file and put them here (in a