mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/METADATA +6 -3
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/RECORD +165 -159
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/core/analyzer.py +5 -2
- model_compression_toolkit/core/common/back2framework/base_model_builder.py +4 -0
- model_compression_toolkit/core/common/collectors/base_collector.py +1 -4
- model_compression_toolkit/core/common/collectors/mean_collector.py +4 -7
- model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py +4 -7
- model_compression_toolkit/core/common/framework_implementation.py +22 -10
- model_compression_toolkit/core/common/framework_info.py +83 -93
- model_compression_toolkit/core/common/fusion/graph_fuser.py +9 -12
- model_compression_toolkit/core/common/graph/base_graph.py +72 -45
- model_compression_toolkit/core/common/graph/base_node.py +141 -121
- model_compression_toolkit/core/common/graph/functional_node.py +2 -19
- model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +21 -17
- model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +18 -8
- model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py +9 -14
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +21 -12
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +3 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +6 -3
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +10 -5
- model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +5 -2
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +9 -4
- model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +7 -2
- model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -7
- model_compression_toolkit/core/common/model_collector.py +18 -22
- model_compression_toolkit/core/common/model_validation.py +44 -0
- model_compression_toolkit/core/common/network_editors/__init__.py +1 -8
- model_compression_toolkit/core/common/network_editors/actions.py +130 -14
- model_compression_toolkit/core/common/network_editors/edit_network.py +4 -1
- model_compression_toolkit/core/common/pruning/channels_grouping.py +5 -1
- model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +6 -0
- model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +15 -5
- model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +7 -3
- model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +4 -2
- model_compression_toolkit/core/common/pruning/memory_calculator.py +13 -5
- model_compression_toolkit/core/common/pruning/prune_graph.py +4 -1
- model_compression_toolkit/core/common/pruning/pruner.py +6 -1
- model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +13 -5
- model_compression_toolkit/core/common/pruning/pruning_section.py +18 -9
- model_compression_toolkit/core/common/quantization/bit_width_config.py +10 -10
- model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +55 -116
- model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +14 -20
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +228 -43
- model_compression_toolkit/core/common/quantization/quantization_config.py +1 -0
- model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -21
- model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +78 -0
- model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py +5 -8
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +76 -91
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +66 -36
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +32 -61
- model_compression_toolkit/core/common/quantization/quantize_node.py +8 -8
- model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +412 -93
- model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +7 -3
- model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +19 -6
- model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +19 -11
- model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +15 -15
- model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +20 -4
- model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +9 -4
- model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +12 -8
- model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +6 -3
- model_compression_toolkit/core/common/substitutions/scale_equalization.py +21 -5
- model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +55 -43
- model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +3 -1
- model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
- model_compression_toolkit/core/common/visualization/nn_visualizer.py +8 -3
- model_compression_toolkit/core/common/visualization/tensorboard_writer.py +12 -8
- model_compression_toolkit/core/graph_prep_runner.py +35 -22
- model_compression_toolkit/core/keras/back2framework/float_model_builder.py +4 -0
- model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +5 -0
- model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +15 -8
- model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/keras/default_framework_info.py +91 -131
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -2
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +1 -0
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +18 -29
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +16 -8
- model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +5 -4
- model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +13 -3
- model_compression_toolkit/core/keras/keras_implementation.py +37 -17
- model_compression_toolkit/core/keras/keras_model_validation.py +38 -0
- model_compression_toolkit/core/keras/keras_node_prior_info.py +13 -4
- model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +34 -19
- model_compression_toolkit/core/keras/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +12 -3
- model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +16 -9
- model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +5 -1
- model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +3 -2
- model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +6 -5
- model_compression_toolkit/core/pytorch/default_framework_info.py +79 -93
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +4 -3
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +5 -5
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +8 -4
- model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +4 -3
- model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +12 -3
- model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -2
- model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +41 -24
- model_compression_toolkit/core/pytorch/pytorch_implementation.py +33 -13
- model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +5 -1
- model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +2 -2
- model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +5 -3
- model_compression_toolkit/core/quantization_prep_runner.py +11 -6
- model_compression_toolkit/core/runner.py +15 -5
- model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +8 -8
- model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +11 -11
- model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +0 -2
- model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +1 -0
- model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +9 -13
- model_compression_toolkit/gptq/common/gptq_graph.py +11 -5
- model_compression_toolkit/gptq/common/gptq_training.py +8 -1
- model_compression_toolkit/gptq/keras/gptq_training.py +9 -3
- model_compression_toolkit/gptq/keras/graph_info.py +6 -4
- model_compression_toolkit/gptq/keras/quantization_facade.py +10 -4
- model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/pytorch/gptq_training.py +9 -3
- model_compression_toolkit/gptq/pytorch/graph_info.py +3 -1
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +7 -5
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
- model_compression_toolkit/gptq/runner.py +7 -1
- model_compression_toolkit/pruning/keras/pruning_facade.py +12 -7
- model_compression_toolkit/pruning/pytorch/pruning_facade.py +8 -4
- model_compression_toolkit/ptq/keras/quantization_facade.py +13 -5
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -4
- model_compression_toolkit/ptq/runner.py +4 -1
- model_compression_toolkit/qat/common/qat_config.py +6 -2
- model_compression_toolkit/qat/keras/quantization_facade.py +13 -7
- model_compression_toolkit/qat/pytorch/quantization_facade.py +11 -7
- model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
- model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2pytorch.py +3 -3
- model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +2 -0
- model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +6 -0
- model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +4 -2
- model_compression_toolkit/xquant/__init__.py +1 -0
- model_compression_toolkit/xquant/common/constants.py +1 -0
- model_compression_toolkit/xquant/common/model_folding_utils.py +6 -1
- model_compression_toolkit/xquant/common/tensorboard_utils.py +4 -1
- model_compression_toolkit/xquant/common/xquant_config.py +27 -1
- model_compression_toolkit/xquant/{common → keras}/core_report_generator.py +2 -2
- model_compression_toolkit/xquant/keras/facade_xquant_report.py +1 -1
- model_compression_toolkit/xquant/{common → keras}/framework_report_utils.py +23 -2
- model_compression_toolkit/xquant/keras/keras_report_utils.py +10 -5
- model_compression_toolkit/xquant/keras/similarity_calculator.py +199 -0
- model_compression_toolkit/xquant/keras/tensorboard_utils.py +3 -0
- model_compression_toolkit/xquant/pytorch/core_detect_degrade_layer.py +77 -0
- model_compression_toolkit/xquant/pytorch/core_judge_troubleshoot.py +66 -0
- model_compression_toolkit/xquant/pytorch/core_report_generator.py +177 -0
- model_compression_toolkit/xquant/pytorch/detect_degrade_utils.py +78 -0
- model_compression_toolkit/xquant/pytorch/facade_xquant_report.py +41 -1
- model_compression_toolkit/xquant/pytorch/framework_report_utils.py +98 -0
- model_compression_toolkit/xquant/pytorch/judge_troubleshoot_utils.py +562 -0
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +10 -7
- model_compression_toolkit/xquant/{common → pytorch}/similarity_calculator.py +6 -1
- model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +3 -0
- model_compression_toolkit/core/keras/quantization/activation_quantization_fn_factory.py +0 -47
- model_compression_toolkit/core/pytorch/quantization/activation_quantization_fn_factory.py +0 -45
- model_compression_toolkit/quantization_preparation/__init__.py +0 -14
- model_compression_toolkit/quantization_preparation/load_fqc.py +0 -223
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/WHEEL +0 -0
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/licenses/LICENSE.md +0 -0
- {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/top_level.txt +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/keras/{quantization → quantizer}/lut_fake_quant.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/__init__.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/fake_quant_builder.py +0 -0
- /model_compression_toolkit/core/pytorch/{quantization → quantizer}/lut_fake_quant.py +0 -0
model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py
CHANGED

```diff
@@ -18,8 +18,6 @@ import numpy as np
 
 from model_compression_toolkit.core.common.quantization.candidate_node_quantization_config import \
     CandidateNodeQuantizationConfig
-from model_compression_toolkit.core.common.quantization.quantization_fn_selection import (get_activation_quantization_fn,
-                                                                                          get_weights_quantization_fn)
 
 
 def verify_candidates_descending_order(node_q_cfg: List[CandidateNodeQuantizationConfig],
@@ -79,21 +77,20 @@ def init_quantized_weights(node_q_cfg: List[CandidateNodeQuantizationConfig],
     quantized_weights = []
     for qc in node_q_cfg:
         qc_weights_attr = qc.weights_quantization_cfg.get_attr_config(kernel_attr)
-
-
-
-
-
-
-
+        q_weight = qc_weights_attr.weights_quantization_fn(float_weights,
+                                                           qc_weights_attr.weights_n_bits,
+                                                           True,
+                                                           qc_weights_attr.weights_quantization_params,
+                                                           qc_weights_attr.weights_per_channel_threshold,
+                                                           qc_weights_attr.weights_channels_axis[
+                                                               0])  # output channel axis
 
         quantized_weights.append(fw_tensor_convert_func(q_weight))
 
     return quantized_weights
 
 
-def init_activation_quantizers(node_q_cfg: List[CandidateNodeQuantizationConfig],
-                               get_activation_quantization_fn_factory: Callable) -> List:
+def init_activation_quantizers(node_q_cfg: List[CandidateNodeQuantizationConfig]) -> List:
     """
     Builds a list of quantizers for each of the bitwidth candidates for activation quantization,
     to be stored and used during MP search.
@@ -101,7 +98,6 @@ def init_activation_quantizers(node_q_cfg: List[CandidateNodeQuantizationConfig]
     Args:
         node_q_cfg: Quantization configuration candidates of the node that generated the layer that will
             use this quantizer.
-        get_activation_quantization_fn_factory: activation quantization functions factory.
 
     Returns: a list of activation quantizers - for each bitwidth and layer's attribute to be quantized.
     """
@@ -109,7 +105,6 @@ def init_activation_quantizers(node_q_cfg: List[CandidateNodeQuantizationConfig]
     activation_quantizers = []
     for index, qc in enumerate(node_q_cfg):
         q_activation = node_q_cfg[index].activation_quantization_cfg
-
-        activation_quantizers.append(quantizer)
+        activation_quantizers.append(q_activation.quantize_node_output)
 
     return activation_quantizers
```
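Note: the net effect of these hunks is that quantizer construction no longer goes through the removed `quantization_fn_selection` factories; each candidate's weights-attribute config now carries its own `weights_quantization_fn`, and the activation quantizer is simply the candidate's `quantize_node_output` callable. A minimal sketch of the new pattern, using stub stand-ins rather than MCT's real config classes:

```python
# Minimal sketch (stub classes, not MCT's) of the candidate-carried quantizer pattern:
# each weights-attribute config holds its own quantization callable and bit-width.
from dataclasses import dataclass
from typing import Callable, List

@dataclass
class StubWeightsAttrCfg:
    weights_n_bits: int
    weights_quantization_fn: Callable  # replaces the removed get_weights_quantization_fn lookup

def toy_symmetric_quant(weights: List[float], n_bits: int) -> List[float]:
    # Toy symmetric quantizer, for demonstration only.
    scale = max(abs(w) for w in weights) / (2 ** (n_bits - 1))
    return [round(w / scale) * scale for w in weights]

candidates = [StubWeightsAttrCfg(8, toy_symmetric_quant), StubWeightsAttrCfg(4, toy_symmetric_quant)]
float_weights = [0.1, -0.7, 0.5]
quantized_per_candidate = [c.weights_quantization_fn(float_weights, c.weights_n_bits)
                           for c in candidates]
print(quantized_per_candidate)  # one quantized copy per bit-width candidate
```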
model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py
CHANGED

```diff
@@ -12,12 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
+import numpy as np
+
+from model_compression_toolkit.core import ResourceUtilization, FrameworkInfo
 from model_compression_toolkit.core.common import Graph
+from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import \
+    FrameworkQuantizationCapabilities
 
 
 def filter_candidates_for_mixed_precision(graph: Graph,
-                                          target_resource_utilization: ResourceUtilization
+                                          target_resource_utilization: ResourceUtilization,
+                                          fw_info: FrameworkInfo,
+                                          fqc: FrameworkQuantizationCapabilities):
     """
     Filters out candidates in case of mixed precision search for only weights or activation compression.
     For instance, if running only weights compression - filters out candidates of activation configurable nodes
@@ -29,6 +35,9 @@ def filter_candidates_for_mixed_precision(graph: Graph,
     Args:
         graph: A graph representation of the model to be quantized.
         target_resource_utilization: The resource utilization of the target device.
+        fw_info: fw_info: Information needed for quantization about the specific framework.
+        fqc: FrameworkQuantizationCapabilities object that describes the desired inference target platform.
+
     """
 
     tru = target_resource_utilization
@@ -40,21 +49,21 @@
         # filter out candidates activation only configurable node
         activation_configurable_nodes = [n for n in graph.get_activation_configurable_nodes()]
         for n in activation_configurable_nodes:
-            base_cfg_nbits = n.
-
+            base_cfg_nbits = n.get_qco(fqc).base_config.activation_n_bits
+            filtered_conf = [c for c in n.candidates_quantization_cfg if
                              c.activation_quantization_cfg.enable_activation_quantization and
                              c.activation_quantization_cfg.activation_n_bits == base_cfg_nbits]
 
-            n.
+            n.candidates_quantization_cfg = filtered_conf
 
     elif tru.activation_restricted() and not tru.weight_restricted():
         # Running mixed precision for activation compression only -
         # filter out candidates weights only configurable node
-        weight_configurable_nodes = [n for n in graph.get_weights_configurable_nodes()]
+        weight_configurable_nodes = [n for n in graph.get_weights_configurable_nodes(fw_info)]
         for n in weight_configurable_nodes:
-
-
-
-            c.weights_quantization_cfg.get_attr_config(
-            c.weights_quantization_cfg.get_attr_config(
-            n.
+            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
+            base_cfg_nbits = n.get_qco(fqc).base_config.attr_weights_configs_mapping[kernel_attr].weights_n_bits
+            filtered_conf = [c for c in n.candidates_quantization_cfg if
+                             c.weights_quantization_cfg.get_attr_config(kernel_attr).enable_weights_quantization and
+                             c.weights_quantization_cfg.get_attr_config(kernel_attr).weights_n_bits == base_cfg_nbits]
+            n.candidates_quantization_cfg = filtered_conf
```
model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py
CHANGED

```diff
@@ -30,10 +30,11 @@ from model_compression_toolkit.core.common.quantization.node_quantization_config
 class MixedPrecisionRUHelper:
     """ Helper class for resource utilization computations for mixed precision optimization. """
 
-    def __init__(self, graph: Graph, fw_impl: FrameworkImplementation):
+    def __init__(self, graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation):
         self.graph = graph
+        self.fw_info = fw_info
         self.fw_impl = fw_impl
-        self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl)
+        self.ru_calculator = ResourceUtilizationCalculator(graph, fw_impl, fw_info)
 
     def compute_utilization(self, ru_targets: Set[RUTarget], mp_cfg: Dict[BaseNode, int]) -> Dict[RUTarget, np.ndarray]:
         """
```
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py
CHANGED

```diff
@@ -35,6 +35,7 @@ class BitWidthSearchMethod(Enum):
 
 
 def search_bit_width(graph: Graph,
+                     fw_info: FrameworkInfo,
                      fw_impl: FrameworkImplementation,
                      target_resource_utilization: ResourceUtilization,
                      mp_config: MixedPrecisionQuantizationConfig,
@@ -51,6 +52,7 @@ def search_bit_width(graph: Graph,
 
     Args:
         graph: Graph to search a MP configuration for.
+        fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
        fw_impl: FrameworkImplementation object with specific framework methods implementation.
        target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
        mp_config: Mixed-precision quantization configuration.
@@ -77,7 +79,7 @@ def search_bit_width(graph: Graph,
 
     # Set Sensitivity Evaluator for MP search. It should always work with the original MP graph,
     # even if a virtual graph was created (and is used only for BOPS utilization computation purposes)
-    se = SensitivityEvaluation(graph, mp_config, representative_data_gen=representative_data_gen,
+    se = SensitivityEvaluation(graph, mp_config, representative_data_gen=representative_data_gen, fw_info=fw_info,
                                fw_impl=fw_impl, disable_activation_for_metric=disable_activation_for_metric,
                                hessian_info_service=hessian_info_service)
 
@@ -91,6 +93,7 @@ def search_bit_width(graph: Graph,
 
     # Search manager and LP are highly coupled, so LP search method was moved inside search manager.
     search_manager = MixedPrecisionSearchManager(graph,
+                                                 fw_info=fw_info,
                                                  fw_impl=fw_impl,
                                                  sensitivity_evaluator=se,
                                                  target_resource_utilization=target_resource_utilization,
@@ -102,6 +105,6 @@ def search_bit_width(graph: Graph,
     if mp_config.refine_mp_solution:
         nodes_bit_cfg = greedy_solution_refinement_procedure(nodes_bit_cfg, search_manager, target_resource_utilization)
 
-    topo_bit_cfg = [nodes_bit_cfg[n] for n in graph.get_configurable_sorted_nodes()]
+    topo_bit_cfg = [nodes_bit_cfg[n] for n in graph.get_configurable_sorted_nodes(fw_info)]
     assert len(topo_bit_cfg) == len(nodes_bit_cfg)
     return topo_bit_cfg
```
model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py
CHANGED

```diff
@@ -53,6 +53,7 @@ class MixedPrecisionSearchManager:
 
     def __init__(self,
                  graph: Graph,
+                 fw_info: FrameworkInfo,
                  fw_impl: FrameworkImplementation,
                  sensitivity_evaluator: SensitivityEvaluation,
                  target_resource_utilization: ResourceUtilization,
@@ -61,12 +62,14 @@ class MixedPrecisionSearchManager:
 
         Args:
             graph: Graph to search for its MP configuration.
+            fw_info: FrameworkInfo object about the specific framework (e.g., attributes of different layers' weights to quantize).
            fw_impl: FrameworkImplementation object with specific framework methods implementation.
            sensitivity_evaluator: A SensitivityEvaluation which provides a function that evaluates the sensitivity of
                a bit-width configuration for the MP model.
            target_resource_utilization: Target Resource Utilization to bound our feasible solution space s.t the configuration does not violate it.
        """
 
+        self.fw_info = fw_info
         self.fw_impl = fw_impl
 
         self.original_graph = graph
@@ -78,12 +81,12 @@ class MixedPrecisionSearchManager:
         self.target_resource_utilization = target_resource_utilization
         self.mp_config = mp_config
 
-        self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes()
+        self.mp_topo_configurable_nodes = self.mp_graph.get_configurable_sorted_nodes(fw_info)
 
         self.ru_targets = target_resource_utilization.get_restricted_targets()
-        self.orig_graph_ru_helper = MixedPrecisionRUHelper(self.original_graph, fw_impl)
+        self.orig_graph_ru_helper = MixedPrecisionRUHelper(self.original_graph, fw_info, fw_impl)
 
-        self.min_ru_config: Dict[BaseNode, int] = self.mp_graph.get_min_candidates_config()
+        self.min_ru_config: Dict[BaseNode, int] = self.mp_graph.get_min_candidates_config(fw_info)
 
         self.config_reconstructor = None
         orig_min_config = self.min_ru_config
```
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py
CHANGED

```diff
@@ -124,9 +124,10 @@ class ResourceUtilizationCalculator:
     unexpected_qc_error = 'Custom quantization configuration is not expected for non-custom bit mode.'
     unexpected_qc_nodes_error = 'Custom quantization configuration contains unexpected node names.'
 
-    def __init__(self, graph: Graph, fw_impl: FrameworkImplementation):
+    def __init__(self, graph: Graph, fw_impl: FrameworkImplementation, fw_info: FrameworkInfo):
         self.graph = graph
         self.fw_impl = fw_impl
+        self.fw_info = fw_info
 
         # Currently we go over the full graph even if utilization won't be requested for all nodes.
         # We could fill the cache on the fly only for requested nodes, but it's probably negligible.
@@ -543,10 +544,14 @@
             self._validate_custom_qcs(w_qc, bitwidth_mode)
 
         # check if the node has kernel
-
+        kernel_attrs = self.fw_info.get_kernel_op_attributes(n.type)
+        if len(kernel_attrs) > 1:  # pragma: no cover
+            raise NotImplementedError('Multiple kernel attributes are not supported for BOPS computation.')
+        if not kernel_attrs or not kernel_attrs[0]:
             return 0
 
-
+        kernel_attr = kernel_attrs[0]
+        node_mac = self.fw_impl.get_node_mac_operations(n, self.fw_info)
         if node_mac == 0:
             return node_mac
 
@@ -554,12 +559,12 @@
         assert len(prev_nodes) == 1, f'Weights node is expected to have exactly one input, {n} has {len(prev_nodes)}'
         a_node = prev_nodes[0]
         if (target_criterion == TargetInclusionCriterion.AnyQuantized and
-                not (a_node.is_activation_quantization_enabled() or n.is_weights_quantization_enabled(
+                not (a_node.is_activation_quantization_enabled() or n.is_weights_quantization_enabled(kernel_attr))):
             return 0
 
         act_qc = self._extract_qc(a_node, act_qcs)
         a_nbits = self._get_activation_nbits(a_node, bitwidth_mode, act_qc)
-        w_nbits = self._get_weight_nbits(n,
+        w_nbits = self._get_weight_nbits(n, kernel_attr, bitwidth_mode, w_qc)
         node_bops = a_nbits * w_nbits * node_mac
         return node_bops
```
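Note: the BOPS count computed at the end of the second hunk is simply activation bits × weight bits × MACs. A worked example with illustrative numbers:

```python
# Worked example of node_bops = a_nbits * w_nbits * node_mac (values are illustrative).
a_nbits = 8            # bit-width of the activation feeding the kernel op
w_nbits = 4            # bit-width of the kernel weights
node_mac = 1_000_000   # MAC operations the node performs
node_bops = a_nbits * w_nbits * node_mac
print(node_bops)       # 32_000_000 bit-operations
```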
model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py
CHANGED

```diff
@@ -15,7 +15,7 @@
 import copy
 from typing import Callable, Any
 
-from model_compression_toolkit.core import ResourceUtilization, CoreConfig, QuantizationErrorMethod
+from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig, QuantizationErrorMethod
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization_calculator import \
     ResourceUtilizationCalculator, BitwidthMode, TargetInclusionCriterion
@@ -27,6 +27,7 @@ def compute_resource_utilization_data(in_model: Any,
                                       representative_data_gen: Callable,
                                       core_config: CoreConfig,
                                       fqc: FrameworkQuantizationCapabilities,
+                                      fw_info: FrameworkInfo,
                                       fw_impl: FrameworkImplementation) -> ResourceUtilization:
     """
     Compute Resource Utilization of a model with the default single precision quantization.
@@ -38,6 +39,7 @@ def compute_resource_utilization_data(in_model: Any,
         core_config: CoreConfig containing parameters of how the model should be quantized.
         fqc: FrameworkQuantizationCapabilities object that models the inference target platform and
             the attached framework operator's information.
+        fw_info: Information needed for quantization about the specific framework.
        fw_impl: FrameworkImplementation object with a specific framework methods implementation.
 
     Returns:
@@ -53,11 +55,12 @@
     transformed_graph = graph_preparation_runner(in_model,
                                                  representative_data_gen=representative_data_gen,
                                                  quantization_config=core_config.quantization_config,
+                                                 fw_info=fw_info,
                                                  fw_impl=fw_impl,
                                                  fqc=fqc,
                                                  bit_width_config=core_config.bit_width_config,
                                                  mixed_precision_enable=False,
                                                  running_gptq=False)
 
-    ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl)
+    ru_calculator = ResourceUtilizationCalculator(transformed_graph, fw_impl, fw_info)
     return ru_calculator.compute_resource_utilization(TargetInclusionCriterion.AnyQuantizedNonFused, BitwidthMode.QDefaultSP)
```
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py
CHANGED

```diff
@@ -15,7 +15,7 @@
 import numpy as np
 from typing import runtime_checkable, Protocol, Callable, Any, List, Tuple
 
-from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, MpDistanceWeighting
+from model_compression_toolkit.core import FrameworkInfo, MixedPrecisionQuantizationConfig, MpDistanceWeighting
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
     HessianScoresGranularity
@@ -62,12 +62,15 @@ class DistanceMetricCalculator(MetricCalculator):
                  graph: Graph,
                  mp_config: MixedPrecisionQuantizationConfig,
                  representative_data_gen: Callable,
+                 fw_info: FrameworkInfo,
                  fw_impl: Any,
                  hessian_info_service: HessianInfoService = None):
         """
         Args:
             graph: Graph to search for its MP configuration.
             mp_config: MP Quantization configuration for how the graph should be quantized.
+            fw_info: FrameworkInfo object about the specific framework
+                (e.g., attributes of different layers' weights to quantize).
             fw_impl: FrameworkImplementation object with a specific framework methods implementation.
             representative_data_gen: Dataset used for getting batches for inference.
             hessian_info_service: HessianInfoService to fetch Hessian approximation information.
@@ -75,13 +78,14 @@ class DistanceMetricCalculator(MetricCalculator):
         self.graph = graph
         self.mp_config = mp_config
         self.representative_data_gen = representative_data_gen
+        self.fw_info = fw_info
         self.fw_impl = fw_impl
 
         if self.mp_config.distance_weighting_method == MpDistanceWeighting.HESSIAN:
             assert hessian_info_service is not None, ('Expected HessianInfoService object to be passed with Hessian '
                                                       'distance weighting')
 
-        self.sorted_configurable_nodes_names = graph.get_configurable_sorted_nodes_names()
+        self.sorted_configurable_nodes_names = graph.get_configurable_sorted_nodes_names(self.fw_info)
 
         # Get interest points and output points set for distance measurement and set other helper datasets
         # We define a separate set of output nodes of the model for the purpose of sensitivity computation.
@@ -392,8 +396,9 @@ class DistanceMetricCalculator(MetricCalculator):
         """
 
         return [n.node for n in graph.get_outputs()
-                if (
-
+                if (graph.fw_info.is_kernel_op(n.node.type) and
+                    n.node.is_weights_quantization_enabled(graph.fw_info.get_kernel_op_attributes(n.node.type)[0])) or
+                n.node.is_activation_quantization_enabled()]
 
     @staticmethod
     def bound_num_interest_points(sorted_ip_list: List[BaseNode], num_ip_factor: float) -> List[BaseNode]:
```
model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py
CHANGED

```diff
@@ -38,6 +38,7 @@ class SensitivityEvaluation:
                  graph: Graph,
                  mp_config: MixedPrecisionQuantizationConfig,
                  representative_data_gen: Callable,
+                 fw_info: FrameworkInfo,
                  fw_impl: Any,
                  disable_activation_for_metric: bool = False,
                  hessian_info_service: HessianInfoService = None
@@ -45,6 +46,8 @@ class SensitivityEvaluation:
         """
         Args:
             graph: Graph to search for its MP configuration.
+            fw_info: FrameworkInfo object about the specific framework
+                (e.g., attributes of different layers' weights to quantize).
             mp_config: MP Quantization configuration for how the graph should be quantized.
             representative_data_gen: Dataset used for getting batches for inference.
             fw_impl: FrameworkImplementation object with a specific framework methods implementation.
@@ -54,13 +57,14 @@ class SensitivityEvaluation:
         """
         self.mp_config = mp_config
         self.representative_data_gen = representative_data_gen
+        self.fw_info = fw_info
         self.fw_impl = fw_impl
 
         if self.mp_config.custom_metric_fn:
             self.metric_calculator = CustomMetricCalculator(graph, self.mp_config.custom_metric_fn)
         else:
             self.metric_calculator = DistanceMetricCalculator(graph, mp_config, representative_data_gen,
-                                                              fw_impl=fw_impl,
+                                                              fw_info=fw_info, fw_impl=fw_impl,
                                                               hessian_info_service=hessian_info_service)
 
         # Build a mixed-precision model which can be configured to use different bitwidth in different layers.
@@ -107,7 +111,8 @@ class SensitivityEvaluation:
 
         model_mp, _, conf_node2layers = self.fw_impl.model_builder(evaluation_graph,
                                                                    mode=ModelBuilderMode.MIXEDPRECISION,
-                                                                   append2output=outputs
+                                                                   append2output=outputs,
+                                                                   fw_info=self.fw_info)
 
         # Disable all configurable quantizers. They will be activated one at a time during sensitivity evaluation.
         for layer in itertools.chain(*conf_node2layers.values()):
```
model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py
CHANGED

```diff
@@ -50,11 +50,8 @@ def greedy_solution_refinement_procedure(mp_solution: Dict[BaseNode, int],
     if target_resource_utilization.bops_restricted():
         Logger.info(f'Target resource utilization constraint BOPs - Skipping MP greedy solution refinement')
         return mp_solution
-    assert search_manager.using_virtual_graph is False
 
-
-    activation_restricted = tru.activation_restricted() or tru.total_mem_restricted() or tru.bops_restricted()
-    weights_restricted = tru.weight_restricted() or tru.total_mem_restricted() or tru.bops_restricted()
+    assert search_manager.using_virtual_graph is False
 
     new_solution = mp_solution.copy()
     changed = True
@@ -65,7 +62,7 @@ def greedy_solution_refinement_procedure(mp_solution: Dict[BaseNode, int],
         nodes_next_candidate = {}
 
         for node in search_manager.mp_topo_configurable_nodes:
-            if new_solution[node] ==
+            if new_solution[node] == 0:
                 # layer has max config in the given solution, nothing to optimize
                 continue
 
@@ -74,8 +71,9 @@ def greedy_solution_refinement_procedure(mp_solution: Dict[BaseNode, int],
             # only weights kernel attribute is quantized with weights mixed precision
             valid_candidates = _get_valid_candidates_indices(node_candidates,
                                                              new_solution[node],
-                                                             activation_restricted,
-
+                                                             target_resource_utilization.activation_restricted(),
+                                                             target_resource_utilization.weight_restricted()
+                                                             )
 
             # Create a list of ru for the valid candidates.
             updated_ru = []
```
model_compression_toolkit/core/common/model_collector.py
CHANGED

```diff
@@ -18,7 +18,7 @@ import numpy as np
 from typing import List, Union, Tuple, Optional
 
 from networkx.algorithms.dag import topological_sort
-from model_compression_toolkit.core import QuantizationErrorMethod
+from model_compression_toolkit.core import FrameworkInfo, QuantizationErrorMethod
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.graph.base_graph import Graph
@@ -30,6 +30,7 @@ from model_compression_toolkit.core.common.collectors.statistics_collector impor
 
 
 def create_stats_collector_for_node(node: common.BaseNode,
+                                    fw_info: FrameworkInfo,
                                     quant_node_in_fln: bool) -> BaseStatsCollector:
     """
     Gets a node and a groups list and create and return a statistics collector for a node
@@ -38,7 +39,7 @@ def create_stats_collector_for_node(node: common.BaseNode,
 
     Args:
         node: Node to create its statistics collector.
-
+        fw_info: Information relevant to a specific framework about what is out channel axis (for statistics per-channel).
 
     Returns:
         Statistics collector for statistics collection for the node.
@@ -47,7 +48,7 @@
     if node.is_activation_quantization_enabled() or quant_node_in_fln:
         min_output = getattr(node.prior_info, 'min_output', None)
         max_output = getattr(node.prior_info, 'max_output', None)
-        stats_collector = common.StatsCollector(out_channel_axis=node.
+        stats_collector = common.StatsCollector(out_channel_axis=fw_info.out_channel_axis_mapping.get(node.type),
                                                 init_min_value=min_output,
                                                 init_max_value=max_output)
     else:
@@ -58,20 +59,20 @@
 
 def create_tensor2node(graph: common.Graph,
                        node: common.BaseNode,
-
+                       fw_info: common.FrameworkInfo):
     """
     Force statistic collector creation and assignment for a node.
     Args:
         graph: Graph of the node (for retrieving the current tensor).
         node: Node to create a tensor for.
-
+        fw_info: Specific framework information (for example, output channels index).
 
     """
     current_sc = graph.get_out_stats_collector(node)
     is_list_nostat_collectors = isinstance(current_sc, list) and len(
         [sc for sc in current_sc if not isinstance(sc, common.NoStatsCollector)]) == 0
     if isinstance(current_sc, common.NoStatsCollector) or current_sc is None or is_list_nostat_collectors:
-        stats_collector = common.StatsCollector(
+        stats_collector = common.StatsCollector(fw_info.out_channel_axis_mapping.get(node.type))
         graph.set_out_stats_collector_to_node(node, stats_collector)
 
 
@@ -139,6 +140,7 @@ class ModelCollector:
 
     def __init__(self, graph: Graph,
                  fw_impl: FrameworkImplementation,
+                 fw_info: FrameworkInfo,
                  hessian_info_service: HessianInfoService = None,
                  qc: common.QuantizationConfig = common.DEFAULTCONFIG):
         """
@@ -147,10 +149,12 @@
         Args:
             graph: Graph to build a model from it.
            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
+            fw_info: FrameworkInfo object with a specific framework information.
            qc: Quantization configuration containing parameters for how the graph should be quantized.
        """
 
         self.fw_impl = fw_impl
+        self.fw_info = fw_info
         self.hessian_service = hessian_info_service
         self.qc = qc
         self.model_outputs = [out.node for out in graph.get_outputs()]
@@ -158,27 +162,17 @@
         # Assign statistics collectors to nodes
         for n in graph.get_topo_sorted_nodes():
             quant_node_in_fln = n.is_fln_quantization() and graph.fusing_info.is_quantized_node_in_fln(n)
-            sc = create_stats_collector_for_node(n, quant_node_in_fln=quant_node_in_fln) # Get static collector for the node
-            if isinstance(sc, common.StatsCollector) and (sc.mc.axis is None or sc.mpcc.axis is None):
-                # Missing output channel axis info, so try to extract it from previous and next nodes output channel axis.
-                possible_output_channel_axis_set = {nn.out_channel_axis for nn in graph.get_next_nodes(n) + graph.get_prev_nodes(n)}
-                # Filter out None values.
-                possible_output_channel_axis_list = list(filter(lambda x: x is not None, possible_output_channel_axis_set))
-                if len(possible_output_channel_axis_list) > 0:
-                    if len(possible_output_channel_axis_list) > 1:
-                        Logger.warning(f'Ambiguous input channel data from next nodes for {n.name}.')
-                    sc.mc.axis = possible_output_channel_axis_list[0]
-                    sc.mpcc.axis = possible_output_channel_axis_list[0]
-
+            sc = create_stats_collector_for_node(n, fw_info=fw_info, quant_node_in_fln=quant_node_in_fln) # Get static collector for the node
             # If we use bias correction, and the node has kernel weights to quantize, we need to make sure
             # its previous nodes' tensors are consistent with this node.
-
-
+            kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
+            if qc.weights_bias_correction and kernel_attr is not None and n.is_weights_quantization_enabled(
+                    kernel_attr):
                 for ie in graph.incoming_edges(n):
                     input_node = ie.source_node
                     create_tensor2node(graph,
                                        input_node,
-
+                                       fw_info)
             if sc is not None:
                 graph.set_out_stats_collector_to_node(n, sc)
 
@@ -211,11 +205,13 @@
         # TODO: Add integration test for this case
         append2output = outputs_nodes + [n for n in self.model_outputs if n not in outputs_nodes]
 
+
         # Build a float model and output all layers' outputs
         # (that should be collected) as the model's outputs
         self.model, _ = self.fw_impl.model_builder(graph,
                                                    mode=ModelBuilderMode.FLOAT,
-                                                   append2output=append2output
+                                                   append2output=append2output,
+                                                   fw_info=self.fw_info)
 
     def infer(self, inputs_list: List[np.ndarray]):
         """
```
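Note: the removed block above used neighboring nodes to guess a missing output-channel axis; with this change the axis comes straight from the framework info's type-to-axis mapping. A toy version of that lookup (the mapping values are illustrative, e.g. channels-last layers with axis -1):

```python
# Toy stand-in for fw_info.out_channel_axis_mapping (illustrative channels-last values).
out_channel_axis_mapping = {'Conv2D': -1, 'DepthwiseConv2D': -1, 'Dense': -1}

def axis_for(node_type: str):
    # Returns None for unknown types instead of inferring from neighboring nodes.
    return out_channel_axis_mapping.get(node_type)

print(axis_for('Conv2D'))   # -1
print(axis_for('Reshape'))  # None
```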
model_compression_toolkit/core/common/model_validation.py
ADDED

```diff
@@ -0,0 +1,44 @@
+from abc import abstractmethod
+from typing import Any
+
+from model_compression_toolkit.core import FrameworkInfo
+
+
+class ModelValidation:
+    """
+    Class to define validation methods in order to validate the received model to quantize.
+    """
+
+    def __init__(self,
+                 model: Any,
+                 fw_info:FrameworkInfo):
+        """
+        Initialize a ModelValidation object.
+
+        Args:
+            model: Model to check its validity.
+            fw_info: Information about the specific framework of the model.
+        """
+        self.model = model
+        self.fw_info = fw_info
+
+    @abstractmethod
+    def validate_output_channel_consistency(self):
+        """
+
+        Validate that output channels index in all layers of the model are the same.
+        If the model has layers with different output channels index, it should throw an exception.
+
+        """
+        raise NotImplemented(
+            f'Framework validation class did not implement validate_output_channel_consistency')  # pragma: no cover
+
+    def validate(self):
+        """
+
+        Run all validation methods before the quantization process starts.
+
+        """
+        self.validate_output_channel_consistency()
+
+
```
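Note: `ModelValidation` is an abstract base; frameworks subclass it and implement `validate_output_channel_consistency` (the file list above also adds a Keras implementation in `core/keras/keras_model_validation.py`). A hypothetical minimal subclass, assuming the `ModelValidation` class from this hunk is in scope and treating the "model" as a plain list of per-layer axes:

```python
# Hypothetical subclass sketch (stub "model" = a list of per-layer output-channel axes;
# the real Keras implementation lives in core/keras/keras_model_validation.py).
class StubModelValidation(ModelValidation):
    def validate_output_channel_consistency(self):
        axes = set(self.model)
        if len(axes) > 1:
            raise ValueError(f'Inconsistent output channel axes across layers: {axes}')

StubModelValidation(model=[-1, -1, -1], fw_info=None).validate()  # passes silently
```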
model_compression_toolkit/core/common/network_editors/__init__.py
CHANGED

```diff
@@ -13,14 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 
-from model_compression_toolkit.core.common.network_editors.actions import
-    ChangeCandidatesWeightsQuantConfigAttr,
-    ChangeFinalWeightsQuantConfigAttr,
-    ChangeCandidatesActivationQuantConfigAttr,
-    ChangeCandidatesActivationQuantizationMethod,
-    ChangeFinalWeightsQuantizationMethod,
-    ChangeCandidatesWeightsQuantizationMethod,
-    ChangeFinalActivationQuantConfigAttr)
+from model_compression_toolkit.core.common.network_editors.actions import ChangeCandidatesWeightsQuantConfigAttr, ChangeFinalWeightsQuantConfigAttr, ChangeCandidatesActivationQuantConfigAttr, ChangeQuantizationParamFunction, ChangeCandidatesActivationQuantizationMethod, ChangeFinalWeightsQuantizationMethod, ChangeCandidatesWeightsQuantizationMethod, ChangeFinalActivationQuantConfigAttr
 from model_compression_toolkit.core.common.network_editors.actions import EditRule
 from model_compression_toolkit.core.common.network_editors.node_filters import NodeTypeFilter, NodeNameScopeFilter, \
     NodeNameFilter
```