mct-nightly 2.4.0.20250925.543__py3-none-any.whl → 2.4.2.20250926.532__py3-none-any.whl

This diff shows the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (169)
  1. {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/METADATA +6 -3
  2. {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/RECORD +165 -159
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/core/analyzer.py +5 -2
  5. model_compression_toolkit/core/common/back2framework/base_model_builder.py +4 -0
  6. model_compression_toolkit/core/common/collectors/base_collector.py +1 -4
  7. model_compression_toolkit/core/common/collectors/mean_collector.py +4 -7
  8. model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py +4 -7
  9. model_compression_toolkit/core/common/framework_implementation.py +22 -10
  10. model_compression_toolkit/core/common/framework_info.py +83 -93
  11. model_compression_toolkit/core/common/fusion/graph_fuser.py +9 -12
  12. model_compression_toolkit/core/common/graph/base_graph.py +72 -45
  13. model_compression_toolkit/core/common/graph/base_node.py +141 -121
  14. model_compression_toolkit/core/common/graph/functional_node.py +2 -19
  15. model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +21 -17
  16. model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +18 -8
  17. model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py +9 -14
  18. model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py +21 -12
  19. model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py +3 -2
  20. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +5 -2
  21. model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +6 -3
  22. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_calculator.py +10 -5
  23. model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +5 -2
  24. model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py +9 -4
  25. model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py +7 -2
  26. model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +5 -7
  27. model_compression_toolkit/core/common/model_collector.py +18 -22
  28. model_compression_toolkit/core/common/model_validation.py +44 -0
  29. model_compression_toolkit/core/common/network_editors/__init__.py +1 -8
  30. model_compression_toolkit/core/common/network_editors/actions.py +130 -14
  31. model_compression_toolkit/core/common/network_editors/edit_network.py +4 -1
  32. model_compression_toolkit/core/common/pruning/channels_grouping.py +5 -1
  33. model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +6 -0
  34. model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +15 -5
  35. model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +7 -3
  36. model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +4 -2
  37. model_compression_toolkit/core/common/pruning/memory_calculator.py +13 -5
  38. model_compression_toolkit/core/common/pruning/prune_graph.py +4 -1
  39. model_compression_toolkit/core/common/pruning/pruner.py +6 -1
  40. model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +13 -5
  41. model_compression_toolkit/core/common/pruning/pruning_section.py +18 -9
  42. model_compression_toolkit/core/common/quantization/bit_width_config.py +10 -10
  43. model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +55 -116
  44. model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +14 -20
  45. model_compression_toolkit/core/common/quantization/node_quantization_config.py +228 -43
  46. model_compression_toolkit/core/common/quantization/quantization_config.py +1 -0
  47. model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -21
  48. model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +78 -0
  49. model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py +5 -8
  50. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +76 -91
  51. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +66 -36
  52. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +32 -61
  53. model_compression_toolkit/core/common/quantization/quantize_node.py +8 -8
  54. model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +412 -93
  55. model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py +7 -3
  56. model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +19 -6
  57. model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +19 -11
  58. model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py +15 -15
  59. model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +20 -4
  60. model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +9 -4
  61. model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +12 -8
  62. model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +6 -3
  63. model_compression_toolkit/core/common/substitutions/scale_equalization.py +21 -5
  64. model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +55 -43
  65. model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +3 -1
  66. model_compression_toolkit/core/common/substitutions/weights_activation_split.py +1 -1
  67. model_compression_toolkit/core/common/visualization/nn_visualizer.py +8 -3
  68. model_compression_toolkit/core/common/visualization/tensorboard_writer.py +12 -8
  69. model_compression_toolkit/core/graph_prep_runner.py +35 -22
  70. model_compression_toolkit/core/keras/back2framework/float_model_builder.py +4 -0
  71. model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +5 -0
  72. model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +15 -8
  73. model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +6 -5
  74. model_compression_toolkit/core/keras/default_framework_info.py +91 -131
  75. model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +7 -2
  76. model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +1 -0
  77. model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +18 -29
  78. model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +16 -8
  79. model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +5 -4
  80. model_compression_toolkit/core/keras/hessian/weights_hessian_scores_calculator_keras.py +13 -3
  81. model_compression_toolkit/core/keras/keras_implementation.py +37 -17
  82. model_compression_toolkit/core/keras/keras_model_validation.py +38 -0
  83. model_compression_toolkit/core/keras/keras_node_prior_info.py +13 -4
  84. model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +1 -2
  85. model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +34 -19
  86. model_compression_toolkit/core/keras/resource_utilization_data_facade.py +2 -2
  87. model_compression_toolkit/core/keras/statistics_correction/keras_compute_activation_bias_correction_of_graph.py +5 -3
  88. model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +12 -3
  89. model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +16 -9
  90. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +5 -1
  91. model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +3 -2
  92. model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +6 -5
  93. model_compression_toolkit/core/pytorch/default_framework_info.py +79 -93
  94. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +4 -3
  95. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +5 -5
  96. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +8 -4
  97. model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +4 -3
  98. model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +12 -3
  99. model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +1 -2
  100. model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +41 -24
  101. model_compression_toolkit/core/pytorch/pytorch_implementation.py +33 -13
  102. model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +5 -1
  103. model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +2 -2
  104. model_compression_toolkit/core/pytorch/statistics_correction/pytorch_compute_activation_bias_correction_of_graph.py +5 -3
  105. model_compression_toolkit/core/quantization_prep_runner.py +11 -6
  106. model_compression_toolkit/core/runner.py +15 -5
  107. model_compression_toolkit/data_generation/keras/optimization_functions/lr_scheduler.py +8 -8
  108. model_compression_toolkit/data_generation/pytorch/optimization_functions/lr_scheduler.py +11 -11
  109. model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +0 -2
  110. model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +1 -0
  111. model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +9 -13
  112. model_compression_toolkit/gptq/common/gptq_graph.py +11 -5
  113. model_compression_toolkit/gptq/common/gptq_training.py +8 -1
  114. model_compression_toolkit/gptq/keras/gptq_training.py +9 -3
  115. model_compression_toolkit/gptq/keras/graph_info.py +6 -4
  116. model_compression_toolkit/gptq/keras/quantization_facade.py +10 -4
  117. model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
  118. model_compression_toolkit/gptq/pytorch/gptq_training.py +9 -3
  119. model_compression_toolkit/gptq/pytorch/graph_info.py +3 -1
  120. model_compression_toolkit/gptq/pytorch/quantization_facade.py +7 -5
  121. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +3 -1
  122. model_compression_toolkit/gptq/runner.py +7 -1
  123. model_compression_toolkit/pruning/keras/pruning_facade.py +12 -7
  124. model_compression_toolkit/pruning/pytorch/pruning_facade.py +8 -4
  125. model_compression_toolkit/ptq/keras/quantization_facade.py +13 -5
  126. model_compression_toolkit/ptq/pytorch/quantization_facade.py +8 -4
  127. model_compression_toolkit/ptq/runner.py +4 -1
  128. model_compression_toolkit/qat/common/qat_config.py +6 -2
  129. model_compression_toolkit/qat/keras/quantization_facade.py +13 -7
  130. model_compression_toolkit/qat/pytorch/quantization_facade.py +11 -7
  131. model_compression_toolkit/target_platform_capabilities/constants.py +1 -1
  132. model_compression_toolkit/target_platform_capabilities/targetplatform2framework/attach2pytorch.py +3 -3
  133. model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +2 -0
  134. model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +6 -0
  135. model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +4 -2
  136. model_compression_toolkit/xquant/__init__.py +1 -0
  137. model_compression_toolkit/xquant/common/constants.py +1 -0
  138. model_compression_toolkit/xquant/common/model_folding_utils.py +6 -1
  139. model_compression_toolkit/xquant/common/tensorboard_utils.py +4 -1
  140. model_compression_toolkit/xquant/common/xquant_config.py +27 -1
  141. model_compression_toolkit/xquant/{common → keras}/core_report_generator.py +2 -2
  142. model_compression_toolkit/xquant/keras/facade_xquant_report.py +1 -1
  143. model_compression_toolkit/xquant/{common → keras}/framework_report_utils.py +23 -2
  144. model_compression_toolkit/xquant/keras/keras_report_utils.py +10 -5
  145. model_compression_toolkit/xquant/keras/similarity_calculator.py +199 -0
  146. model_compression_toolkit/xquant/keras/tensorboard_utils.py +3 -0
  147. model_compression_toolkit/xquant/pytorch/core_detect_degrade_layer.py +77 -0
  148. model_compression_toolkit/xquant/pytorch/core_judge_troubleshoot.py +66 -0
  149. model_compression_toolkit/xquant/pytorch/core_report_generator.py +177 -0
  150. model_compression_toolkit/xquant/pytorch/detect_degrade_utils.py +78 -0
  151. model_compression_toolkit/xquant/pytorch/facade_xquant_report.py +41 -1
  152. model_compression_toolkit/xquant/pytorch/framework_report_utils.py +98 -0
  153. model_compression_toolkit/xquant/pytorch/judge_troubleshoot_utils.py +562 -0
  154. model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +10 -7
  155. model_compression_toolkit/xquant/{common → pytorch}/similarity_calculator.py +6 -1
  156. model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +3 -0
  157. model_compression_toolkit/core/keras/quantization/activation_quantization_fn_factory.py +0 -47
  158. model_compression_toolkit/core/pytorch/quantization/activation_quantization_fn_factory.py +0 -45
  159. model_compression_toolkit/quantization_preparation/__init__.py +0 -14
  160. model_compression_toolkit/quantization_preparation/load_fqc.py +0 -223
  161. {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/WHEEL +0 -0
  162. {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/licenses/LICENSE.md +0 -0
  163. {mct_nightly-2.4.0.20250925.543.dist-info → mct_nightly-2.4.2.20250926.532.dist-info}/top_level.txt +0 -0
  164. /model_compression_toolkit/core/keras/{quantization → quantizer}/__init__.py +0 -0
  165. /model_compression_toolkit/core/keras/{quantization → quantizer}/fake_quant_builder.py +0 -0
  166. /model_compression_toolkit/core/keras/{quantization → quantizer}/lut_fake_quant.py +0 -0
  167. /model_compression_toolkit/core/pytorch/{quantization → quantizer}/__init__.py +0 -0
  168. /model_compression_toolkit/core/pytorch/{quantization → quantizer}/fake_quant_builder.py +0 -0
  169. /model_compression_toolkit/core/pytorch/{quantization → quantizer}/lut_fake_quant.py +0 -0
model_compression_toolkit/core/common/graph/base_graph.py (+72 -45)

@@ -23,6 +23,7 @@ import numpy as np
 
 from networkx.algorithms.dag import topological_sort
 
+from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.fusion.fusing_info import FusingInfo
 from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX, EDGE_SOURCE_INDEX
 from model_compression_toolkit.core.common.graph.edge import Edge, convert_to_edge
@@ -32,8 +33,7 @@ from model_compression_toolkit.core.common.collectors.statistics_collector impor
 from model_compression_toolkit.core.common.collectors.statistics_collector import scale_statistics, shift_statistics
 from model_compression_toolkit.core.common.pruning.pruning_section import PruningSection
 from model_compression_toolkit.core.common.user_info import UserInformation
-from model_compression_toolkit.core.common.quantization.node_quantization_config import \
-    NodeActivationQuantizationConfig, ActivationQuantizationMode
+from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.target_platform_capabilities.targetplatform2framework import LayerFilterParams
 from model_compression_toolkit.target_platform_capabilities.targetplatform2framework.framework_quantization_capabilities import \
@@ -74,6 +74,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
                  input_nodes: List[BaseNode],
                  output_nodes: List[OutTensor],
                  edge_list: List[Edge],
+                 fw_info: FrameworkInfo = None,
                  **attr):
         """
         Args:
@@ -81,6 +82,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
             input_nodes: List of input nodes the model
             output_nodes: List of output nodes of the model to a list of their output indices.
             edge_list: List of edges the graph has between nodes.
+            fw_info: FrameworkInfo object (needed for computing the graph's weights memory).
             **attr: Attributes to add to graph as key=value pairs.
         """
 
@@ -101,6 +103,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
                           e.sink_node,
                           **e.get_attributes())
         self.user_info = UserInformation()
+        self.fw_info = fw_info
 
     @property
     def skip_validation_check(self) -> bool:
@@ -121,13 +124,38 @@ class Graph(nx.MultiDiGraph, GraphSearches):
     def fusing_info(self, fusing_info: FusingInfo):
         self._fusing_info = fusing_info
 
-    def set_fqc(self, fqc: FrameworkQuantizationCapabilities):
+    def set_fw_info(self,
+                    fw_info: FrameworkInfo):
+        """
+        Set the graph's framework info.
+        Args:
+            fw_info: FrameworkInfo object.
+        """
+
+        self.fw_info = fw_info
+
+    def set_fqc(self,
+                fqc: FrameworkQuantizationCapabilities):
         """
         Set the graph's FQC.
         Args:
            fqc: FrameworkQuantizationCapabilities object.
         """
-        # TODO irena: this is only passed for negative shift activation.
+        # validate graph nodes are either from the framework or a custom layer defined in the FQC
+        # Validate graph nodes are either built-in layers from the framework or custom layers defined in the FQC
+        fqc_layers = fqc.op_sets_to_layers.get_layers()
+        fqc_filtered_layers = [layer for layer in fqc_layers if isinstance(layer, LayerFilterParams)]
+        for n in self.nodes:
+            is_node_in_fqc = any([n.is_match_type(_type) for _type in fqc_layers]) or \
+                             any([n.is_match_filter_params(filtered_layer) for filtered_layer in fqc_filtered_layers])
+            if n.is_custom:
+                if not is_node_in_fqc:
+                    Logger.critical(f'MCT does not support optimizing Keras custom layers. Found a layer of type {n.type}. '
                                    ' Please add the custom layer to Framework Quantization Capabilities (FQC), or file a feature '
                                    'request or an issue if you believe this should be supported.')  # pragma: no cover
+                if any([qc.default_weight_attr_config.enable_weights_quantization for qc in n.get_qco(fqc).quantization_configurations]):
+                    Logger.critical(f'Layer identified: {n.type}. MCT does not support weight quantization for Keras custom layers.')  # pragma: no cover
+
         self.fqc = fqc
 
     def get_topo_sorted_nodes(self):
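For callers, the hunks above mean a Graph can now carry a FrameworkInfo object, either passed at construction or attached through the new set_fw_info() setter. A minimal sketch of the call pattern follows; the helper name and graph_ctor_args are illustrative placeholders, and only the Graph/set_fw_info signatures come from this diff:

    from model_compression_toolkit.core.common.graph.base_graph import Graph

    def build_graph_with_fw_info(graph_ctor_args, fw_info):
        # Hypothetical helper: `graph_ctor_args` abbreviates the unchanged positional
        # arguments of Graph(), and `fw_info` is the caller's FrameworkInfo instance.
        graph = Graph(*graph_ctor_args, fw_info=fw_info)  # fw_info is now accepted at construction
        # Equivalently, it can be attached after construction with the new setter:
        graph.set_fw_info(fw_info)
        return graph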
@@ -535,6 +563,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         return output_edges
 
     def get_configurable_sorted_nodes_names(self,
+                                            fw_info: FrameworkInfo,
                                             include_reused_nodes: bool = False) -> List[str]:
         """
         Get a list of nodes' names that can be configured (namely, has one or
@@ -542,49 +571,56 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         order of the graph.
 
         Args:
+            fw_info: FrameworkInfo object with information about the specific framework's model.
            include_reused_nodes: Whether or not to include reused nodes (False by default).
 
        Returns: List of nodes' names that can be configured (namely, has one or
        more weight qc candidate) sorted topology.
 
        """
-        sorted_names = [n.name for n in self.get_configurable_sorted_nodes(include_reused_nodes=include_reused_nodes)]
+        sorted_names = [n.name for n in self.get_configurable_sorted_nodes(fw_info=fw_info,
+                                                                           include_reused_nodes=include_reused_nodes)]
         return sorted_names
 
     def get_weights_configurable_nodes(self,
+                                       fw_info: FrameworkInfo,
                                        include_reused_nodes: bool = False) -> List[BaseNode]:
         """
         Get a list of nodes that their weights can be configured (namely, has one or
         more weight qc candidate and their weights should be quantized).
 
         Args:
+            fw_info: FrameworkInfo object with information about the specific framework's model.
            include_reused_nodes: Whether to include reused nodes (False by default).
 
        Returns:
            A list of nodes that their weights can be configured (namely, has one or more weight qc candidate).
        """
        # configurability is only relevant for kernel attribute quantization
-        potential_conf_nodes = [n for n in self.nodes if n.kernel_attr]
+        potential_conf_nodes = [n for n in list(self) if fw_info.is_kernel_op(n.type)]
 
        def is_configurable(n):
-            return n.is_configurable_weight(n.kernel_attr) and (not n.reuse or include_reused_nodes)
+            kernel_attrs = fw_info.get_kernel_op_attributes(n.type)
+            return any(n.is_configurable_weight(attr) for attr in kernel_attrs) and (not n.reuse or include_reused_nodes)
 
        return [n for n in potential_conf_nodes if is_configurable(n)]
 
     def get_sorted_weights_configurable_nodes(self,
+                                              fw_info: FrameworkInfo,
                                               include_reused_nodes: bool = False) -> List[BaseNode]:
         """
         Get a list of sorted nodes that their weights can be configured (namely, has one or
         more weight qc candidate and their weights should be quantized).
 
         Args:
+            fw_info: FrameworkInfo object with information about the specific framework's model.
            include_reused_nodes: Whether to include reused nodes (False by default).
 
        Returns:
            A list of nodes that their weights can be configured (namely, has one or more weight qc candidate)
            sorted topologically.
        """
-        return self._sort_nodes_in_list(self.get_weights_configurable_nodes(include_reused_nodes))
+        return self._sort_nodes_in_list(self.get_weights_configurable_nodes(fw_info, include_reused_nodes))
 
     def get_activation_configurable_nodes(self) -> List[BaseNode]:
         """
@@ -608,6 +644,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         return self._sort_nodes_in_list(self.get_activation_configurable_nodes())
 
     def get_configurable_sorted_nodes(self,
+                                      fw_info: FrameworkInfo,
                                       include_reused_nodes: bool = False) -> List[BaseNode]:
         """
         Get a list of nodes that can be configured (namely, has one or
@@ -615,13 +652,14 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         The nodes are sorted according to the topological order of the graph.
 
         Args:
+            fw_info: fw_info: FrameworkInfo object with information about the specific framework's model.
            include_reused_nodes: Whether or not to include reused nodes (False by default).
 
        Returns:
            A list of nodes that can be configured (namely, has one or more qc candidate) sorted topology.
 
        """
-        weights_configurable_nodes = self.get_weights_configurable_nodes(include_reused_nodes)
+        weights_configurable_nodes = self.get_weights_configurable_nodes(fw_info, include_reused_nodes)
         activation_configurable_nodes = self.get_activation_configurable_nodes()
 
         # combine and remove duplications
@@ -646,7 +684,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
             sorted_configurable_nodes.append(n)
         return sorted_configurable_nodes
 
-    def get_min_candidates_config(self) -> Dict[BaseNode, int]:
+    def get_min_candidates_config(self, fw_info: FrameworkInfo) -> Dict[BaseNode, int]:
         """
         Builds a minimal configuration.
         Note: we assume that a minimal configuration exists, i.e., each configurable node has exactly one candidate
@@ -659,33 +697,38 @@ class Graph(nx.MultiDiGraph, GraphSearches):
        Returns:
            A dict from layer to an index of its minimal candidate.
        """
-        conf_sorted_nodes = self.get_configurable_sorted_nodes()
+        conf_sorted_nodes = self.get_configurable_sorted_nodes(fw_info)
         return {n: n.find_min_candidate_index() for n in conf_sorted_nodes}
 
-    def get_max_candidates_config(self) -> Dict[BaseNode, int]:
+    def get_max_candidates_config(self, fw_info: FrameworkInfo) -> Dict[BaseNode, int]:
         """
         Builds a maximal configuration.
         Note: we assume that a maximal configuration exists, i.e., each configurable node has exactly one candidate
         with maximal n_bits (in both weight and activation if both are quantized, or in the relevant one if only
         one of them is quantized)
 
+        Args:
+            fw_info: fw_info: FrameworkInfo object with information about the specific framework's model.
+
        Returns:
            A dict from layer to an index of its maximal candidate.
        """
-        conf_sorted_nodes = self.get_configurable_sorted_nodes()
+        conf_sorted_nodes = self.get_configurable_sorted_nodes(fw_info)
         return {n: n.find_max_candidate_index() for n in conf_sorted_nodes}
 
-    def get_final_weights_config(self) -> List[Tuple[BaseNode, int]]:
+    def get_final_weights_config(self, fw_info: FrameworkInfo) -> List[Tuple[BaseNode, int]]:
         """
         Gets the final number of bits for quantization of each weights' configurable layer.
 
-        Returns:
-            A list of pairs of (node type, node's weights quantization bitwidth).
+        Args:
+            fw_info: fw_info: FrameworkInfo object with information about the specific framework's model.
+
+        Returns: A list of pairs of (node type, node's weights quantization bitwidth).
 
        """
-        sorted_conf_weights = self.get_sorted_weights_configurable_nodes()
+        sorted_conf_weights = self.get_sorted_weights_configurable_nodes(fw_info)
         # a configurable node by definition has a kernel op
-        return [(n, n.final_weights_quantization_cfg.get_attr_config(n.kernel_attr).weights_n_bits)
+        return [(n, n.final_weights_quantization_cfg.get_attr_config(self.fw_info.get_kernel_op_attributes(n.type)[0]).weights_n_bits)
                 for n in sorted_conf_weights]
 
     def get_final_activation_config(self) -> List[Tuple[BaseNode, int]]:
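Taken together, the hunks above make the configurable-node queries require an explicit FrameworkInfo argument and resolve kernel attributes through it (fw_info.is_kernel_op / fw_info.get_kernel_op_attributes) instead of through n.kernel_attr. A call-site migration sketch follows; the helper name is hypothetical, `graph` and `fw_info` are assumed to exist, and only the method signatures come from this diff:

    from typing import Dict, List

    def collect_mixed_precision_inputs(graph, fw_info):
        # Each call below previously took no fw_info argument.
        names: List[str] = graph.get_configurable_sorted_nodes_names(fw_info)
        w_nodes = graph.get_weights_configurable_nodes(fw_info, include_reused_nodes=False)
        conf_nodes = graph.get_configurable_sorted_nodes(fw_info)
        min_cfg: Dict = graph.get_min_candidates_config(fw_info)   # node -> index of minimal candidate
        max_cfg: Dict = graph.get_max_candidates_config(fw_info)   # node -> index of maximal candidate
        final_w = graph.get_final_weights_config(fw_info)          # (node, weights bitwidth) pairs
        return names, w_nodes, conf_nodes, min_cfg, max_cfg, final_w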
@@ -803,7 +846,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
             next_node = self.out_edges(next_node)[0].sink_node
 
         # If next_node is an exit node and has only one incoming edge, the topology is prunable.
-        if fw_impl.is_node_exit_node(next_node, entry_node) and len(self.in_edges(next_node)) == 1:
+        if fw_impl.is_node_exit_node(next_node, entry_node, self.fw_info) and len(self.in_edges(next_node)) == 1:
             return True
 
         # If the next node is not an intermediate node or has more than one incoming/outgoing edge,
@@ -833,7 +876,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
 
         intermediate_nodes, exit_node = self._find_intermediate_and_exit_nodes(entry_node, fw_impl)
 
-        if not fw_impl.is_node_exit_node(exit_node, entry_node):
+        if not fw_impl.is_node_exit_node(exit_node, entry_node, self.fw_info):
            Logger.critical(f"Node {exit_node} is not a valid exit node for the pruning section starting with {entry_node}.")  # pragma: no cover
 
         return PruningSection(entry_node=entry_node,
@@ -854,37 +897,21 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         """
         intermediate_nodes = []
         next_node = self.out_edges(entry_node)[0].sink_node
-        while not fw_impl.is_node_exit_node(next_node, entry_node):
+        while not fw_impl.is_node_exit_node(next_node, entry_node, self.fw_info):
             intermediate_nodes.append(next_node)
             next_node = self.out_edges(next_node)[0].sink_node
 
         return intermediate_nodes, next_node
 
-    # TODO irena move to load_fqc and clean up tests (currently tests_pytest/common_tests/unit_tests/core/graph/test_base_graph.py)
-    def override_fused_node_activation_quantization_candidates(self):
+    def disable_fused_nodes_activation_quantization(self):
         """
-        Override fused node activation quantization candidates for all nodes in fused operations,
+        Disable activation quantization for all nodes in fused operations,
         except for the last node in each fused group.
-        Update the value of quantization_config with the value of op_quaitization_cfg from FusingInfo.
-        """
-        nodes_in_fln = self.fusing_info.get_inner_fln_nodes()
-        for node in nodes_in_fln:
-            fused_node_op_id = self.fusing_info.get_fused_op_id_for_node(node.name)
-            fusing_op_quantization_cfg = self.fusing_info.get_fused_op_quantization_config(fused_node_op_id)
-            if fusing_op_quantization_cfg is not None and fusing_op_quantization_cfg.enable_activation_quantization:
-                def update(qc):
-                    qc.activation_quantization_cfg = NodeActivationQuantizationConfig(fusing_op_quantization_cfg)
-                    qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_QUANT
-                node.quantization_cfg.update_all(update, remove_duplicates=True)
-            else:
-                node.quantization_cfg.update_activation_quantization_mode(ActivationQuantizationMode.FLN_NO_QUANT)
-                # Remove duplicate candidates. We cannot compare whole candidates since activation configs might not
-                # be identical, but we do want to treat them as such. So we only check duplication by weight configs.
-                uniq_qcs = []
-                for qc in node.candidates_quantization_cfg:
-                    if not any(qc.weights_quantization_cfg == uqc.weights_quantization_cfg for uqc in uniq_qcs):
-                        uniq_qcs.append(qc)
-                node.quantization_cfg.candidates_quantization_cfg = uniq_qcs
+        """
+        nodes_to_disable = self.fusing_info.get_inner_fln_nodes()
+        for node in nodes_to_disable:
+            for qc in node.candidates_quantization_cfg:
+                qc.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.FLN_QUANT
 
     def validate(self):
         """
@@ -908,4 +935,4 @@ class Graph(nx.MultiDiGraph, GraphSearches):
         """
         Wrap networkx functions (that modifies the graph) with our validate decorator.
         """
-        return super().remove_edge(*args, **kwargs)
+        return super().remove_edge(*args, **kwargs)