mct-nightly 2.1.0.20240806.441__py3-none-any.whl → 2.1.0.20240808.431__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/METADATA +2 -2
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/RECORD +48 -47
- model_compression_toolkit/__init__.py +1 -1
- model_compression_toolkit/constants.py +14 -1
- model_compression_toolkit/core/common/fusion/graph_fuser.py +135 -0
- model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py +4 -0
- model_compression_toolkit/core/common/quantization/debug_config.py +4 -1
- model_compression_toolkit/core/common/quantization/node_quantization_config.py +1 -1
- model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +3 -4
- model_compression_toolkit/core/common/visualization/tensorboard_writer.py +29 -1
- model_compression_toolkit/core/runner.py +21 -1
- model_compression_toolkit/gptq/keras/quantization_facade.py +13 -11
- model_compression_toolkit/gptq/pytorch/quantization_facade.py +13 -11
- model_compression_toolkit/metadata.py +61 -2
- model_compression_toolkit/ptq/keras/quantization_facade.py +12 -10
- model_compression_toolkit/ptq/pytorch/quantization_facade.py +12 -12
- model_compression_toolkit/qat/keras/quantization_facade.py +8 -8
- model_compression_toolkit/qat/pytorch/quantization_facade.py +8 -8
- model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +2 -1
- model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +18 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +10 -13
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +68 -52
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v3_lut/tp_model.py +5 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tp_model.py +6 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v4/tpc_pytorch.py +3 -3
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/target_platform_capabilities.py +35 -29
- model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +5 -4
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/target_platform_capabilities.py +35 -28
- model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +5 -4
- model_compression_toolkit/xquant/common/constants.py +3 -0
- model_compression_toolkit/xquant/common/core_report_generator.py +9 -1
- model_compression_toolkit/xquant/common/framework_report_utils.py +5 -14
- model_compression_toolkit/xquant/common/tensorboard_utils.py +30 -5
- model_compression_toolkit/xquant/keras/facade_xquant_report.py +2 -0
- model_compression_toolkit/xquant/keras/keras_report_utils.py +3 -1
- model_compression_toolkit/xquant/keras/tensorboard_utils.py +101 -4
- model_compression_toolkit/xquant/pytorch/facade_xquant_report.py +2 -0
- model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py +3 -2
- model_compression_toolkit/xquant/pytorch/tensorboard_utils.py +109 -3
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/LICENSE.md +0 -0
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/WHEEL +0 -0
- {mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/top_level.txt +0 -0
model_compression_toolkit/xquant/keras/tensorboard_utils.py
CHANGED
@@ -12,19 +12,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from typing import Dict, Callable
+from typing import Dict, Callable, Any
 
 import keras
 
-from
+from mct_quantizers import KerasActivationQuantizationHolder, KerasQuantizationWrapper
+from model_compression_toolkit.constants import MEM_ELEMENTS, CUTS, OP_ORDER, NODE_NAME, NODE_OUTPUT_INDEX, TOTAL_SIZE, FUSED_NODES_MAPPING
+from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 
 from model_compression_toolkit.core.keras.reader.reader import model_reader
 
-from model_compression_toolkit.xquant.common.constants import XQUANT_REPR, INTERMEDIATE_SIMILARITY_METRICS_REPR,
+from model_compression_toolkit.xquant.common.constants import XQUANT_REPR, INTERMEDIATE_SIMILARITY_METRICS_REPR, \
+    XQUANT_VAL, INTERMEDIATE_SIMILARITY_METRICS_VAL, CUT_MEMORY_ELEMENTS, CUT_TOTAL_SIZE
 from model_compression_toolkit.xquant.common.tensorboard_utils import TensorboardUtils
 
+NODES_WITHOUT_CUT_INFO = [KerasActivationQuantizationHolder]
+
 
 class KerasTensorboardUtils(TensorboardUtils):
     """
@@ -52,7 +57,8 @@ class KerasTensorboardUtils(TensorboardUtils):
     def get_graph_for_tensorboard_display(self,
                                           quantized_model: keras.Model,
                                           similarity_metrics: Dict[str, Dict[str, float]],
-                                          repr_dataset: Callable
+                                          repr_dataset: Callable,
+                                          quantized_model_metadata: Dict) -> Graph:
         """
         Generate a graph suitable for TensorBoard display from the provided quantized model
         and similarity metrics.
@@ -62,6 +68,7 @@ class KerasTensorboardUtils(TensorboardUtils):
             similarity_metrics (Dict[str, Dict[str, float]]): A dictionary containing similarity metrics
                 for different nodes in the model.
             repr_dataset (Callable): A function or callable that provides the representative dataset.
+            quantized_model_metadata (Dict): Metadata from the quantized model.
 
         Returns:
             Graph: A graph object representing the quantized model, annotated with similarity metrics.
@@ -69,6 +76,8 @@ class KerasTensorboardUtils(TensorboardUtils):
         # Read the quantized model into a graph structure.
         quant_graph = model_reader(quantized_model)
 
+        insert_cut_info_into_graph(quant_graph, quantized_model_metadata)
+
         # Iterate over each node in the graph.
         for node in quant_graph.nodes:
             # Check if the node's name is in the similarity metrics for intermediate representation.
@@ -82,3 +91,91 @@ class KerasTensorboardUtils(TensorboardUtils):
                 node.framework_attr[XQUANT_VAL] = similarity_metrics[INTERMEDIATE_SIMILARITY_METRICS_VAL][node.name]
 
         return quant_graph
+
+
+def populate_fused_node_memory_elements(quantized_model_metadata: Dict[str, Any]) -> Dict[str, list]:
+    """
+    Populate a dictionary mapping fused node names to their corresponding memory elements.
+
+    Args:
+        quantized_model_metadata (dict): Metadata containing scheduling information for the quantized model.
+
+    Returns:
+        dict: A dictionary with fused node names as keys and memory elements as values.
+    """
+    fused_node_to_memory_elements = {}
+
+    for cut in quantized_model_metadata['scheduling_info'][CUTS]:
+        fused_node = cut[OP_ORDER][-1]
+
+        # Ignore dummy types
+        if not fused_node.startswith('DummyType'):
+            fused_node_to_memory_elements[fused_node] = cut[MEM_ELEMENTS]
+
+    return fused_node_to_memory_elements
+
+def assign_cut_info_to_node(node: BaseNode, memory_elements: list):
+    """
+    Assign cut memory elements and total size to a node's attributes according to the
+    tensors in the cut of this node.
+
+    Args:
+        node (Node): The node to which the memory elements and total size will be assigned.
+        memory_elements (list): List of memory elements to be assigned to the node since they are in memory during this node inference.
+    """
+    node.framework_attr[CUT_MEMORY_ELEMENTS] = [
+        f"{mem_element[NODE_NAME]}_outTensor_{mem_element[NODE_OUTPUT_INDEX]}"
+        for mem_element in memory_elements
+    ]
+    node.framework_attr[CUT_TOTAL_SIZE] = sum(
+        mem_element[TOTAL_SIZE] for mem_element in memory_elements
+    )
+
+def process_node_cut_info(node: BaseNode,
+                          fused_node_to_memory_elements: Dict[str, list],
+                          quantized_model_metadata: Dict[str, Any]):
+    """
+    Process and assign cut information for a given node based on metadata and fused nodes mapping.
+
+    Args:
+        node (Node): The node to process.
+        fused_node_to_memory_elements (dict): Dictionary mapping fused nodes to memory elements.
+        quantized_model_metadata (dict): Metadata containing scheduling information for the quantized model.
+    """
+    if node.name in fused_node_to_memory_elements:
+        # Directly assign cut info if node name is in fused_node_to_memory_elements
+        assign_cut_info_to_node(node, fused_node_to_memory_elements[node.name])
+
+    elif node.name in quantized_model_metadata['scheduling_info'][FUSED_NODES_MAPPING]:
+        # Assign cut info if the node name is in the fused nodes mapping
+        original_node_name = quantized_model_metadata['scheduling_info'][FUSED_NODES_MAPPING][node.name]
+        assign_cut_info_to_node(node, fused_node_to_memory_elements[original_node_name])
+
+    elif node.type == KerasQuantizationWrapper:
+        if node.framework_attr['layer']['config']['name'] in fused_node_to_memory_elements:
+            # Assign cut info if the node is a KerasQuantizationWrapper with a matching layer name
+            assign_cut_info_to_node(node, fused_node_to_memory_elements[node.framework_attr['layer']['config']['name']])
+
+        elif node.framework_attr['layer']['config']['name'] in quantized_model_metadata['scheduling_info'][FUSED_NODES_MAPPING]:
+            # Assign cut info if the node is a KerasQuantizationWrapper and its layer name is in the fused nodes mapping
+            original_node_name = quantized_model_metadata['scheduling_info'][FUSED_NODES_MAPPING][node.framework_attr['layer']['config']['name']]
+            assign_cut_info_to_node(node, fused_node_to_memory_elements[original_node_name])
+
+def insert_cut_info_into_graph(quant_graph: Graph, quantized_model_metadata: Dict[str, Any]):
+    """
+    Insert information about cut tensors into the graph nodes based on the provided metadata.
+
+    Args:
+        quant_graph (Graph): The graph representing the quantized model.
+        quantized_model_metadata (dict): Metadata containing scheduling information for the quantized model.
+    """
+    # Populate the mapping of fused nodes to memory elements
+    fused_node_to_memory_elements = populate_fused_node_memory_elements(quantized_model_metadata)
+
+    for node in quant_graph.nodes:
+        # Skip nodes without cut information
+        if node.type not in NODES_WITHOUT_CUT_INFO:
+            process_node_cut_info(node,
+                                  fused_node_to_memory_elements,
+                                  quantized_model_metadata)
+
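To make the new cut-info flow concrete, below is a minimal sketch (illustrative, not taken from this release) of a hypothetical scheduling_info payload and what populate_fused_node_memory_elements would derive from it. The Keras module reads the payload through constants imported from model_compression_toolkit.constants (CUTS, OP_ORDER, MEM_ELEMENTS, NODE_NAME, NODE_OUTPUT_INDEX, TOTAL_SIZE, FUSED_NODES_MAPPING); their literal values are assumed here to match the string keys used by the PyTorch counterpart further down ('cuts', 'op_order', 'mem_elements', 'node_name', 'node_output_index', 'total_size', 'fused_nodes_mapping'). The node names are made up for the example.

    # Hypothetical metadata sketch; key names assume the constant values noted above.
    quantized_model_metadata = {
        'scheduling_info': {
            'cuts': [
                {
                    # The last entry of 'op_order' is treated as the fused node that owns the cut.
                    'op_order': ['conv1', 'relu1'],
                    'mem_elements': [
                        {'node_name': 'conv1', 'node_output_index': 0, 'total_size': 4096},
                    ],
                },
                {
                    # Cuts whose last op is a dummy type are skipped.
                    'op_order': ['DummyType_0'],
                    'mem_elements': [],
                },
            ],
            # Assumed mapping from a member of a fused group to the fused node that keys the cut.
            'fused_nodes_mapping': {'conv1': 'relu1'},
        }
    }

    # Under these assumptions, populate_fused_node_memory_elements(quantized_model_metadata) returns:
    # {'relu1': [{'node_name': 'conv1', 'node_output_index': 0, 'total_size': 4096}]}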
model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py
CHANGED
@@ -24,7 +24,7 @@ from model_compression_toolkit.xquant.pytorch.dataset_utils import PytorchDatase
 from model_compression_toolkit.xquant.pytorch.model_analyzer import PytorchModelAnalyzer
 from model_compression_toolkit.xquant.pytorch.similarity_functions import PytorchSimilarityFunctions
 from model_compression_toolkit.xquant.pytorch.tensorboard_utils import PytorchTensorboardUtils
-
+from mct_quantizers.pytorch.metadata import get_metadata
 
 class PytorchReportUtils(FrameworkReportUtils):
     """
@@ -58,4 +58,5 @@ class PytorchReportUtils(FrameworkReportUtils):
                          tb_utils=tb_utils,
                          dataset_utils=dataset_utils,
                          similarity_calculator=similarity_calculator,
-                         model_folding_utils=model_folding
+                         model_folding_utils=model_folding,
+                         get_metadata_fn=get_metadata)
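The pytorch_report_utils.py change wires the metadata getter from mct_quantizers into the report utilities through a new get_metadata_fn argument. A rough sketch of the intent follows, assuming get_metadata returns the metadata dictionary attached to the exported model (the FrameworkReportUtils signature itself is not shown in this diff, and fetch_scheduling_info is a hypothetical helper used only for illustration):

    # Sketch only: the import path comes from the diff; the return shape of get_metadata
    # (a dict carrying 'scheduling_info') is an assumption based on how the TensorBoard
    # utilities below consume it.
    from mct_quantizers.pytorch.metadata import get_metadata

    def fetch_scheduling_info(quantized_model):
        metadata = get_metadata(quantized_model)
        return metadata.get('scheduling_info', {}) if metadata else {}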
model_compression_toolkit/xquant/pytorch/tensorboard_utils.py
CHANGED
@@ -12,9 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
+from mct_quantizers import PytorchActivationQuantizationHolder, PytorchQuantizationWrapper
+from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
-from typing import Dict, Any, Callable
+from typing import Dict, Any, Callable, List
 
 import torch
 
@@ -24,6 +25,14 @@ from model_compression_toolkit.xquant.common.constants import XQUANT_REPR, INTER
 from model_compression_toolkit.xquant.common.model_folding_utils import ModelFoldingUtils
 from model_compression_toolkit.xquant.common.tensorboard_utils import TensorboardUtils
 
+NODES_WITHOUT_CUT_INFO = [torch.fake_quantize_per_tensor_affine]
+
+def is_wrapped_linear_op(quantized_model, node):
+    # Check if a node in a torch fx graph represents a linear layer (conv2d/linear)
+    # that is wrapped in the quantized model
+    return hasattr(quantized_model, node.name.removesuffix('_layer')) and isinstance(
+        getattr(quantized_model, node.name.removesuffix('_layer')), PytorchQuantizationWrapper)
+
 class PytorchTensorboardUtils(TensorboardUtils):
     """
     Utility class for handling PyTorch models with TensorBoard. Inherits from TensorboardUtils.
@@ -49,7 +58,8 @@ class PytorchTensorboardUtils(TensorboardUtils):
     def get_graph_for_tensorboard_display(self,
                                           quantized_model: torch.nn.Module,
                                           similarity_metrics: Dict[str, Any],
-                                          repr_dataset: Callable
+                                          repr_dataset: Callable,
+                                          quantized_model_metadata: Dict):
         """
         Get the graph to display on TensorBoard. The graph represents the quantized model
         with the similarity metrics that were measured.
@@ -58,6 +68,7 @@ class PytorchTensorboardUtils(TensorboardUtils):
            quantized_model: The quantized model to be displayed on TensorBoard.
            similarity_metrics: Dictionary containing the collected similarity metrics values.
            repr_dataset: Callable that generates the representative dataset used during graph building.
+           quantized_model_metadata (Dict): Metadata from the quantized model.
 
        Returns:
            The updated quantized model graph with similarity metrics embedded.
@@ -68,6 +79,8 @@ class PytorchTensorboardUtils(TensorboardUtils):
                                    to_tensor=self.fw_impl.to_tensor,
                                    to_numpy=self.fw_impl.to_numpy)
 
+        insert_cut_info_into_graph(quant_graph, quantized_model_metadata, quantized_model)
+
        # Iterate through each node in the graph
        for node in quant_graph.nodes:
            # Check and add similarity metrics for each node in the graph
@@ -85,3 +98,96 @@ class PytorchTensorboardUtils(TensorboardUtils):
                    node.name.removesuffix("_layer")]
 
        return quant_graph
+
+
+def populate_fused_node_memory_elements(quantized_model_metadata: Dict[str, Any]) -> Dict[str, list]:
+    """
+    Populate a dictionary mapping fused node names to their corresponding memory elements.
+
+    Args:
+        quantized_model_metadata: Metadata containing scheduling information for the quantized model.
+
+    Returns:
+        dict: A dictionary with fused node names as keys and memory elements as values.
+    """
+    fused_node_to_memory_elements = {}
+
+    for cut in quantized_model_metadata['scheduling_info']['cuts']:
+        fused_node = cut['op_order'][-1]
+
+        # Ignore dummy types
+        if not fused_node.startswith('DummyType'):
+            fused_node_to_memory_elements[fused_node] = cut['mem_elements']
+
+    return fused_node_to_memory_elements
+
+
+def assign_cut_info_to_node(node: BaseNode, memory_elements: List[dict]):
+    """
+    Assign cut memory elements and total size to a node's framework attributes.
+
+    Args:
+        node (Node): The node to which the memory elements and total size will be assigned.
+        memory_elements (list): List of memory elements to be assigned to the node.
+    """
+    node.framework_attr['cut_memory_elements'] = [
+        f"{mem_element['node_name']}_outTensor_{mem_element['node_output_index']}"
+        for mem_element in memory_elements
+    ]
+    node.framework_attr['cut_total_size'] = sum(
+        mem_element['total_size'] for mem_element in memory_elements
+    )
+
+
+def process_node_cut_info(node: BaseNode, fused_node_to_memory_elements: Dict[str, list], quantized_model_metadata: Dict[str, Any], quantized_model: torch.nn.Module):
+    """
+    Process and assign cut information for a given node based on metadata and fused nodes mapping.
+
+    Args:
+        node: The node to process.
+        fused_node_to_memory_elements: Dictionary mapping fused nodes to memory elements.
+        quantized_model_metadata: Metadata containing scheduling information for the quantized model.
+        quantized_model: The quantized model.
+    """
+    node_name_without_suffix = node.name.removesuffix('_layer')
+    fused_nodes_mapping = quantized_model_metadata['scheduling_info']['fused_nodes_mapping']
+
+    if node.name in fused_node_to_memory_elements:
+        # Directly assign cut info if node name is in fused_node_to_memory_elements
+        assign_cut_info_to_node(node, fused_node_to_memory_elements[node.name])
+
+    elif is_wrapped_linear_op(quantized_model, node) and node_name_without_suffix in fused_node_to_memory_elements:
+        # Assign cut info if the node is a wrapped linear operation with a matching name without suffix
+        assign_cut_info_to_node(node, fused_node_to_memory_elements[node_name_without_suffix])
+
+    elif node.name in fused_nodes_mapping:
+        # Assign cut info if the node name is in the fused nodes mapping
+        original_node_name = fused_nodes_mapping[node.name]
+        assign_cut_info_to_node(node, fused_node_to_memory_elements[original_node_name])
+
+    elif is_wrapped_linear_op(quantized_model, node) and node_name_without_suffix in fused_nodes_mapping:
+        # Assign cut info if the node is a wrapped linear operation and its name without suffix is in the fused nodes mapping
+        original_node_name = fused_nodes_mapping[node_name_without_suffix]
+        assign_cut_info_to_node(node, fused_node_to_memory_elements[original_node_name])
+
+
+def insert_cut_info_into_graph(quant_graph: Graph,
+                               quantized_model_metadata: Dict[str, Any],
+                               quantized_model: torch.nn.Module):
+    """
+    Insert information about cut tensors into the graph nodes based on the provided metadata.
+
+    Args:
+        quant_graph: The graph representing the quantized model.
+        quantized_model_metadata: Metadata containing scheduling information for the quantized model.
+        quantized_model: The quantized model.
+    """
+    # Populate the mapping of fused nodes to memory elements
+    fused_node_to_memory_elements = populate_fused_node_memory_elements(quantized_model_metadata)
+
+    for node in quant_graph.nodes:
+        # Skip nodes without cut information
+        if node.type not in NODES_WITHOUT_CUT_INFO:
+            process_node_cut_info(node, fused_node_to_memory_elements, quantized_model_metadata, quantized_model)
+
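As a quick check on what assign_cut_info_to_node produces, the snippet below applies the same logic as the new PyTorch helper to a stand-in node object carrying a framework_attr dictionary; the memory-element values and node names are illustrative only.

    # Stand-in for a graph node; only the framework_attr dict matters for this sketch.
    class _FakeNode:
        def __init__(self):
            self.framework_attr = {}

    memory_elements = [
        {'node_name': 'conv1', 'node_output_index': 0, 'total_size': 4096},
        {'node_name': 'add_3', 'node_output_index': 1, 'total_size': 1024},
    ]

    node = _FakeNode()
    # Mirrors assign_cut_info_to_node from the diff above.
    node.framework_attr['cut_memory_elements'] = [
        f"{m['node_name']}_outTensor_{m['node_output_index']}" for m in memory_elements
    ]
    node.framework_attr['cut_total_size'] = sum(m['total_size'] for m in memory_elements)

    # node.framework_attr now holds:
    # {'cut_memory_elements': ['conv1_outTensor_0', 'add_3_outTensor_1'], 'cut_total_size': 5120}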
{mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/LICENSE.md
RENAMED (file without changes)
{mct_nightly-2.1.0.20240806.441.dist-info → mct_nightly-2.1.0.20240808.431.dist-info}/top_level.txt
RENAMED (file without changes)