mct-nightly 2.0.0.20240505.435__tar.gz → 2.0.0.20240507.417__tar.gz
This diff compares the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between these package versions as they appear in their respective public registries.
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/PKG-INFO +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/mct_nightly.egg-info/PKG-INFO +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/__init__.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/base_graph.py +3 -3
- mct-nightly-2.0.0.20240507.417/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +247 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/mask/per_channel_mask.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/mask/per_simd_group_mask.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/pruner.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/pruning_info.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/node_quantization_config.py +2 -4
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_config.py +2 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_fn_selection.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/remove_identity.py +4 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/back2framework/keras_model_builder.py +3 -3
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/back2framework/mixed_precision_model_builder.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/default_framework_info.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/activation_decomposition.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/shift_negative_activation.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/hessian/trace_hessian_calculator_keras.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/keras_implementation.py +4 -6
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/keras_node_prior_info.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/quantizer/fake_quant_builder.py +11 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/common.py +4 -4
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/instance_builder.py +0 -15
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/linear_collapsing.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/residual_collapsing.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/hessian/trace_hessian_calculator_pytorch.py +1 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/pytorch_device_config.py +5 -2
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/quantizer/fake_quant_builder.py +12 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/runner.py +12 -1
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/common/model_info_exctractors.py +10 -11
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/common/optimization_utils.py +9 -9
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/common/gptq_training.py +3 -38
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/keras/base_keras_quantizer.py +1 -4
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/pytorch/base_pytorch_quantizer.py +1 -3
- mct-nightly-2.0.0.20240505.435/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py +0 -164
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/LICENSE.md +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/README.md +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/mct_nightly.egg-info/SOURCES.txt +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/mct_nightly.egg-info/dependency_links.txt +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/mct_nightly.egg-info/requires.txt +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/mct_nightly.egg-info/top_level.txt +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/analyzer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/back2framework/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/back2framework/base_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/base_substitutions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/collectors/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/collectors/base_collector.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/collectors/histogram_collector.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/collectors/mean_collector.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/collectors/min_max_per_channel_collector.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/collectors/statistics_collector.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/framework_implementation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/framework_info.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/fusion/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/fusion/layer_fusing.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/base_node.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/edge.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/functional_node.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/graph_matchers.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/graph_searches.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/memory_graph/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/memory_graph/bipartite_graph.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/memory_graph/compute_graph_max_cut.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/memory_graph/cut.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/memory_graph/memory_element.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/graph/virtual_activation_weights_node.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/hessian/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/hessian/hessian_info_service.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/hessian/hessian_info_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/hessian/trace_hessian_calculator.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/hessian/trace_hessian_request.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/matchers/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/matchers/base_graph_filter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/matchers/base_matcher.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/matchers/edge_matcher.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/matchers/function.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/matchers/node_matcher.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/matchers/walk_matcher.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/memory_computation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/configurable_quant_id.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/distance_weighting.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_aggregation_methods.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_functions_mapping.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/ru_methods.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/model_builder_mode.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/model_collector.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/model_validation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/network_editors/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/network_editors/actions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/network_editors/edit_network.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/network_editors/node_filters.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/node_prior_info.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/channels_grouping.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/greedy_mask_calculator.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/importance_metrics/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/importance_metrics/base_importance_metric.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/importance_metrics/importance_metric_factory.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/mask/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/memory_calculator.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/prune_graph.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/pruning_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/pruning_framework_implementation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/pruning/pruning_section.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/candidate_node_quantization_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/core_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/debug_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/filter_nodes_candidates.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_fn_selection.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/lut_kmeans_params.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/outlier_filter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/power_of_two_selection.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_activations_computation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_search.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_weights_computation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/symmetric_selection.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantization_params_generation/uniform_selection.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantize_graph_weights.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantize_node.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantizers/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantizers/lut_kmeans_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantizers/quantizers_helpers.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/quantizers/uniform_quantizers.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/quantization/set_node_quantization_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/similarity_analyzer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/statistics_correction/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/statistics_correction/statistics_correction.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/apply_substitutions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/batchnorm_folding.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/batchnorm_reconstruction.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/batchnorm_refusing.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/linear_collapsing.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/linear_collapsing_substitution.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/residual_collapsing.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/scale_equalization.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/shift_negative_activation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/softmax_shift.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/virtual_activation_weights_composition.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/substitutions/weights_activation_split.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/user_info.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/visualization/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/visualization/final_config_visualizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/visualization/nn_visualizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/common/visualization/tensorboard_writer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/graph_prep_runner.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/back2framework/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/back2framework/factory_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/back2framework/float_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/back2framework/instance_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/back2framework/quantized_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/custom_layer_validation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_folding.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_reconstruction.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/batchnorm_refusing.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/concat_threshold_update.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/dwconv_to_conv.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/input_scaling.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/matmul_substitution.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/multi_head_attention_decomposition.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/remove_identity.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/scale_equalization.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/separableconv_decomposition.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/softmax_shift.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/virtual_activation_weights_composition.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/graph_substitutions/substitutions/weights_activation_split.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/hessian/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/hessian/activation_trace_hessian_calculator_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/hessian/weights_trace_hessian_calculator_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/keras_model_validation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/mixed_precision/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/mixed_precision/configurable_activation_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/mixed_precision/configurable_weights_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/pruning/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/pruning/pruning_keras_implementation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/quantizer/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/quantizer/base_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/quantizer/lut_fake_quant.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/connectivity_handler.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/nested_model/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/nested_model/edges_merger.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/nested_model/nested_model_handler.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/nested_model/nodes_merger.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/nested_model/outputs_merger.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/node_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/reader/reader.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/resource_utilization_data_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/statistics_correction/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/statistics_correction/apply_second_moment_correction.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/tf_tensor_numpy.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/keras/visualization/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/factory_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/float_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/mixed_precision_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/quantized_layer_wrapper.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/quantization_wrapper/wrapper_quantize_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/back2framework/quantized_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/default_framework_info.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_folding.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_reconstruction.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/batchnorm_refusing.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/concat_threshold_update.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/const_holder_conv.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_batch_norm.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/functional_layer_norm.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/multi_head_attention_decomposition.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/permute_call_method.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/relu_bound_to_power_of_2.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/remove_identity.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/reshape_with_static_shapes.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/scale_equalization.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/shift_negative_activation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/softmax_shift.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/virtual_activation_weights_composition.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/weights_activation_split.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/hessian/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/hessian/activation_trace_hessian_calculator_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/hessian/weights_trace_hessian_calculator_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/mixed_precision/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/mixed_precision/configurable_activation_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/mixed_precision/configurable_weights_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/pruning/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/pruning/pruning_pytorch_implementation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/pytorch_implementation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/pytorch_node_prior_info.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/quantizer/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/quantizer/lut_fake_quant.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/reader/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/reader/graph_builders.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/reader/node_holders.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/reader/reader.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/resource_utilization_data_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/statistics_correction/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/statistics_correction/apply_second_moment_correction.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/pytorch/utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/core/quantization_prep_runner.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/common/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/common/constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/common/data_generation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/common/data_generation_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/common/enums.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/common/image_pipeline.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/image_pipeline.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/keras_data_generation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/model_info_exctractors.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/optimization_functions/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/optimization_functions/batchnorm_alignment_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/optimization_functions/bn_layer_weighting_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/optimization_functions/image_initilization.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/optimization_functions/output_loss_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/optimization_functions/scheduler_step_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/keras/optimization_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/image_pipeline.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/model_info_exctractors.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/optimization_functions/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/optimization_functions/batchnorm_alignment_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/optimization_functions/bn_layer_weighting_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/optimization_functions/image_initilization.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/optimization_functions/output_loss_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/optimization_functions/scheduler_step_functions.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/optimization_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/data_generation/pytorch/pytorch_data_generation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/defaultdict.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/fw_agonstic/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/fw_agonstic/exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/fw_agonstic/quantization_format.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/keras/base_keras_exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/keras/export_serialization_format.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_keras_exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/keras/fakely_quant_tflite_exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/keras/int8_tflite_exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/keras/keras_export_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/keras/mctq_keras_exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/pytorch/base_pytorch_exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/pytorch/export_serialization_format.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_onnx_pytorch_exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/pytorch/fakely_quant_torchscript_pytorch_exporter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_exporter/pytorch/pytorch_export_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/fw_agnostic/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/fw_agnostic/get_inferable_quantizers.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/keras/builder/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/keras/builder/fully_quantized_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/keras/builder/node_to_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/keras/validate_layer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/pytorch/builder/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/pytorch/builder/fully_quantized_model_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/exporter/model_wrapper/pytorch/validate_layer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/common/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/common/gptq_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/common/gptq_constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/common/gptq_framework_implementation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/common/gptq_graph.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/gptq_keras_implementation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/gptq_loss.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/gptq_training.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/graph_info.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantization_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/base_keras_gptq_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/quant_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/soft_rounding/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/soft_rounding/soft_quantizer_reg.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/soft_rounding/symmetric_soft_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/ste_rounding/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/gptq_loss.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/gptq_training.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/graph_info.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantization_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/symmetric_ste.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/gptq/runner.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/logger.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/metadata.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/pruning/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/pruning/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/pruning/keras/pruning_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/pruning/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/pruning/pytorch/pruning_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/ptq/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/ptq/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/ptq/keras/quantization_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/ptq/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/ptq/pytorch/quantization_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/ptq/runner.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/common/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/common/qat_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantization_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/base_keras_qat_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/lsq/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/lsq/symmetric_lsq.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/lsq/uniform_lsq.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/quant_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/quantization_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/ste_rounding/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/ste_rounding/symmetric_ste.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/keras/quantizer/ste_rounding/uniform_ste.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantization_facade.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/base_pytorch_qat_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/lsq/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/lsq/symmetric_lsq.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/lsq/uniform_lsq.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/quantization_builder.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/quantizer_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/symmetric_ste.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/qat/pytorch/quantizer/ste_rounding/uniform_ste.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/immutable.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/current_tp_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/fusing.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/op_quantization_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/operators.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/target_platform_model_component.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/attribute_filter.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/current_tpc.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/layer_filter_params.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/operations_to_layers.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/target_platform/targetplatform2framework/target_platform_capabilities_component.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/get_target_platform_capabilities.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/latest/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/target_platform_capabilities.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tp_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1/tpc_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tp_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_lut/tpc_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tp_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v1_pot/tpc_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tp_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2/tpc_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tp_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/imx500_tpc/v2_lut/tpc_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/latest/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/target_platform_capabilities.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tp_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/qnnpack_tpc/v1/tpc_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/latest/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/target_platform_capabilities.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tp_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_keras.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/target_platform_capabilities/tpc_models/tflite_tpc/v1/tpc_pytorch.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/common/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/common/base_trainable_quantizer.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/common/constants.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/common/get_quantizer_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/common/get_quantizers.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/common/quant_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/common/trainable_quantizer_config.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/keras/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/keras/config_serialization.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/keras/load_model.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/keras/quantize_wrapper.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/keras/quantizer_utils.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/model_compression_toolkit/trainable_infrastructure/pytorch/__init__.py +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/setup.cfg +0 -0
- {mct-nightly-2.0.0.20240505.435 → mct-nightly-2.0.0.20240507.417}/setup.py +0 -0
|
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
|
|
|
27
27
|
from model_compression_toolkit import pruning
|
|
28
28
|
from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
|
|
29
29
|
|
|
30
|
-
__version__ = "2.0.0.
|
|
30
|
+
__version__ = "2.0.0.20240507.000417"
|
|
@@ -414,7 +414,7 @@ class Graph(nx.MultiDiGraph, GraphSearches):
|
|
|
414
414
|
|
|
415
415
|
"""
|
|
416
416
|
if new_node is None:
|
|
417
|
-
Logger.critical("Cannot replace input node with a None value; new input node is required.")
|
|
417
|
+
Logger.critical("Cannot replace input node with a None value; new input node is required.") # pragma: no cover
|
|
418
418
|
|
|
419
419
|
graph_inputs = self.get_inputs()
|
|
420
420
|
new_graph_inputs = copy(graph_inputs)
|
|
@@ -828,12 +828,12 @@ class Graph(nx.MultiDiGraph, GraphSearches):
|
|
|
828
828
|
|
|
829
829
|
"""
|
|
830
830
|
if not fw_impl.is_node_entry_node(entry_node):
|
|
831
|
-
Logger.critical(f"Node {entry_node} is not a valid entry node for creating a pruning section")
|
|
831
|
+
Logger.critical(f"Node {entry_node} is not a valid entry node for creating a pruning section") # pragma: no cover
|
|
832
832
|
|
|
833
833
|
intermediate_nodes, exit_node = self._find_intermediate_and_exit_nodes(entry_node, fw_impl)
|
|
834
834
|
|
|
835
835
|
if not fw_impl.is_node_exit_node(exit_node, entry_node, self.fw_info):
|
|
836
|
-
Logger.critical(f"Node {exit_node} is not a valid exit node for the pruning section starting with {entry_node}.")
|
|
836
|
+
Logger.critical(f"Node {exit_node} is not a valid exit node for the pruning section starting with {entry_node}.") # pragma: no cover
|
|
837
837
|
|
|
838
838
|
return PruningSection(entry_node=entry_node,
|
|
839
839
|
intermediate_nodes=intermediate_nodes,
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
# ==============================================================================
|
|
15
|
+
import numpy as np
|
|
16
|
+
from typing import Callable, Any, Dict, Tuple
|
|
17
|
+
|
|
18
|
+
from model_compression_toolkit.constants import FLOAT_BITWIDTH, BITS_TO_BYTES
|
|
19
|
+
from model_compression_toolkit.core import FrameworkInfo, ResourceUtilization, CoreConfig
|
|
20
|
+
from model_compression_toolkit.core.common import Graph
|
|
21
|
+
from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
|
|
22
|
+
from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
|
|
23
|
+
from model_compression_toolkit.core.graph_prep_runner import graph_preparation_runner
|
|
24
|
+
from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities, \
|
|
25
|
+
QuantizationConfigOptions
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def compute_resource_utilization_data(in_model: Any,
|
|
29
|
+
representative_data_gen: Callable,
|
|
30
|
+
core_config: CoreConfig,
|
|
31
|
+
tpc: TargetPlatformCapabilities,
|
|
32
|
+
fw_info: FrameworkInfo,
|
|
33
|
+
fw_impl: FrameworkImplementation,
|
|
34
|
+
transformed_graph: Graph = None,
|
|
35
|
+
mixed_precision_enable: bool = True) -> ResourceUtilization:
|
|
36
|
+
"""
|
|
37
|
+
Compute Resource Utilization information that can be relevant for defining target ResourceUtilization for mixed precision search.
|
|
38
|
+
Calculates maximal activation tensor size, the sum of the model's weight parameters and the total memory combining both weights
|
|
39
|
+
and maximal activation tensor size.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
in_model: Model to build graph from (the model that intended to be quantized).
|
|
43
|
+
representative_data_gen: Dataset used for calibration.
|
|
44
|
+
core_config: CoreConfig containing parameters of how the model should be quantized.
|
|
45
|
+
tpc: TargetPlatformCapabilities object that models the inference target platform and
|
|
46
|
+
the attached framework operator's information.
|
|
47
|
+
fw_info: Information needed for quantization about the specific framework.
|
|
48
|
+
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
|
|
49
|
+
transformed_graph: An internal graph representation of the input model. Defaults to None.
|
|
50
|
+
If no graph is provided, a graph will be constructed using the specified model.
|
|
51
|
+
mixed_precision_enable: Indicates if mixed precision is enabled, defaults to True.
|
|
52
|
+
If disabled, computes resource utilization using base quantization
|
|
53
|
+
configurations across all layers.
|
|
54
|
+
|
|
55
|
+
Returns:
|
|
56
|
+
ResourceUtilization: An object encapsulating the calculated resource utilization computations.
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
# We assume that the resource_utilization_data API is used to compute the model resource utilization for
|
|
62
|
+
# mixed precision scenario, so we run graph preparation under the assumption of enabled mixed precision.
|
|
63
|
+
if transformed_graph is None:
|
|
64
|
+
transformed_graph = graph_preparation_runner(in_model,
|
|
65
|
+
representative_data_gen,
|
|
66
|
+
core_config.quantization_config,
|
|
67
|
+
fw_info,
|
|
68
|
+
fw_impl,
|
|
69
|
+
tpc,
|
|
70
|
+
mixed_precision_enable=mixed_precision_enable)
|
|
71
|
+
|
|
72
|
+
# Compute parameters sum
|
|
73
|
+
weights_memory_bytes, weights_params = compute_nodes_weights_params(graph=transformed_graph, fw_info=fw_info)
|
|
74
|
+
total_weights_params = 0 if len(weights_params) == 0 else sum(weights_params)
|
|
75
|
+
|
|
76
|
+
# Compute max activation tensor
|
|
77
|
+
activation_output_sizes_bytes, activation_output_sizes = compute_activation_output_sizes(graph=transformed_graph)
|
|
78
|
+
max_activation_tensor_size = 0 if len(activation_output_sizes) == 0 else max(activation_output_sizes)
|
|
79
|
+
|
|
80
|
+
# Compute total memory utilization - parameters sum + max activation tensor
|
|
81
|
+
total_size = total_weights_params + max_activation_tensor_size
|
|
82
|
+
|
|
83
|
+
# Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
|
|
84
|
+
bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
|
|
85
|
+
bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
|
|
86
|
+
|
|
87
|
+
return ResourceUtilization(weights_memory=total_weights_params,
|
|
88
|
+
activation_memory=max_activation_tensor_size,
|
|
89
|
+
total_memory=total_size,
|
|
90
|
+
bops=bops_count)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def compute_nodes_weights_params(graph: Graph, fw_info: FrameworkInfo) -> Tuple[np.ndarray, np.ndarray]:
|
|
94
|
+
"""
|
|
95
|
+
Calculates the memory usage in bytes and the number of weight parameters for each node within a graph.
|
|
96
|
+
Memory calculations are based on the maximum bit-width used for quantization per node.
|
|
97
|
+
|
|
98
|
+
Args:
|
|
99
|
+
graph: A finalized Graph object, representing the model structure.
|
|
100
|
+
fw_info: FrameworkInfo object containing details about the specific framework's
|
|
101
|
+
quantization attributes for different layers' weights.
|
|
102
|
+
|
|
103
|
+
Returns:
|
|
104
|
+
A tuple containing two arrays:
|
|
105
|
+
- The first array represents the memory in bytes for each node's weights when quantized at the maximal bit-width.
|
|
106
|
+
- The second array represents the total number of weight parameters for each node.
|
|
107
|
+
"""
|
|
108
|
+
weights_params = []
|
|
109
|
+
weights_memory_bytes = []
|
|
110
|
+
for n in graph.nodes:
|
|
111
|
+
# TODO: when enabling multiple attribute quantization by default (currently,
|
|
112
|
+
# only kernel quantization is enabled) we should include other attributes memory in the sum of all
|
|
113
|
+
# weights memory.
|
|
114
|
+
# When implementing this, we should just go over all attributes in the node instead of counting only kernels.
|
|
115
|
+
kernel_attr = fw_info.get_kernel_op_attributes(n.type)[0]
|
|
116
|
+
if kernel_attr is not None and not n.reuse:
|
|
117
|
+
kernel_candidates = n.get_all_weights_attr_candidates(kernel_attr)
|
|
118
|
+
|
|
119
|
+
if len(kernel_candidates) > 0 and any([c.enable_weights_quantization for c in kernel_candidates]):
|
|
120
|
+
max_weight_bits = max([kc.weights_n_bits for kc in kernel_candidates])
|
|
121
|
+
node_num_weights_params = 0
|
|
122
|
+
for attr in fw_info.get_kernel_op_attributes(n.type):
|
|
123
|
+
if attr is not None:
|
|
124
|
+
node_num_weights_params += n.get_weights_by_keys(attr).flatten().shape[0]
|
|
125
|
+
|
|
126
|
+
weights_params.append(node_num_weights_params)
|
|
127
|
+
|
|
128
|
+
# multiply num params by num bits and divide by BITS_TO_BYTES to convert from bits to bytes
|
|
129
|
+
weights_memory_bytes.append(node_num_weights_params * max_weight_bits / BITS_TO_BYTES)
|
|
130
|
+
|
|
131
|
+
return np.array(weights_memory_bytes), np.array(weights_params)
|
|
132
|
+
|
|
133
|
+
def compute_activation_output_sizes(graph: Graph) -> Tuple[np.ndarray, np.ndarray]:
|
|
134
|
+
"""
|
|
135
|
+
Computes an array of the respective output tensor size and an array of the output tensor size in bytes for
|
|
136
|
+
each node.
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
graph: A finalized Graph object, representing the model structure.
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
A tuple containing two arrays:
|
|
143
|
+
- The first array represents the size of each node's activation output tensor size in bytes,
|
|
144
|
+
calculated using the maximal bit-width for quantization.
|
|
145
|
+
- The second array represents the size of each node's activation output tensor size.
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
"""
|
|
149
|
+
|
|
150
|
+
activation_outputs = []
|
|
151
|
+
activation_outputs_bytes = []
|
|
152
|
+
for n in graph.nodes:
|
|
153
|
+
# Go over all nodes that have configurable activation.
|
|
154
|
+
if n.has_activation_quantization_enabled_candidate():
|
|
155
|
+
# Fetch maximum bits required for quantizing activations
|
|
156
|
+
max_activation_bits = max([qc.activation_quantization_cfg.activation_n_bits for qc in n.candidates_quantization_cfg])
|
|
157
|
+
node_output_size = n.get_total_output_params()
|
|
158
|
+
activation_outputs.append(node_output_size)
|
|
159
|
+
# Calculate activation size in bytes and append to list
|
|
160
|
+
activation_outputs_bytes.append(node_output_size * max_activation_bits / BITS_TO_BYTES)
|
|
161
|
+
|
|
162
|
+
return np.array(activation_outputs_bytes), np.array(activation_outputs)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def compute_total_bops(graph: Graph, fw_info: FrameworkInfo, fw_impl: FrameworkImplementation) -> np.ndarray:
|
|
166
|
+
"""
|
|
167
|
+
Computes a vector with the respective Bit-operations count for each configurable node that includes MAC operations.
|
|
168
|
+
The computation assumes that the graph is a representation of a float model, thus, BOPs computation uses 32-bit.
|
|
169
|
+
|
|
170
|
+
Args:
|
|
171
|
+
graph: Finalized Graph object.
|
|
172
|
+
fw_info: FrameworkInfo object about the specific framework
|
|
173
|
+
(e.g., attributes of different layers' weights to quantize).
|
|
174
|
+
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
|
|
175
|
+
|
|
176
|
+
Returns: A vector of nodes' Bit-operations count.
|
|
177
|
+
|
|
178
|
+
"""
|
|
179
|
+
|
|
180
|
+
bops = []
|
|
181
|
+
|
|
182
|
+
# Go over all configurable nodes that have kernels.
|
|
183
|
+
for n in graph.get_topo_sorted_nodes():
|
|
184
|
+
if n.has_kernel_weight_to_quantize(fw_info):
|
|
185
|
+
# If node doesn't have weights then its MAC count is 0, and we shouldn't consider it in the BOPS count.
|
|
186
|
+
incoming_edges = graph.incoming_edges(n, sort_by_attr=EDGE_SINK_INDEX)
|
|
187
|
+
assert len(incoming_edges) == 1, f"Can't compute BOPS metric for node {n.name} with multiple inputs."
|
|
188
|
+
|
|
189
|
+
node_mac = fw_impl.get_node_mac_operations(n, fw_info)
|
|
190
|
+
|
|
191
|
+
node_bops = (FLOAT_BITWIDTH ** 2) * node_mac
|
|
192
|
+
bops.append(node_bops)
|
|
193
|
+
|
|
194
|
+
return np.array(bops)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def requires_mixed_precision(in_model: Any,
|
|
198
|
+
target_resource_utilization: ResourceUtilization,
|
|
199
|
+
representative_data_gen: Callable,
|
|
200
|
+
core_config: CoreConfig,
|
|
201
|
+
tpc: TargetPlatformCapabilities,
|
|
202
|
+
fw_info: FrameworkInfo,
|
|
203
|
+
fw_impl: FrameworkImplementation) -> bool:
|
|
204
|
+
"""
|
|
205
|
+
The function checks whether the model requires mixed precision to meet the requested target resource utilization.
|
|
206
|
+
This is determined by whether the target memory usage of the weights is less than the available memory,
|
|
207
|
+
the target maximum size of an activation tensor is less than the available memory,
|
|
208
|
+
and the target number of BOPs is less than the available BOPs.
|
|
209
|
+
If any of these conditions are met, the function returns True. Otherwise, it returns False.
|
|
210
|
+
|
|
211
|
+
Args:
|
|
212
|
+
in_model: The model to be evaluated.
|
|
213
|
+
target_resource_utilization: The resource utilization of the target device.
|
|
214
|
+
representative_data_gen: A function that generates representative data for the model.
|
|
215
|
+
core_config: CoreConfig containing parameters of how the model should be quantized.
|
|
216
|
+
tpc: TargetPlatformCapabilities object that models the inference target platform and
|
|
217
|
+
the attached framework operator's information.
|
|
218
|
+
fw_info: Information needed for quantization about the specific framework.
|
|
219
|
+
fw_impl: FrameworkImplementation object with a specific framework methods implementation.
|
|
220
|
+
|
|
221
|
+
Returns: A boolean indicating if mixed precision is needed.
|
|
222
|
+
"""
|
|
223
|
+
is_mixed_precision = False
|
|
224
|
+
transformed_graph = graph_preparation_runner(in_model,
|
|
225
|
+
representative_data_gen,
|
|
226
|
+
core_config.quantization_config,
|
|
227
|
+
fw_info,
|
|
228
|
+
fw_impl,
|
|
229
|
+
tpc,
|
|
230
|
+
mixed_precision_enable=False)
|
|
231
|
+
# Compute max weights memory in bytes
|
|
232
|
+
weights_memory_by_layer_bytes, _ = compute_nodes_weights_params(transformed_graph, fw_info)
|
|
233
|
+
total_weights_memory_bytes = 0 if len(weights_memory_by_layer_bytes) == 0 else sum(weights_memory_by_layer_bytes)
|
|
234
|
+
|
|
235
|
+
# Compute max activation tensor in bytes
|
|
236
|
+
activation_output_sizes_bytes, _ = compute_activation_output_sizes(transformed_graph)
|
|
237
|
+
max_activation_tensor_size_bytes = 0 if len(activation_output_sizes_bytes) == 0 else max(activation_output_sizes_bytes)
|
|
238
|
+
|
|
239
|
+
# Compute BOPS utilization - total count of bit-operations for all configurable layers with kernel
|
|
240
|
+
bops_count = compute_total_bops(graph=transformed_graph, fw_info=fw_info, fw_impl=fw_impl)
|
|
241
|
+
bops_count = np.inf if len(bops_count) == 0 else sum(bops_count)
|
|
242
|
+
|
|
243
|
+
is_mixed_precision |= target_resource_utilization.weights_memory < total_weights_memory_bytes
|
|
244
|
+
is_mixed_precision |= target_resource_utilization.activation_memory < max_activation_tensor_size_bytes
|
|
245
|
+
is_mixed_precision |= target_resource_utilization.total_memory < total_weights_memory_bytes + max_activation_tensor_size_bytes
|
|
246
|
+
is_mixed_precision |= target_resource_utilization.bops < bops_count
|
|
247
|
+
return is_mixed_precision
|
|
@@ -73,7 +73,7 @@ class PerChannelMask:
|
|
|
73
73
|
mask_indicator: The new value to set in the mask (either PRUNED or REMAINED).
|
|
74
74
|
"""
|
|
75
75
|
if mask_indicator not in [MaskIndicator.PRUNED, MaskIndicator.REMAINED]:
|
|
76
|
-
Logger.critical("Mask value must be either 'MaskIndicator.PRUNED' or 'MaskIndicator.REMAINED'")
|
|
76
|
+
Logger.critical("Mask value must be either 'MaskIndicator.PRUNED' or 'MaskIndicator.REMAINED'") # pragma: no cover
|
|
77
77
|
self._mask[node][channel_idx] = mask_indicator.value
|
|
78
78
|
|
|
79
79
|
def has_pruned_channel(self) -> bool:
|
|
@@ -79,7 +79,7 @@ class PerSIMDGroupMask:
|
|
|
79
79
|
mask_indicator: The new value to set in the mask (either PRUNED or REMAINED).
|
|
80
80
|
"""
|
|
81
81
|
if mask_indicator not in [MaskIndicator.PRUNED, MaskIndicator.REMAINED]:
|
|
82
|
-
Logger.critical("Mask value must be either 'MaskIndicator.PRUNED' or 'MaskIndicator.REMAINED'")
|
|
82
|
+
Logger.critical("Mask value must be either 'MaskIndicator.PRUNED' or 'MaskIndicator.REMAINED'") # pragma: no cover
|
|
83
83
|
|
|
84
84
|
# Update the SIMD group mask and corresponding per-channel mask
|
|
85
85
|
self._mask_simd[node][group_index] = mask_indicator.value
|
|
@@ -92,7 +92,7 @@ class Pruner:
|
|
|
92
92
|
mask_calculator.compute_mask()
|
|
93
93
|
self.per_oc_mask = mask_calculator.get_mask()
|
|
94
94
|
else:
|
|
95
|
-
Logger.critical("Only GREEDY ChannelsFilteringStrategy is currently supported.")
|
|
95
|
+
Logger.critical("Only GREEDY ChannelsFilteringStrategy is currently supported.") # pragma: no cover
|
|
96
96
|
|
|
97
97
|
Logger.info("Start pruning graph...")
|
|
98
98
|
_pruned_graph = build_pruned_graph(self.float_graph,
|
|
@@ -76,7 +76,7 @@ def unroll_simd_scores_to_per_channel_scores(simd_scores: Dict[BaseNode, np.ndar
|
|
|
76
76
|
"""
|
|
77
77
|
if simd_scores is None or simd_groups_indices is None:
|
|
78
78
|
Logger.critical(f"Failed to find scores and indices to create unrolled scores for pruning information."
|
|
79
|
-
f" Scores: {simd_scores}, Group indices: {simd_groups_indices}.")
|
|
79
|
+
f" Scores: {simd_scores}, Group indices: {simd_groups_indices}.") # pragma: no cover
|
|
80
80
|
_scores = {}
|
|
81
81
|
for node, groups_indices in simd_groups_indices.items():
|
|
82
82
|
node_scores = simd_scores[node]
|
|
@@ -65,10 +65,8 @@ class BaseNodeQuantizationConfig(object):
|
|
|
65
65
|
"""
|
|
66
66
|
Returns: String to display a NodeQuantizationConfig object.
|
|
67
67
|
"""
|
|
68
|
-
|
|
69
|
-
for k, v in self.__dict__.items():
|
|
70
|
-
repr_str += f'{k}: {v}\n'
|
|
71
|
-
return repr_str
|
|
68
|
+
# Used for debugging, thus no cover.
|
|
69
|
+
return ''.join(f'{k}: {v}\n' for k, v in self.__dict__.items()) # pragma: no cover
|
|
72
70
|
|
|
73
71
|
|
|
74
72
|
class NodeActivationQuantizationConfig(BaseNodeQuantizationConfig):
|
|
@@ -124,7 +124,8 @@ class QuantizationConfig:
|
|
|
124
124
|
self.concat_threshold_update = concat_threshold_update
|
|
125
125
|
|
|
126
126
|
def __repr__(self):
|
|
127
|
-
|
|
127
|
+
# Used for debugging, thus no cover.
|
|
128
|
+
return str(self.__dict__) # pragma: no cover
|
|
128
129
|
|
|
129
130
|
|
|
130
131
|
# Default quantization configuration the library use.
|
|
@@ -44,6 +44,6 @@ def get_weights_quantization_fn(weights_quantization_method: QuantizationMethod)
|
|
|
44
44
|
quantizer_fn = lut_kmeans_quantizer
|
|
45
45
|
else:
|
|
46
46
|
Logger.critical(
|
|
47
|
-
f"No quantizer function found for the specified quantization method: {weights_quantization_method}")
|
|
47
|
+
f"No quantizer function found for the specified quantization method: {weights_quantization_method}") # pragma: no cover
|
|
48
48
|
|
|
49
49
|
return quantizer_fn
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
from model_compression_toolkit.core.common.graph.base_graph import Graph, OutTensor
|
|
17
17
|
from model_compression_toolkit.core.common.graph.base_node import BaseNode
|
|
18
|
+
from model_compression_toolkit.logger import Logger
|
|
18
19
|
|
|
19
20
|
|
|
20
21
|
def remove_identity_node(graph: Graph,
|
|
@@ -36,7 +37,9 @@ def remove_identity_node(graph: Graph,
|
|
|
36
37
|
|
|
37
38
|
# Ensure there is exactly one predecessor; otherwise, do nothing.
|
|
38
39
|
if len(prev_identity_nodes) != 1:
|
|
39
|
-
|
|
40
|
+
# We do not expect to get here.
|
|
41
|
+
Logger.error(f"Identity node {node} have {len(prev_identity_nodes)} inputs, while expected to have one. Skipping remove identity substitution.") # pragma: no cover
|
|
42
|
+
return graph # pragma: no cover
|
|
40
43
|
|
|
41
44
|
graph_outputs = graph.get_outputs()
|
|
42
45
|
for i, g_out in enumerate(graph_outputs):
|
|
@@ -25,9 +25,9 @@ if version.parse(tf.__version__) >= version.parse("2.13"):
|
|
|
25
25
|
from keras.src.layers.core import TFOpLambda
|
|
26
26
|
from keras.src.engine.base_layer import TensorFlowOpLayer, Layer
|
|
27
27
|
else:
|
|
28
|
-
from keras import Input
|
|
29
|
-
from keras.layers.core import TFOpLambda
|
|
30
|
-
from keras.engine.base_layer import TensorFlowOpLayer, Layer
|
|
28
|
+
from keras import Input # pragma: no cover
|
|
29
|
+
from keras.layers.core import TFOpLambda # pragma: no cover
|
|
30
|
+
from keras.engine.base_layer import TensorFlowOpLayer, Layer # pragma: no cover
|
|
31
31
|
|
|
32
32
|
from typing import Any, Dict, List, Tuple, Callable
|
|
33
33
|
from tensorflow.python.util.object_identity import Reference as TFReference
|
|
@@ -19,7 +19,7 @@ import tensorflow as tf
|
|
|
19
19
|
if version.parse(tf.__version__) >= version.parse("2.13"):
|
|
20
20
|
from keras.src.engine.base_layer import Layer
|
|
21
21
|
else:
|
|
22
|
-
from keras.engine.base_layer import Layer
|
|
22
|
+
from keras.engine.base_layer import Layer # pragma: no cover
|
|
23
23
|
|
|
24
24
|
from keras.models import Model
|
|
25
25
|
from mct_quantizers import KerasQuantizationWrapper, KerasActivationQuantizationHolder, QuantizationTarget
|
|
@@ -22,7 +22,7 @@ from packaging import version
|
|
|
22
22
|
if version.parse(tf.__version__) >= version.parse("2.13"):
|
|
23
23
|
from keras.src.layers import Conv2D, DepthwiseConv2D, Dense, Conv2DTranspose, Softmax, ELU
|
|
24
24
|
else:
|
|
25
|
-
from keras.layers import Conv2D, DepthwiseConv2D, Dense, Conv2DTranspose, Softmax, ELU
|
|
25
|
+
from keras.layers import Conv2D, DepthwiseConv2D, Dense, Conv2DTranspose, Softmax, ELU # pragma: no cover
|
|
26
26
|
|
|
27
27
|
from model_compression_toolkit.defaultdict import DefaultDict
|
|
28
28
|
from model_compression_toolkit.core.common.framework_info import FrameworkInfo
|
|
@@ -66,7 +66,7 @@ class ActivationDecomposition(common.BaseSubstitution):
|
|
|
66
66
|
if ACTIVATION not in op2d_node.framework_attr:
|
|
67
67
|
Logger.warning(f'Op2d node {op2d_node.name} of type {op2d_node.type} is missing an "{ACTIVATION}"'
|
|
68
68
|
f' attribute -> Skipping substitution ActivationDecomposition') # pragma: no cover
|
|
69
|
-
return graph
|
|
69
|
+
return graph # pragma: no cover
|
|
70
70
|
|
|
71
71
|
activation_node_name = op2d_node.name + '_post_activation'
|
|
72
72
|
|
|
@@ -104,7 +104,7 @@ def conv2d_collapsing_fn(first_node: BaseNode,
|
|
|
104
104
|
|
|
105
105
|
return kernel_collapsed, bias_collapsed
|
|
106
106
|
else:
|
|
107
|
-
Logger.critical(f"Layer collapsing unsupported for combination: {first_node.type} and {second_node.type}.")
|
|
107
|
+
Logger.critical(f"Layer collapsing unsupported for combination: {first_node.type} and {second_node.type}.") # pragma: no cover
|
|
108
108
|
|
|
109
109
|
|
|
110
110
|
def keras_linear_collapsing() -> Conv2DCollapsing:
|
|
@@ -62,7 +62,7 @@ def residual_collapsing_fn(first_node: BaseNode,
|
|
|
62
62
|
|
|
63
63
|
return kernel
|
|
64
64
|
else:
|
|
65
|
-
Logger.critical(f"Residual collapsing is unsupported for {first_node.type} node types.")
|
|
65
|
+
Logger.critical(f"Residual collapsing is unsupported for {first_node.type} node types.") # pragma: no cover
|
|
66
66
|
|
|
67
67
|
|
|
68
68
|
def keras_residual_collapsing() -> ResidualCollapsing:
|
|
@@ -22,7 +22,7 @@ import tensorflow as tf
|
|
|
22
22
|
from tensorflow.python.keras.layers.core import TFOpLambda
|
|
23
23
|
if version.parse(tf.__version__) >= version.parse("2.13"):
|
|
24
24
|
from keras.src.layers import Activation, Conv2D, Dense, DepthwiseConv2D, ZeroPadding2D, Reshape, \
|
|
25
|
-
GlobalAveragePooling2D, Dropout, ReLU, PReLU, ELU
|
|
25
|
+
GlobalAveragePooling2D, Dropout, ReLU, PReLU, ELU # pragma: no cover
|
|
26
26
|
else:
|
|
27
27
|
from tensorflow.keras.layers import Activation, Conv2D, Dense, DepthwiseConv2D, ZeroPadding2D, Reshape, \
|
|
28
28
|
GlobalAveragePooling2D, Dropout, ReLU, PReLU, ELU
|
|
@@ -74,6 +74,6 @@ class TraceHessianCalculatorKeras(TraceHessianCalculator):
|
|
|
74
74
|
concat_axis_dim = [o.shape[0] for o in _r_tensors]
|
|
75
75
|
if not all(d == concat_axis_dim[0] for d in concat_axis_dim):
|
|
76
76
|
Logger.critical(
|
|
77
|
-
"Unable to concatenate tensors for gradient calculation due to mismatched shapes along the first axis.")# pragma: no cover
|
|
77
|
+
"Unable to concatenate tensors for gradient calculation due to mismatched shapes along the first axis.") # pragma: no cover
|
|
78
78
|
|
|
79
79
|
return tf.concat(_r_tensors, axis=1)
|
|
@@ -51,13 +51,11 @@ from model_compression_toolkit.core.keras.statistics_correction.apply_second_mom
|
|
|
51
51
|
from packaging import version
|
|
52
52
|
|
|
53
53
|
if version.parse(tf.__version__) >= version.parse("2.13"):
|
|
54
|
-
from keras.src.layers import Dense, Activation, Conv2D, DepthwiseConv2D, Conv2DTranspose,
|
|
55
|
-
Concatenate, Add
|
|
54
|
+
from keras.src.layers import Dense, Activation, Conv2D, DepthwiseConv2D, Conv2DTranspose, Concatenate, Add
|
|
56
55
|
from keras.src.layers.core import TFOpLambda
|
|
57
56
|
else:
|
|
58
|
-
from keras.layers import Dense, Activation, Conv2D, DepthwiseConv2D, Conv2DTranspose,
|
|
59
|
-
|
|
60
|
-
from keras.layers.core import TFOpLambda
|
|
57
|
+
from keras.layers import Dense, Activation, Conv2D, DepthwiseConv2D, Conv2DTranspose, Concatenate, Add # pragma: no cover
|
|
58
|
+
from keras.layers.core import TFOpLambda # pragma: no cover
|
|
61
59
|
|
|
62
60
|
from model_compression_toolkit.core import QuantizationConfig, FrameworkInfo, CoreConfig, MixedPrecisionQuantizationConfig
|
|
63
61
|
from model_compression_toolkit.core import common
|
|
@@ -489,7 +487,7 @@ class KerasImplementation(FrameworkImplementation):
|
|
|
489
487
|
fw_impl=self,
|
|
490
488
|
num_iterations_for_approximation=num_iterations_for_approximation)
|
|
491
489
|
else:
|
|
492
|
-
Logger.critical(f"Unsupported Hessian mode for Keras: {trace_hessian_request.mode}.")
|
|
490
|
+
Logger.critical(f"Unsupported Hessian mode for Keras: {trace_hessian_request.mode}.") # pragma: no cover
|
|
493
491
|
|
|
494
492
|
def is_output_node_compatible_for_hessian_score_computation(self,
|
|
495
493
|
node: BaseNode) -> Any:
|
|
@@ -6,7 +6,7 @@ from packaging import version
|
|
|
6
6
|
if version.parse(tf.__version__) >= version.parse("2.13"):
|
|
7
7
|
from keras.src.layers import Activation, ReLU, BatchNormalization
|
|
8
8
|
else:
|
|
9
|
-
from keras.layers import Activation, ReLU, BatchNormalization
|
|
9
|
+
from keras.layers import Activation, ReLU, BatchNormalization # pragma: no cover
|
|
10
10
|
|
|
11
11
|
from model_compression_toolkit.core import FrameworkInfo
|
|
12
12
|
from model_compression_toolkit.core.common import BaseNode
|
|
@@ -24,6 +24,17 @@ from model_compression_toolkit.logger import Logger
|
|
|
24
24
|
from model_compression_toolkit.constants import THRESHOLD, SIGNED, RANGE_MIN, RANGE_MAX
|
|
25
25
|
from model_compression_toolkit.core.common.quantization.quantizers.uniform_quantizers import threshold_is_power_of_two
|
|
26
26
|
|
|
27
|
+
################################################################
|
|
28
|
+
################################################################
|
|
29
|
+
# TODO:
|
|
30
|
+
# These quantizer functions are for internal use. They are currently
|
|
31
|
+
# used in some features like MP for activation and SNC (where
|
|
32
|
+
# inference in the framework is needed).
|
|
33
|
+
# It may worth considering removing these functions and use
|
|
34
|
+
# activation inferable quantizers in those features like we do
|
|
35
|
+
# in GPTQ.
|
|
36
|
+
################################################################
|
|
37
|
+
################################################################
|
|
27
38
|
|
|
28
39
|
def quantizer_min_max_calculator(threshold: np.ndarray,
|
|
29
40
|
num_bits: int,
|
|
@@ -24,10 +24,10 @@ if version.parse(tf.__version__) >= version.parse("2.13"):
|
|
|
24
24
|
from keras.src.engine.functional import Functional
|
|
25
25
|
from keras.src.engine.sequential import Sequential
|
|
26
26
|
else:
|
|
27
|
-
from keras.engine.input_layer import InputLayer
|
|
28
|
-
from keras.engine.node import Node as KerasNode
|
|
29
|
-
from keras.engine.functional import Functional
|
|
30
|
-
from keras.engine.sequential import Sequential
|
|
27
|
+
from keras.engine.input_layer import InputLayer # pragma: no cover
|
|
28
|
+
from keras.engine.node import Node as KerasNode # pragma: no cover
|
|
29
|
+
from keras.engine.functional import Functional # pragma: no cover
|
|
30
|
+
from keras.engine.sequential import Sequential # pragma: no cover
|
|
31
31
|
|
|
32
32
|
from model_compression_toolkit.logger import Logger
|
|
33
33
|
from model_compression_toolkit.core.common.graph.base_node import BaseNode
|
|
@@ -38,18 +38,3 @@ def node_builder(n: BaseNode) -> Module:
|
|
|
38
38
|
return node_instance
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
# todo: remove. It is not used anymore
|
|
42
|
-
def identity_wrapper(node: BaseNode,
|
|
43
|
-
module: Module,
|
|
44
|
-
include_activation_quantizers: bool):
|
|
45
|
-
"""
|
|
46
|
-
A function which takes a computational graph node and a pytorch module and return an identity wrapping which return the layer itself
|
|
47
|
-
Args:
|
|
48
|
-
node: A node of mct graph.
|
|
49
|
-
layer: A pytorch module
|
|
50
|
-
include_activation_quantizers: bool flag.
|
|
51
|
-
Returns: pytorch module
|
|
52
|
-
"""
|
|
53
|
-
return module
|
|
54
|
-
|
|
55
|
-
|
|
@@ -27,7 +27,7 @@ from model_compression_toolkit.core.common.back2framework.base_model_builder imp
|
|
|
27
27
|
from model_compression_toolkit.core.common.graph.edge import EDGE_SINK_INDEX
|
|
28
28
|
from model_compression_toolkit.core.common.graph.functional_node import FunctionalNode
|
|
29
29
|
from model_compression_toolkit.core.common.user_info import UserInformation
|
|
30
|
-
from model_compression_toolkit.core.pytorch.back2framework.instance_builder import node_builder
|
|
30
|
+
from model_compression_toolkit.core.pytorch.back2framework.instance_builder import node_builder
|
|
31
31
|
from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
|
|
32
32
|
from model_compression_toolkit.core.pytorch.pytorch_device_config import get_working_device
|
|
33
33
|
from model_compression_toolkit.core.pytorch.reader.node_holders import DummyPlaceHolder
|
|
@@ -101,7 +101,7 @@ def conv2d_collapsing_fn(first_node: BaseNode,
|
|
|
101
101
|
|
|
102
102
|
return kernel_collapsed, bias_collapsed
|
|
103
103
|
else:
|
|
104
|
-
Logger.critical(f"Layer collapsing is not supported for the combination of {first_node.type} and {second_node.type}.")
|
|
104
|
+
Logger.critical(f"Layer collapsing is not supported for the combination of {first_node.type} and {second_node.type}.") # pragma: no cover
|
|
105
105
|
|
|
106
106
|
|
|
107
107
|
def pytorch_linear_collapsing() -> Conv2DCollapsing:
|
|
@@ -58,7 +58,7 @@ def residual_collapsing_fn(first_node: BaseNode,
|
|
|
58
58
|
kernel[i, i, idxH, idxW] += 1
|
|
59
59
|
return kernel
|
|
60
60
|
else:
|
|
61
|
-
Logger.critical(f"Residual collapsing not supported for node type: {first_node.type}")
|
|
61
|
+
Logger.critical(f"Residual collapsing not supported for node type: {first_node.type}") # pragma: no cover
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
def pytorch_residual_collapsing() -> ResidualCollapsing:
|
|
@@ -65,6 +65,6 @@ class TraceHessianCalculatorPytorch(TraceHessianCalculator):
|
|
|
65
65
|
concat_axis_dim = [o.shape[0] for o in _r_tensors]
|
|
66
66
|
if not all(d == concat_axis_dim[0] for d in concat_axis_dim):
|
|
67
67
|
Logger.critical(
|
|
68
|
-
"Unable to concatenate tensors for gradient calculation due to mismatched shapes along the first axis.")
|
|
68
|
+
"Unable to concatenate tensors for gradient calculation due to mismatched shapes along the first axis.") # pragma: no cover
|
|
69
69
|
|
|
70
70
|
return torch.concat(_r_tensors, dim=1)
|
|
@@ -87,13 +87,16 @@ class DeviceManager:
|
|
|
87
87
|
device_index = int(device_name.split(':')[1])
|
|
88
88
|
if device_index >= torch.cuda.device_count():
|
|
89
89
|
return False, f"CUDA device index {device_index} out of range. Number of valid devices: {torch.cuda.device_count()}"
|
|
90
|
-
except
|
|
90
|
+
except Exception:
|
|
91
91
|
# Handle cases where the device name is incorrectly formatted
|
|
92
92
|
return False, "Invalid CUDA device format. Use 'cuda' or 'cuda:x' where x is the device index."
|
|
93
93
|
|
|
94
94
|
return True, "Valid device"
|
|
95
95
|
|
|
96
|
-
|
|
96
|
+
if CPU in device_name:
|
|
97
|
+
return True, "Valid device"
|
|
98
|
+
|
|
99
|
+
return False, "Invalid device"
|
|
97
100
|
|
|
98
101
|
|
|
99
102
|
|