PyPI - mct-nightly - Versions diffs - 1.11.0.20240304.post404__py3-none-any.whl → 1.11.0.20240305.post352__py3-none-any.whl - Mend

mct-nightly 1.11.0.20240304.post404__py3-none-any.whl → 1.11.0.20240305.post352__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

model_compression_toolkit/pruning/pytorch/pruning_facade.py ADDED Viewed

@@ -0,0 +1,166 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import Callable, Tuple
+from model_compression_toolkit import get_target_platform_capabilities
+from model_compression_toolkit.constants import FOUND_TORCH, PYTORCH
+from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
+from model_compression_toolkit.core.common.pruning.pruner import Pruner
+from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig
+from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
+from model_compression_toolkit.core.common.quantization.set_node_quantization_config import set_quantization_configuration_to_graph
+from model_compression_toolkit.core.graph_prep_runner import read_model_to_graph
+from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
+from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG
+from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
+# Check if PyTorch is available in the environment.
+if FOUND_TORCH:
+    # Import PyTorch-specific modules from the model compression toolkit.
+    from model_compression_toolkit.core.pytorch.back2framework.float_model_builder import FloatPyTorchModelBuilder
+    from model_compression_toolkit.core.pytorch.pruning.pruning_pytorch_implementation import \
+        PruningPytorchImplementation
+    from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
+    from torch.nn import Module
+    # Set the default Target Platform Capabilities (TPC) for PyTorch.
+    DEFAULT_PYOTRCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)
+    def pytorch_pruning_experimental(model: Module,
+                                     target_kpi: KPI,
+                                     representative_data_gen: Callable,
+                                     pruning_config: PruningConfig = PruningConfig(),
+                                     target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYOTRCH_TPC) -> \
+            Tuple[Module, PruningInfo]:
+        """
+        Perform structured pruning on a Pytorch model to meet a specified target KPI.
+        This function prunes the provided model according to the target KPI by grouping and pruning
+        channels based on each layer's SIMD configuration in the Target Platform Capabilities (TPC).
+        By default, the importance of each channel group is determined using the Label-Free Hessian
+        (LFH) method, assessing each channel's sensitivity to the Hessian of the loss function.
+        This pruning strategy considers groups of channels together for a more hardware-friendly
+        architecture. The process involves analyzing the model with a representative dataset to
+        identify groups of channels that can be removed with minimal impact on performance.
+        Notice that the pruned model must be retrained to recover the compressed model's performance.
+        Args:
+            model (Module): The PyTorch model to be pruned.
+            target_kpi (KPI): Key Performance Indicators specifying the pruning targets.
+            representative_data_gen (Callable): A function to generate representative data for pruning analysis.
+            pruning_config (PruningConfig): Configuration settings for the pruning process. Defaults to standard config.
+            target_platform_capabilities (TargetPlatformCapabilities): Platform-specific constraints and capabilities.
+                Defaults to DEFAULT_PYTORCH_TPC.
+        Returns:
+            Tuple[Model, PruningInfo]: A tuple containing the pruned Pytorch model and associated pruning information.
+        Note:
+            The pruned model should be fine-tuned or retrained to recover or improve its performance post-pruning.
+        Examples:
+            Import MCT:
+            >>> import model_compression_toolkit as mct
+            Import a Pytorch model:
+            >>> from torchvision.models import resnet50, ResNet50_Weights
+            >>> model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
+            Create a random dataset generator:
+            >>> import numpy as np
+            >>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
+            Define a target KPI for pruning.
+            Here, we aim to reduce the memory footprint of weights by 50%, assuming the model weights
+            are represented in float32 data type (thus, each parameter is represented using 4 bytes):
+            >>> dense_nparams = sum(p.numel() for p in model.state_dict().values())
+            >>> target_kpi = mct.KPI(weights_memory=dense_nparams * 4 * 0.5)
+            Optionally, define a pruning configuration. num_score_approximations can be passed
+            to configure the number of importance scores that will be calculated for each channel.
+            A higher value for this parameter yields more precise score approximations but also
+            extends the duration of the pruning process:
+            >>> pruning_config = mct.pruning.PruningConfig(num_score_approximations=1)
+            Perform pruning:
+            >>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model, target_kpi=target_kpi, representative_data_gen=repr_datagen, pruning_config=pruning_config)
+        """
+        # Instantiate the Pytorch framework implementation.
+        fw_impl = PruningPytorchImplementation()
+        # Convert the original Pytorch model to an internal graph representation.
+        float_graph = read_model_to_graph(model,
+                                          representative_data_gen,
+                                          target_platform_capabilities,
+                                          DEFAULT_PYTORCH_INFO,
+                                          fw_impl)
+        # Apply quantization configuration to the graph. This step is necessary even when not quantizing,
+        # as it prepares the graph for the pruning process.
+        float_graph_with_compression_config = set_quantization_configuration_to_graph(float_graph,
+                                                                                      quant_config=DEFAULTCONFIG,
+                                                                                      mixed_precision_enable=False)
+        # Create a Pruner object with the graph and configuration.
+        pruner = Pruner(float_graph_with_compression_config,
+                        DEFAULT_PYTORCH_INFO,
+                        fw_impl,
+                        target_kpi,
+                        representative_data_gen,
+                        pruning_config,
+                        target_platform_capabilities)
+        # Apply the pruning process.
+        pruned_graph = pruner.prune_graph()
+        # Retrieve pruning information which includes the pruning masks and scores.
+        pruning_info = pruner.get_pruning_info()
+        # Rebuild the pruned graph back into a trainable Pytorch model.
+        pruned_model, _ = FloatPyTorchModelBuilder(graph=pruned_graph).build_model()
+        pruned_model.trainable = True
+        # Return the pruned model along with its pruning information.
+        return pruned_model, pruning_info
+else:
+    def pytorch_pruning_experimental(*args, **kwargs):
+        """
+        Raises a critical error if PyTorch is not installed but the pruning function is invoked.
+        This function acts as a placeholder to provide a clear error message when PyTorch dependencies are missing,
+        indicating that the pruning functionality cannot be used without the PyTorch framework installed.
+        Args:
+            *args: Variable length argument list, not used.
+            **kwargs: Arbitrary keyword arguments, not used.
+        Raises:
+            CriticalError: Indicates that PyTorch must be installed to use this function.
+        """
+        Logger.critical('Installing Pytorch is mandatory '
+                        'when using pytorch_pruning_experimental. '
+                        'Could not find the torch package.')  # pragma: no cover

model_compression_toolkit/ptq/keras/quantization_facade.py CHANGED Viewed

@@ -22,7 +22,7 @@ from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF
 from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
-    MixedPrecisionQuantizationConfigV2
+    MixedPrecisionQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
 from model_compression_toolkit.core.exporter import export_model
 from model_compression_toolkit.core.runner import core_runner
@@ -99,7 +99,7 @@ if FOUND_TF:
             The candidates bitwidth for quantization should be defined in the target platform model.
             In this example we use 1 image to search mixed-precision configuration:
-            >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfigV2(num_of_images=1))
+            >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=1))
             For mixed-precision set a target KPI object:
             Create a KPI object to limit our returned model's size. Note that this value affects only coefficients
@@ -123,14 +123,11 @@ if FOUND_TF:
                              fw_info=fw_info).validate()
         if core_config.mixed_precision_enable:
-            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
+            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.error("Given quantization config to mixed-precision facade is not of type "
-                                    "MixedPrecisionQuantizationConfigV2. Please use keras_post_training_quantization "
+                                    "MixedPrecisionQuantizationConfig. Please use keras_post_training_quantization "
                                     "API, or pass a valid mixed precision configuration.")  # pragma: no cover
-            Logger.info("Using experimental mixed-precision quantization. "
-                               "If you encounter an issue please file a bug.")
         tb_w = init_tensorboard_writer(fw_info)
         fw_impl = KerasImplementation()

model_compression_toolkit/ptq/pytorch/quantization_facade.py CHANGED Viewed

@@ -22,7 +22,7 @@ from model_compression_toolkit.target_platform_capabilities.target_platform impo
 from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
 from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
-    MixedPrecisionQuantizationConfigV2
+    MixedPrecisionQuantizationConfig
 from model_compression_toolkit.core.runner import core_runner
 from model_compression_toolkit.ptq.runner import ptq_runner
 from model_compression_toolkit.core.exporter import export_model
@@ -94,15 +94,12 @@ if FOUND_TORCH:
         """
         if core_config.mixed_precision_enable:
-            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
+            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.error("Given quantization config to mixed-precision facade is not of type "
-                             "MixedPrecisionQuantizationConfigV2. Please use "
+                             "MixedPrecisionQuantizationConfig. Please use "
                              "pytorch_post_training_quantization API, or pass a valid mixed precision "
                              "configuration.")  # pragma: no cover
-            Logger.info("Using experimental mixed-precision quantization. "
-                        "If you encounter an issue please file a bug.")
         tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)
         fw_impl = PytorchImplementation()

model_compression_toolkit/qat/keras/quantization_facade.py CHANGED Viewed

@@ -22,7 +22,7 @@ from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import FOUND_TF
 from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
-    MixedPrecisionQuantizationConfigV2
+    MixedPrecisionQuantizationConfig
 from mct_quantizers import KerasActivationQuantizationHolder
 from model_compression_toolkit.trainable_infrastructure import KerasTrainableQuantizationWrapper
 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
@@ -145,7 +145,7 @@ if FOUND_TF:
              If mixed precision is desired, create a MCT core config with a mixed-precision configuration, to quantize a model with different bitwidths for different layers.
              The candidates bitwidth for quantization should be defined in the target platform model:
-             >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfigV2())
+             >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig())
              For mixed-precision set a target KPI object:
              Create a KPI object to limit our returned model's size. Note that this value affects only coefficients
@@ -170,13 +170,10 @@ if FOUND_TF:
                              fw_info=fw_info).validate()
         if core_config.mixed_precision_enable:
-            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
+            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.error("Given quantization config to mixed-precision facade is not of type "
-                                    "MixedPrecisionQuantizationConfigV2. Please use keras_post_training_quantization API,"
-                                    "or pass a valid mixed precision configuration.")
-            Logger.info("Using experimental mixed-precision quantization. "
-                               "If you encounter an issue please file a bug.")
+                             "MixedPrecisionQuantizationConfig. Please use keras_post_training_quantization API,"
+                             "or pass a valid mixed precision configuration.")
         tb_w = init_tensorboard_writer(fw_info)
@@ -239,7 +236,7 @@ if FOUND_TF:
              If mixed precision is desired, create a MCT core config with a mixed-precision configuration, to quantize a model with different bitwidths for different layers.
              The candidates bitwidth for quantization should be defined in the target platform model:
-             >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfigV2())
+             >>> config = mct.core.CoreConfig(mixed_precision_config=MixedPrecisionQuantizationConfig())
              For mixed-precision set a target KPI object:
              Create a KPI object to limit our returned model's size. Note that this value affects only coefficients

model_compression_toolkit/qat/pytorch/quantization_facade.py CHANGED Viewed

@@ -25,7 +25,7 @@ from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
-    MixedPrecisionQuantizationConfigV2
+    MixedPrecisionQuantizationConfig
 from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import \
     TargetPlatformCapabilities
 from model_compression_toolkit.core.runner import core_runner
@@ -138,16 +138,12 @@ if FOUND_TORCH:
          """
         if core_config.mixed_precision_enable:
-            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
+            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.error("Given quantization config to mixed-precision facade is not of type "
-                             "MixedPrecisionQuantizationConfigV2. Please use pytorch_post_training_quantization API,"
+                             "MixedPrecisionQuantizationConfig. Please use pytorch_post_training_quantization API,"
                              "or pass a valid mixed precision configuration.")
-            Logger.info("Using experimental mixed-precision quantization. "
-                        "If you encounter an issue please file a bug.")
         tb_w = init_tensorboard_writer(fw_info)
         fw_impl = PytorchImplementation()
         # Ignore trace hessian service as we do not use it here

model_compression_toolkit/core/common/quantization/quantization_params_generation/kmeans_params.py DELETED Viewed

@@ -1,64 +0,0 @@
-# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-import numpy as np
-from sklearn.cluster import KMeans
-import model_compression_toolkit.core.common.quantization.quantization_config as qc
-from model_compression_toolkit.constants import LUT_VALUES, SCALE_PER_CHANNEL, MIN_THRESHOLD, EPS
-def kmeans_tensor(tensor_data: np.ndarray,
-                  p: int,
-                  n_bits: int,
-                  per_channel: bool = False,
-                  channel_axis: int = 1,
-                  n_iter: int = 10,
-                  min_threshold: float = MIN_THRESHOLD,
-                  quant_error_method: qc.QuantizationErrorMethod = None) -> dict:
-    """
-    Compute the 2^nbit cluster assignments for the given tensor according to the k-means algorithm.
-    Args:
-        tensor_data: Tensor content as Numpy array.
-        p: p-norm to use for the Lp-norm distance.
-        n_bits: Number of bits to quantize the tensor.
-        per_channel: Whether the quantization should be per-channel or not.
-        channel_axis: Output channel index.
-        n_iter: Number of iterations to search_methods for the optimal threshold.
-        min_threshold: Minimal threshold to chose when the computed one is smaller.
-        quant_error_method: an error function to optimize the parameters' selection accordingly (not used for this method).
-    Returns:
-        A dictionary containing the cluster assignments according to the k-means algorithm and the scales per channel.
-    """
-    if len(np.unique(tensor_data.flatten())) < 2 ** n_bits:
-        n_clusters = len(np.unique(tensor_data.flatten()))
-    else:
-        n_clusters = 2 ** n_bits
-    kmeans = KMeans(n_clusters=n_clusters)
-    axis_not_channel = [i for i in range(len(tensor_data.shape))]
-    if channel_axis in axis_not_channel:
-        axis_not_channel.remove(channel_axis)
-    if per_channel:
-        scales_per_channel = np.max(np.abs(tensor_data), axis=tuple(axis_not_channel), keepdims=True)
-    else:
-        scales_per_channel = np.max(np.abs(tensor_data), keepdims=True)
-    tensor_for_kmeans = (tensor_data / (scales_per_channel + EPS))
-    kmeans.fit(tensor_for_kmeans.reshape(-1, 1))
-    return {LUT_VALUES: kmeans.cluster_centers_,
-            SCALE_PER_CHANNEL: scales_per_channel,
-            }

model_compression_toolkit/core/common/quantization/quantizers/kmeans_quantizer.py DELETED Viewed

@@ -1,53 +0,0 @@
-# Copyright 2021 Sony Semiconductor Israel, Inc. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-from sklearn.cluster import KMeans
-import numpy as np
-from model_compression_toolkit.constants import LUT_VALUES, MIN_THRESHOLD, SCALE_PER_CHANNEL
-from model_compression_toolkit.core.common.quantization.quantizers.quantizers_helpers import kmeans_assign_clusters
-def kmeans_quantizer(tensor_data: np.ndarray,
-                        n_bits: int,
-                        signed: bool,
-                        quantization_params: dict,
-                        per_channel: bool,
-                        output_channels_axis: int) -> np.ndarray:
-    """
-    Quantize a tensor according to k-means algorithm. This function assigns cluster centers
-    to the tensor data values.
-    Args:
-        tensor_data: Tensor values to quantize.
-        n_bits: Number of bits to quantize the tensor.
-        signed: Whether the tensor contains negative values or not.
-        quantization_params: Dictionary of specific parameters for this quantization function.
-        per_channel: Whether to use separate quantization per output channel.
-        output_channels_axis: Axis of the output channel.
-    Returns:
-        Quantized data.
-    """
-    eps = 1e-8
-    lut_values = quantization_params[LUT_VALUES]
-    scales_per_channel = quantization_params[SCALE_PER_CHANNEL]
-    tensor = (tensor_data / (scales_per_channel + eps))
-    shape_before_kmeans = tensor.shape
-    cluster_assignments = kmeans_assign_clusters(lut_values, tensor.reshape(-1, 1))
-    quant_tensor = lut_values[cluster_assignments].reshape(shape_before_kmeans)
-    if per_channel:
-        quant_tensor = (quant_tensor * scales_per_channel)
-    return quant_tensor

{mct_nightly-1.11.0.20240304.post404.dist-info → mct_nightly-1.11.0.20240305.post352.dist-info}/LICENSE.md RENAMED Viewed

File without changes

{mct_nightly-1.11.0.20240304.post404.dist-info → mct_nightly-1.11.0.20240305.post352.dist-info}/WHEEL RENAMED Viewed

File without changes

{mct_nightly-1.11.0.20240304.post404.dist-info → mct_nightly-1.11.0.20240305.post352.dist-info}/top_level.txt RENAMED Viewed

File without changes

mct-nightly 1.11.0.20240304.post404__py3-none-any.whl → 1.11.0.20240305.post352__py3-none-any.whl

mct-nightly 1.11.0.20240304.post404__py3-none-any.whl → 1.11.0.20240305.post352__py3-none-any.whl