PyPI - mct-nightly - Versions diffs - 1.11.0.20240304.post404__py3-none-any.whl → 1.11.0.20240306.post426__py3-none-any.whl - Mend

mct-nightly 1.11.0.20240304.post404__py3-none-any.whl → 1.11.0.20240306.post426__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

model_compression_toolkit/gptq/keras/quantization_facade.py CHANGED Viewed

@@ -21,10 +21,10 @@ from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import TENSORFLOW, FOUND_TF
 from model_compression_toolkit.core.common.user_info import UserInformation
-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
 from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
-from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfigV2
+from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig
 from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.runner import core_runner
 from model_compression_toolkit.gptq.runner import gptq_runner
@@ -66,7 +66,7 @@ if FOUND_TF:
                               loss: Callable = GPTQMultipleTensorsLoss(),
                               log_function: Callable = None,
                               use_hessian_based_weights: bool = True,
-                              regularization_factor: float = REG_DEFAULT) -> GradientPTQConfigV2:
+                              regularization_factor: float = REG_DEFAULT) -> GradientPTQConfig:
         """
         Create a GradientPTQConfigV2 instance for Keras models.
@@ -102,26 +102,25 @@ if FOUND_TF:
         """
         bias_optimizer = tf.keras.optimizers.SGD(learning_rate=LR_BIAS_DEFAULT,
                                                  momentum=GPTQ_MOMENTUM)
-        return GradientPTQConfigV2(n_epochs,
-                                   optimizer,
-                                   optimizer_rest=optimizer_rest,
-                                   loss=loss,
-                                   log_function=log_function,
-                                   train_bias=True,
-                                   optimizer_bias=bias_optimizer,
-                                   use_hessian_based_weights=use_hessian_based_weights,
-                                   regularization_factor=regularization_factor)
+        return GradientPTQConfig(n_epochs,
+                                 optimizer,
+                                 optimizer_rest=optimizer_rest,
+                                 loss=loss,
+                                 log_function=log_function,
+                                 train_bias=True,
+                                 optimizer_bias=bias_optimizer,
+                                 use_hessian_based_weights=use_hessian_based_weights,
+                                 regularization_factor=regularization_factor)
     def keras_gradient_post_training_quantization(in_model: Model,
                                                   representative_data_gen: Callable,
-                                                  gptq_config: GradientPTQConfigV2,
+                                                  gptq_config: GradientPTQConfig,
                                                   gptq_representative_data_gen: Callable = None,
                                                   target_kpi: KPI = None,
                                                   core_config: CoreConfig = CoreConfig(),
                                                   fw_info: FrameworkInfo = DEFAULT_KERAS_INFO,
-                                                  target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC,
-                                                  new_experimental_exporter: bool = True) -> Tuple[Model, UserInformation]:
+                                                  target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_KERAS_TPC) -> Tuple[Model, UserInformation]:
         """
         Quantize a trained Keras model using post-training quantization. The model is quantized using a
         symmetric constraint quantization thresholds (power of two).
@@ -141,13 +140,12 @@ if FOUND_TF:
         Args:
             in_model (Model): Keras model to quantize.
             representative_data_gen (Callable): Dataset used for calibration.
-            gptq_config (GradientPTQConfigV2): Configuration for using gptq (e.g. optimizer).
+            gptq_config (GradientPTQConfig): Configuration for using gptq (e.g. optimizer).
             gptq_representative_data_gen (Callable): Dataset used for GPTQ training. If None defaults to representative_data_gen
             target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired.
             core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
             fw_info (FrameworkInfo): Information needed for quantization about the specific framework (e.g., kernel channels indices, groups of layers by how they should be quantized, etc.). `Default Keras info <https://github.com/sony/model_optimization/blob/main/model_compression_toolkit/core/keras/default_framework_info.py>`_
             target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the Keras model according to.
-            new_experimental_exporter (bool): Whether to wrap the quantized model using quantization information or not. Enabled by default. Experimental and subject to future changes.
         Returns:
@@ -177,7 +175,7 @@ if FOUND_TF:
             with different bitwidths for different layers.
             The candidates bitwidth for quantization should be defined in the target platform model:
-            >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfigV2(num_of_images=1))
+            >>> config = mct.core.CoreConfig(mixed_precision_config=mct.core.MixedPrecisionQuantizationConfig(num_of_images=1))
             For mixed-precision set a target KPI object:
             Create a KPI object to limit our returned model's size. Note that this value affects only coefficients
@@ -199,9 +197,9 @@ if FOUND_TF:
                              fw_info=fw_info).validate()
         if core_config.mixed_precision_enable:
-            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
+            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.error("Given quantization config to mixed-precision facade is not of type "
-                             "MixedPrecisionQuantizationConfigV2. Please use keras_post_training_quantization "
+                             "MixedPrecisionQuantizationConfig. Please use keras_post_training_quantization "
                              "API, or pass a valid mixed precision configuration.")  # pragma: no cover
         tb_w = init_tensorboard_writer(fw_info)
@@ -232,19 +230,7 @@ if FOUND_TF:
         if core_config.debug_config.analyze_similarity:
             analyzer_model_quantization(representative_data_gen, tb_w, tg_gptq, fw_impl, fw_info)
-        if new_experimental_exporter:
-            Logger.warning('Using new experimental wrapped and ready for export models. To '
-                           'disable it, please set new_experimental_exporter to False when '
-                           'calling keras_gradient_post_training_quantization. '
-                           'If you encounter an issue please file a bug.')
-            return get_exportable_keras_model(tg_gptq)
-        return export_model(tg_gptq,
-                            fw_info,
-                            fw_impl,
-                            tb_w,
-                            bit_widths_config)
+        return get_exportable_keras_model(tg_gptq)
 else:
     # If tensorflow is not installed,

model_compression_toolkit/gptq/keras/quantizer/quantization_builder.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # ==============================================================================
 from typing import Dict, List, Tuple
-from model_compression_toolkit.gptq import GradientPTQConfigV2
+from model_compression_toolkit.gptq import GradientPTQConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.keras.default_framework_info import DEFAULT_KERAS_INFO
 from model_compression_toolkit.exporter.model_wrapper.keras.builder.node_to_quantizer import \
@@ -33,7 +33,7 @@ from model_compression_toolkit.trainable_infrastructure.common.get_quantizers im
 def quantization_builder(n: common.BaseNode,
-                         gptq_config: GradientPTQConfigV2
+                         gptq_config: GradientPTQConfig
                          ) -> Tuple[Dict[str, BaseKerasGPTQTrainableQuantizer], List[BaseKerasInferableQuantizer]]:
     """
     Build quantizers for a node according to its quantization configuration and
@@ -41,7 +41,7 @@ def quantization_builder(n: common.BaseNode,
     Args:
         n: Node to build its QuantizeConfig.
-        gptq_config (GradientPTQConfigV2): GradientPTQConfigV2 configuration.
+        gptq_config (GradientPTQConfig): GradientPTQConfigV2 configuration.
     Returns:
         A dictionary which maps the weights kernel attribute to a quantizer for GPTQ training.

model_compression_toolkit/gptq/keras/quantizer/regularization_factory.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # ==============================================================================
 from typing import Callable
-from model_compression_toolkit.gptq import RoundingType, GradientPTQConfigV2, GradientPTQConfig
+from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig, GradientPTQConfig
 from model_compression_toolkit.gptq.keras.quantizer.soft_rounding.soft_quantizer_reg import \
     SoftQuantizerRegularization
@@ -38,8 +38,6 @@ def get_regularization(gptq_config: GradientPTQConfig, representative_data_gen:
         for _ in representative_data_gen():
             num_batches += 1
-        n_epochs = GradientPTQConfigV2.from_v1(n_ptq_iter=num_batches, config_v1=gptq_config).n_epochs if \
-            not type(gptq_config) == GradientPTQConfigV2 else gptq_config.n_epochs
-        return SoftQuantizerRegularization(total_gradient_steps=num_batches * n_epochs)
+        return SoftQuantizerRegularization(total_gradient_steps=num_batches * gptq_config.n_epochs)
     else:
         return lambda m, e_reg: 0

model_compression_toolkit/gptq/pytorch/gptq_training.py CHANGED Viewed

@@ -25,7 +25,7 @@ from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
 from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
 from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
 from model_compression_toolkit.core.common import Graph, BaseNode
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
@@ -46,7 +46,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
     def __init__(self,
                  graph_float: Graph,
                  graph_quant: Graph,
-                 gptq_config: GradientPTQConfigV2,
+                 gptq_config: GradientPTQConfig,
                  fw_impl: FrameworkImplementation,
                  fw_info: FrameworkInfo,
                  representative_data_gen: Callable,

model_compression_toolkit/gptq/pytorch/quantization_facade.py CHANGED Viewed

@@ -19,7 +19,7 @@ from model_compression_toolkit.core.common.visualization.tensorboard_writer impo
 from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.constants import PYTORCH
-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
 from model_compression_toolkit.target_platform_capabilities.target_platform import TargetPlatformCapabilities
 from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
 from model_compression_toolkit.core.runner import core_runner
@@ -29,7 +29,7 @@ from model_compression_toolkit.core.exporter import export_model
 from model_compression_toolkit.core.analyzer import analyzer_model_quantization
 from model_compression_toolkit.core import CoreConfig
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
-    MixedPrecisionQuantizationConfigV2
+    MixedPrecisionQuantizationConfig
 LR_DEFAULT = 1e-4
 LR_REST_DEFAULT = 1e-4
@@ -54,7 +54,7 @@ if FOUND_TORCH:
                                 loss: Callable = multiple_tensors_mse_loss,
                                 log_function: Callable = None,
                                 use_hessian_based_weights: bool = True,
-                                regularization_factor: float = REG_DEFAULT) -> GradientPTQConfigV2:
+                                regularization_factor: float = REG_DEFAULT) -> GradientPTQConfig:
         """
         Create a GradientPTQConfigV2 instance for Pytorch models.
@@ -86,21 +86,19 @@ if FOUND_TORCH:
         """
         bias_optimizer = torch.optim.SGD([torch.Tensor([])], lr=LR_BIAS_DEFAULT, momentum=GPTQ_MOMENTUM)
-        return GradientPTQConfigV2(n_epochs, optimizer, optimizer_rest=optimizer_rest, loss=loss,
-                                   log_function=log_function, train_bias=True, optimizer_bias=bias_optimizer,
-                                   use_hessian_based_weights=use_hessian_based_weights,
-                                   regularization_factor=regularization_factor)
+        return GradientPTQConfig(n_epochs, optimizer, optimizer_rest=optimizer_rest, loss=loss,
+                                 log_function=log_function, train_bias=True, optimizer_bias=bias_optimizer,
+                                 use_hessian_based_weights=use_hessian_based_weights,
+                                 regularization_factor=regularization_factor)
     def pytorch_gradient_post_training_quantization(model: Module,
                                                     representative_data_gen: Callable,
                                                     target_kpi: KPI = None,
                                                     core_config: CoreConfig = CoreConfig(),
-                                                    gptq_config: GradientPTQConfigV2 = None,
+                                                    gptq_config: GradientPTQConfig = None,
                                                     gptq_representative_data_gen: Callable = None,
-                                                    target_platform_capabilities: TargetPlatformCapabilities =
-                                                    DEFAULT_PYTORCH_TPC,
-                                                    new_experimental_exporter: bool = True):
+                                                    target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYTORCH_TPC):
         """
         Quantize a trained Pytorch module using post-training quantization.
         By default, the module is quantized using a symmetric constraint quantization thresholds
@@ -122,10 +120,9 @@ if FOUND_TORCH:
             representative_data_gen (Callable): Dataset used for calibration.
             target_kpi (KPI): KPI object to limit the search of the mixed-precision configuration as desired.
             core_config (CoreConfig): Configuration object containing parameters of how the model should be quantized, including mixed precision parameters.
-            gptq_config (GradientPTQConfigV2): Configuration for using gptq (e.g. optimizer).
+            gptq_config (GradientPTQConfig): Configuration for using gptq (e.g. optimizer).
             gptq_representative_data_gen (Callable): Dataset used for GPTQ training. If None defaults to representative_data_gen
             target_platform_capabilities (TargetPlatformCapabilities): TargetPlatformCapabilities to optimize the PyTorch model according to.
-            new_experimental_exporter (bool): Whether to wrap the quantized model using quantization information or not. Enabled by default. Experimental and subject to future changes.
         Returns:
             A quantized module and information the user may need to handle the quantized module.
@@ -157,9 +154,9 @@ if FOUND_TORCH:
         """
         if core_config.mixed_precision_enable:
-            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfigV2):
+            if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                 Logger.error("Given quantization config to mixed-precision facade is not of type "
-                             "MixedPrecisionQuantizationConfigV2. Please use keras_post_training_quantization "
+                             "MixedPrecisionQuantizationConfig. Please use keras_post_training_quantization "
                              "API, or pass a valid mixed precision configuration.")  # pragma: no cover
         tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)
@@ -194,22 +191,8 @@ if FOUND_TORCH:
         if core_config.debug_config.analyze_similarity:
             analyzer_model_quantization(representative_data_gen, tb_w, graph_gptq, fw_impl, DEFAULT_PYTORCH_INFO)
-        # ---------------------- #
-        # Export
-        # ---------------------- #
-        if new_experimental_exporter:
-            Logger.warning('Using new experimental wrapped and ready for export models. To '
-                           'disable it, please set new_experimental_exporter to False when '
-                           'calling pytorch_gradient_post_training_quantization_experimental. '
-                           'If you encounter an issue please file a bug.')
-            return get_exportable_pytorch_model(graph_gptq)
-        return export_model(graph_gptq,
-                            DEFAULT_PYTORCH_INFO,
-                            fw_impl,
-                            tb_w,
-                            bit_widths_config)
+        return get_exportable_pytorch_model(graph_gptq)
 else:
     # If torch is not installed,

model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # ==============================================================================
 from typing import List, Dict, Tuple
-from model_compression_toolkit.gptq import GradientPTQConfigV2
+from model_compression_toolkit.gptq import GradientPTQConfig
 from model_compression_toolkit.core import common
 from model_compression_toolkit.core.pytorch.constants import KERNEL
 from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.node_to_quantizer import \
@@ -34,7 +34,7 @@ from model_compression_toolkit.trainable_infrastructure.common.get_quantizers im
 def quantization_builder(n: common.BaseNode,
-                         gptq_config: GradientPTQConfigV2,
+                         gptq_config: GradientPTQConfig,
                          ) -> Tuple[Dict[str, BasePytorchQATTrainableQuantizer],
                                     List[BasePyTorchInferableQuantizer]]:
     """
@@ -43,7 +43,7 @@ def quantization_builder(n: common.BaseNode,
     Args:
         n: Node to build its QuantizeConfig.
-        gptq_config (GradientPTQConfigV2): GradientPTQConfigV2 configuration.
+        gptq_config (GradientPTQConfig): GradientPTQConfigV2 configuration.
     Returns:
         A dictionary which maps the weights kernel attribute to a quantizer for GPTQ training.

model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # ==============================================================================
 from typing import Callable
-from model_compression_toolkit.gptq import RoundingType, GradientPTQConfigV2, GradientPTQConfig
+from model_compression_toolkit.gptq import RoundingType, GradientPTQConfig, GradientPTQConfig
 from model_compression_toolkit.gptq.pytorch.quantizer.soft_rounding.soft_quantizer_reg import \
     SoftQuantizerRegularization
@@ -38,8 +38,6 @@ def get_regularization(gptq_config: GradientPTQConfig, representative_data_gen:
         for _ in representative_data_gen():
             num_batches += 1
-        n_epochs = GradientPTQConfigV2.from_v1(n_ptq_iter=num_batches, config_v1=gptq_config).n_epochs if \
-            not type(gptq_config) == GradientPTQConfigV2 else gptq_config.n_epochs
-        return SoftQuantizerRegularization(total_gradient_steps=num_batches * n_epochs)
+        return SoftQuantizerRegularization(total_gradient_steps=num_batches * gptq_config.n_epochs)
     else:
         return lambda m, e_reg: 0

model_compression_toolkit/gptq/runner.py CHANGED Viewed

@@ -20,7 +20,7 @@ from model_compression_toolkit.core import common
 from model_compression_toolkit.core.common.hessian import HessianInfoService
 from model_compression_toolkit.core.common.statistics_correction.statistics_correction import \
     apply_statistics_correction
-from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfigV2
+from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common import FrameworkInfo
 from model_compression_toolkit.core.common.graph.base_graph import Graph
@@ -32,7 +32,7 @@ from model_compression_toolkit.core.common.statistics_correction.apply_bias_corr
 from model_compression_toolkit.logger import Logger
-def _apply_gptq(gptq_config: GradientPTQConfigV2,
+def _apply_gptq(gptq_config: GradientPTQConfig,
                 representative_data_gen: Callable,
                 tb_w: TensorboardWriter,
                 tg: Graph,
@@ -74,7 +74,7 @@ def _apply_gptq(gptq_config: GradientPTQConfigV2,
 def gptq_runner(tg: Graph,
                 core_config: CoreConfig,
-                gptq_config: GradientPTQConfigV2,
+                gptq_config: GradientPTQConfig,
                 representative_data_gen: Callable,
                 gptq_representative_data_gen: Callable,
                 fw_info: FrameworkInfo,

model_compression_toolkit/pruning/__init__.py CHANGED Viewed

@@ -16,4 +16,5 @@
 from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
 from model_compression_toolkit.core.common.pruning.pruning_config import ImportanceMetric, PruningConfig, ChannelsFilteringStrategy
 from model_compression_toolkit.pruning.keras.pruning_facade import keras_pruning_experimental
+from model_compression_toolkit.pruning.pytorch.pruning_facade import pytorch_pruning_experimental

model_compression_toolkit/pruning/pytorch/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================

model_compression_toolkit/pruning/pytorch/pruning_facade.py ADDED Viewed

@@ -0,0 +1,166 @@
+# Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+from typing import Callable, Tuple
+from model_compression_toolkit import get_target_platform_capabilities
+from model_compression_toolkit.constants import FOUND_TORCH, PYTORCH
+from model_compression_toolkit.core.common.mixed_precision.kpi_tools.kpi import KPI
+from model_compression_toolkit.core.common.pruning.pruner import Pruner
+from model_compression_toolkit.core.common.pruning.pruning_config import PruningConfig
+from model_compression_toolkit.core.common.pruning.pruning_info import PruningInfo
+from model_compression_toolkit.core.common.quantization.set_node_quantization_config import set_quantization_configuration_to_graph
+from model_compression_toolkit.core.graph_prep_runner import read_model_to_graph
+from model_compression_toolkit.logger import Logger
+from model_compression_toolkit.target_platform_capabilities.target_platform.targetplatform2framework import TargetPlatformCapabilities
+from model_compression_toolkit.core.common.quantization.quantization_config import DEFAULTCONFIG
+from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
+# Check if PyTorch is available in the environment.
+if FOUND_TORCH:
+    # Import PyTorch-specific modules from the model compression toolkit.
+    from model_compression_toolkit.core.pytorch.back2framework.float_model_builder import FloatPyTorchModelBuilder
+    from model_compression_toolkit.core.pytorch.pruning.pruning_pytorch_implementation import \
+        PruningPytorchImplementation
+    from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
+    from torch.nn import Module
+    # Set the default Target Platform Capabilities (TPC) for PyTorch.
+    DEFAULT_PYOTRCH_TPC = get_target_platform_capabilities(PYTORCH, DEFAULT_TP_MODEL)
+    def pytorch_pruning_experimental(model: Module,
+                                     target_kpi: KPI,
+                                     representative_data_gen: Callable,
+                                     pruning_config: PruningConfig = PruningConfig(),
+                                     target_platform_capabilities: TargetPlatformCapabilities = DEFAULT_PYOTRCH_TPC) -> \
+            Tuple[Module, PruningInfo]:
+        """
+        Perform structured pruning on a Pytorch model to meet a specified target KPI.
+        This function prunes the provided model according to the target KPI by grouping and pruning
+        channels based on each layer's SIMD configuration in the Target Platform Capabilities (TPC).
+        By default, the importance of each channel group is determined using the Label-Free Hessian
+        (LFH) method, assessing each channel's sensitivity to the Hessian of the loss function.
+        This pruning strategy considers groups of channels together for a more hardware-friendly
+        architecture. The process involves analyzing the model with a representative dataset to
+        identify groups of channels that can be removed with minimal impact on performance.
+        Notice that the pruned model must be retrained to recover the compressed model's performance.
+        Args:
+            model (Module): The PyTorch model to be pruned.
+            target_kpi (KPI): Key Performance Indicators specifying the pruning targets.
+            representative_data_gen (Callable): A function to generate representative data for pruning analysis.
+            pruning_config (PruningConfig): Configuration settings for the pruning process. Defaults to standard config.
+            target_platform_capabilities (TargetPlatformCapabilities): Platform-specific constraints and capabilities.
+                Defaults to DEFAULT_PYTORCH_TPC.
+        Returns:
+            Tuple[Model, PruningInfo]: A tuple containing the pruned Pytorch model and associated pruning information.
+        Note:
+            The pruned model should be fine-tuned or retrained to recover or improve its performance post-pruning.
+        Examples:
+            Import MCT:
+            >>> import model_compression_toolkit as mct
+            Import a Pytorch model:
+            >>> from torchvision.models import resnet50, ResNet50_Weights
+            >>> model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
+            Create a random dataset generator:
+            >>> import numpy as np
+            >>> def repr_datagen(): yield [np.random.random((1, 3, 224, 224))]
+            Define a target KPI for pruning.
+            Here, we aim to reduce the memory footprint of weights by 50%, assuming the model weights
+            are represented in float32 data type (thus, each parameter is represented using 4 bytes):
+            >>> dense_nparams = sum(p.numel() for p in model.state_dict().values())
+            >>> target_kpi = mct.KPI(weights_memory=dense_nparams * 4 * 0.5)
+            Optionally, define a pruning configuration. num_score_approximations can be passed
+            to configure the number of importance scores that will be calculated for each channel.
+            A higher value for this parameter yields more precise score approximations but also
+            extends the duration of the pruning process:
+            >>> pruning_config = mct.pruning.PruningConfig(num_score_approximations=1)
+            Perform pruning:
+            >>> pruned_model, pruning_info = mct.pruning.pytorch_pruning_experimental(model=model, target_kpi=target_kpi, representative_data_gen=repr_datagen, pruning_config=pruning_config)
+        """
+        # Instantiate the Pytorch framework implementation.
+        fw_impl = PruningPytorchImplementation()
+        # Convert the original Pytorch model to an internal graph representation.
+        float_graph = read_model_to_graph(model,
+                                          representative_data_gen,
+                                          target_platform_capabilities,
+                                          DEFAULT_PYTORCH_INFO,
+                                          fw_impl)
+        # Apply quantization configuration to the graph. This step is necessary even when not quantizing,
+        # as it prepares the graph for the pruning process.
+        float_graph_with_compression_config = set_quantization_configuration_to_graph(float_graph,
+                                                                                      quant_config=DEFAULTCONFIG,
+                                                                                      mixed_precision_enable=False)
+        # Create a Pruner object with the graph and configuration.
+        pruner = Pruner(float_graph_with_compression_config,
+                        DEFAULT_PYTORCH_INFO,
+                        fw_impl,
+                        target_kpi,
+                        representative_data_gen,
+                        pruning_config,
+                        target_platform_capabilities)
+        # Apply the pruning process.
+        pruned_graph = pruner.prune_graph()
+        # Retrieve pruning information which includes the pruning masks and scores.
+        pruning_info = pruner.get_pruning_info()
+        # Rebuild the pruned graph back into a trainable Pytorch model.
+        pruned_model, _ = FloatPyTorchModelBuilder(graph=pruned_graph).build_model()
+        pruned_model.trainable = True
+        # Return the pruned model along with its pruning information.
+        return pruned_model, pruning_info
+else:
+    def pytorch_pruning_experimental(*args, **kwargs):
+        """
+        Raises a critical error if PyTorch is not installed but the pruning function is invoked.
+        This function acts as a placeholder to provide a clear error message when PyTorch dependencies are missing,
+        indicating that the pruning functionality cannot be used without the PyTorch framework installed.
+        Args:
+            *args: Variable length argument list, not used.
+            **kwargs: Arbitrary keyword arguments, not used.
+        Raises:
+            CriticalError: Indicates that PyTorch must be installed to use this function.
+        """
+        Logger.critical('Installing Pytorch is mandatory '
+                        'when using pytorch_pruning_experimental. '
+                        'Could not find the torch package.')  # pragma: no cover

model_compression_toolkit/ptq/__init__.py CHANGED Viewed

@@ -13,5 +13,5 @@
 # limitations under the License.
 # ==============================================================================
-from model_compression_toolkit.ptq.pytorch.quantization_facade import pytorch_post_training_quantization_experimental
-from model_compression_toolkit.ptq.keras.quantization_facade import keras_post_training_quantization_experimental
+from model_compression_toolkit.ptq.pytorch.quantization_facade import pytorch_post_training_quantization
+from model_compression_toolkit.ptq.keras.quantization_facade import keras_post_training_quantization

mct-nightly 1.11.0.20240304.post404__py3-none-any.whl → 1.11.0.20240306.post426__py3-none-any.whl

mct-nightly 1.11.0.20240304.post404__py3-none-any.whl → 1.11.0.20240306.post426__py3-none-any.whl