PyPI - mct-nightly - Versions diffs - 2.3.0.20250526.601__py3-none-any.whl → 2.3.0.20250527.555__py3-none-any.whl - Mend

mct-nightly 2.3.0.20250526.601__py3-none-any.whl → 2.3.0.20250527.555__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{mct_nightly-2.3.0.20250526.601.dist-info → mct_nightly-2.3.0.20250527.555.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mct-nightly
-Version: 2.3.0.20250526.601
+Version: 2.3.0.20250527.555
 Summary: A Model Compression Toolkit for neural networks
 Author-email: ssi-dnn-dev@sony.com
 Classifier: Programming Language :: Python :: 3

{mct_nightly-2.3.0.20250526.601.dist-info → mct_nightly-2.3.0.20250527.555.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
-mct_nightly-2.3.0.20250526.601.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
-model_compression_toolkit/__init__.py,sha256=h0yrmmeo04GsUcV-lK41wbKSmLv-C_RXbP5Bgqo0EOA,1557
+mct_nightly-2.3.0.20250527.555.dist-info/licenses/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+model_compression_toolkit/__init__.py,sha256=ac_6iGXJR83ii1qVJhusgDNmQ7il3U3QYpAm-wdLf14,1557
 model_compression_toolkit/constants.py,sha256=KNgiNLpsMgSYyXMNEbHXd4bFNerQc1D6HH3vpbUq_Gs,4086
 model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
 model_compression_toolkit/logger.py,sha256=L3q7tn3Uht0i_7phnlOWMR2Te2zvzrt2HOz9vYEInts,4529
 model_compression_toolkit/metadata.py,sha256=x_Bk4VpzILdsFax6--CZ3X18qUTP28sbF_AhoQW8dNc,4003
 model_compression_toolkit/verify_packages.py,sha256=l0neIRr8q_QwxmuiTI4vyCMDISDedK0EihjEQUe66tE,1319
-model_compression_toolkit/core/__init__.py,sha256=8a0wUNBKwTdJGDk_Ho6WQAXjGuCqQZG1FUxxJlAV8L8,2096
+model_compression_toolkit/core/__init__.py,sha256=phfdtc09uruSyOpWRaUMUeMNRSwYB5q9NBus3cqcjIM,2113
 model_compression_toolkit/core/analyzer.py,sha256=X-2ZpkH1xdXnISnw1yJvXnvV-ssoUh-9LkLISSWNqiY,3691
 model_compression_toolkit/core/graph_prep_runner.py,sha256=C6eUTd-fcgxk0LUbt51gFZwmyDDDEB8-9Q4kr9ujYvI,11555
 model_compression_toolkit/core/quantization_prep_runner.py,sha256=DPevqQ8brkdut8K5f5v9g5lbT3r1GSmhLAk3NkL40Fg,6593
@@ -60,18 +60,15 @@ model_compression_toolkit/core/common/matchers/edge_matcher.py,sha256=bS9KIBhB6Y
 model_compression_toolkit/core/common/matchers/function.py,sha256=kMwcinxn_PInvetNh_L_lqGXT1hoi3f97PqBpjqfXoA,1773
 model_compression_toolkit/core/common/matchers/node_matcher.py,sha256=63cMwa5YbQ5LKZy8-KFmdchVc3N7mpDJ6fNDt_uAQsk,2745
 model_compression_toolkit/core/common/matchers/walk_matcher.py,sha256=xqfLKk6xZt72hSnND_HoX5ESOooNMypb5VOZkVsJ_nw,1111
-model_compression_toolkit/core/common/mixed_precision/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
-model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py,sha256=lB3cxQPQqpAH5tP6kqOxqv7RmOtf1YciIkvr9irvKq0,7084
+model_compression_toolkit/core/common/mixed_precision/__init__.py,sha256=Jm6pls3QUCMQ9d86KOYxOq05br_k130ByGHLCojIZ_M,766
+model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py,sha256=npqLPyk5xXR11M_zdImtSALc5vJv9N4fEapaludKLBw,7139
 model_compression_toolkit/core/common/mixed_precision/configurable_quant_id.py,sha256=LLDguK7afsbN742ucLpmJr5TUfTyFpK1vbf2bpVr1v0,882
 model_compression_toolkit/core/common/mixed_precision/configurable_quantizer_utils.py,sha256=7dKMi5S0zQZ16m8NWn1XIuoXsKuZUg64G4-uK8-j1PQ,5177
-model_compression_toolkit/core/common/mixed_precision/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_candidates_filter.py,sha256=6pLUEEIqRTVIlCYQC4JIvY55KAvuBHEX8uTOQ-1Ac4Q,3859
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=BO4ouM_UVS9Fg0z95gLJSMz1ep6YQC5za_iXI_qW2yQ,5399
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py,sha256=rdtxPmRhjrC160O3fqAjDzGxpMeM49hYhmlnf_Kwqds,5416
 model_compression_toolkit/core/common/mixed_precision/mixed_precision_ru_helper.py,sha256=axgAypzsiCOw04ZOtOEjK4riuNsaEU2qU6KkWnEXtMo,4951
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=KhiHGpmN5QbpyJQnTZmXigdXFlSlRNqpOOyKGj1Fwek,6412
-model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=MXOK9WPy3fSt5uxsWYMF4szwwqWWgrlzNJdE9VIb-AQ,28145
-model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py,sha256=4uhUXKgwyMrJqEVK5uJzVr67GI5YzDTHLveV4maB7z0,28079
-model_compression_toolkit/core/common/mixed_precision/set_layer_to_bitwidth.py,sha256=Zn6SgzGLWWKmuYGHd1YtKxZdYnQWRDeXEkKlBiTbHcs,2929
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py,sha256=1877xOUdgpWrXWyhdX1pJOePuopq43L71WqBFMqzyR4,6418
+model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py,sha256=TAxA9BKxINwUQfJpmf2Qghz-5DTbesuf1Pe1L0Tc-j4,28157
 model_compression_toolkit/core/common/mixed_precision/solution_refinement_procedure.py,sha256=MY8df-c_kITEr_7hOctaxhdiq29hSTA0La9Qo0oTJJY,9678
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization.py,sha256=PKkhc5q8pEPnNLXwo3U56EOCfYnPXIvPs0LlCGZOoKU,4426
@@ -79,6 +76,11 @@ model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools
 model_compression_toolkit/core/common/mixed_precision/resource_utilization_tools/resource_utilization_data.py,sha256=ZY5yFIDzbaqIk0UzakDBObfsVevn4fydqAfAm4RCikY,4058
 model_compression_toolkit/core/common/mixed_precision/search_methods/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
 model_compression_toolkit/core/common/mixed_precision/search_methods/linear_programming.py,sha256=6Z6nQL9UH7B8dbcUR0cuCTEYFOKZAlvOb-SCk_cAZFA,6670
+model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/__init__.py,sha256=5yxITHNJcCfeGKdIpAYbNbKDoXUSvENuRQm3OQu8Qf4,697
+model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/distance_weighting.py,sha256=-x8edUyudu1EAEM66AuXPtgayLpzbxoLNubfEbFM5kU,2867
+model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/metric_calculators.py,sha256=W4CySFtN874npcM9j9wu1PVrv7IZHLyKdLOPrTsCNQg,22209
+model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py,sha256=5l0qP0mZ061xh3rjqTJZcLD2mMKC-hfSnNAN0OmSusk,8938
+model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/set_layer_to_bitwidth.py,sha256=Zn6SgzGLWWKmuYGHd1YtKxZdYnQWRDeXEkKlBiTbHcs,2929
 model_compression_toolkit/core/common/network_editors/__init__.py,sha256=vZmu55bYqiaOQs3AjfwWDXHmuKZcLHt-wm7uR5fPEqg,1307
 model_compression_toolkit/core/common/network_editors/actions.py,sha256=nid0_j-Cn10xvmztT8yCKW_6uA7JEnom9SW9syx7wc0,19594
 model_compression_toolkit/core/common/network_editors/edit_network.py,sha256=dfgawi-nB0ocAJ0xcGn9E-Zv203oUnQLuMiXpX8vTgA,1748
@@ -132,7 +134,7 @@ model_compression_toolkit/core/common/statistics_correction/__init__.py,sha256=s
 model_compression_toolkit/core/common/statistics_correction/apply_activation_bias_correction_to_graph.py,sha256=b05ZwQ2CwG0Q-yqs9A1uHfP8o17aGEZFCeJNP1p4IWk,4450
 model_compression_toolkit/core/common/statistics_correction/apply_bias_correction_to_graph.py,sha256=b5clhUWGoDaQLn2pDCeYkV0FomVebcKS8pMXtQTTzIg,4679
 model_compression_toolkit/core/common/statistics_correction/apply_second_moment_correction_to_graph.py,sha256=C_nwhhitTd1pCto0nHZPn3fjIMOeDD7VIciumTR3s6k,5641
-model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py,sha256=F8kK8yoYCGeTdXUsHGcM3T2tRdjSlcWg3UToGtovNOs,9196
+model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py,sha256=zIkhOPF6K5aIgMExpD7HFT9UZSDpvXh51F6V-qZ7H-4,9048
 model_compression_toolkit/core/common/statistics_correction/compute_bias_correction_of_graph.py,sha256=LaGhYES7HgIDf9Bi2KAG_mBzAWuum0J6AGmAFPC8wwo,10478
 model_compression_toolkit/core/common/statistics_correction/statistics_correction.py,sha256=E0ZA4edimJwpHh9twI5gafcoJ9fX5F1JX2QUOkUOKEw,6250
 model_compression_toolkit/core/common/substitutions/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
@@ -528,7 +530,7 @@ model_compression_toolkit/xquant/pytorch/model_analyzer.py,sha256=b93o800yVB3Z-i
 model_compression_toolkit/xquant/pytorch/pytorch_report_utils.py,sha256=UVN_S9ULHBEldBpShCOt8-soT8YTQ5oE362y96qF_FA,3950
 model_compression_toolkit/xquant/pytorch/similarity_functions.py,sha256=CERxq5K8rqaiE-DlwhZBTUd9x69dtYJlkHOPLB54vm8,2354
 model_compression_toolkit/xquant/pytorch/tensorboard_utils.py,sha256=mkoEktLFFHtEKzzFRn_jCnxjhJolK12TZ5AQeDHzUO8,9767
-mct_nightly-2.3.0.20250526.601.dist-info/METADATA,sha256=y5pozmwxQDw3vKdFGMhDfkQCjfugDgQPgIU_V58eWNw,25135
-mct_nightly-2.3.0.20250526.601.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
-mct_nightly-2.3.0.20250526.601.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
-mct_nightly-2.3.0.20250526.601.dist-info/RECORD,,
+mct_nightly-2.3.0.20250527.555.dist-info/METADATA,sha256=m5m0MizrO50qbrB0RkMCLt9s317qhSe3TcCD9otx0lQ,25135
+mct_nightly-2.3.0.20250527.555.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
+mct_nightly-2.3.0.20250527.555.dist-info/top_level.txt,sha256=gsYA8juk0Z-ZmQRKULkb3JLGdOdz8jW_cMRjisn9ga4,26
+mct_nightly-2.3.0.20250527.555.dist-info/RECORD,,

model_compression_toolkit/__init__.py CHANGED Viewed

@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
 from model_compression_toolkit import pruning
 from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model
-__version__ = "2.3.0.20250526.000601"
+__version__ = "2.3.0.20250527.000555"

model_compression_toolkit/core/__init__.py CHANGED Viewed

@@ -25,5 +25,5 @@ from model_compression_toolkit.core.common.mixed_precision.resource_utilization_
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import MixedPrecisionQuantizationConfig
 from model_compression_toolkit.core.keras.resource_utilization_data_facade import keras_resource_utilization_data
 from model_compression_toolkit.core.pytorch.resource_utilization_data_facade import pytorch_resource_utilization_data
-from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting
+from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.distance_weighting import MpDistanceWeighting

model_compression_toolkit/core/common/mixed_precision/__init__.py CHANGED Viewed

@@ -12,3 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+from .sensitivity_eval.distance_weighting import MpDistanceWeighting

model_compression_toolkit/core/common/mixed_precision/bit_width_setter.py CHANGED Viewed

@@ -76,7 +76,8 @@ def set_bit_widths(mixed_precision_enable: bool,
         for n in graph.nodes:
             assert len(n.candidates_quantization_cfg) == 1
             n.final_weights_quantization_cfg = copy.deepcopy(n.candidates_quantization_cfg[0].weights_quantization_cfg)
-            n.final_activation_quantization_cfg = copy.deepcopy(n.candidates_quantization_cfg[0].activation_quantization_cfg)
+            if not n.is_quantization_preserving():
+                n.final_activation_quantization_cfg = copy.deepcopy(n.candidates_quantization_cfg[0].activation_quantization_cfg)
     return graph

model_compression_toolkit/core/common/mixed_precision/mixed_precision_quantization_config.py CHANGED Viewed

@@ -17,7 +17,7 @@ from dataclasses import dataclass, field
 from enum import Enum
 from typing import List, Callable, Optional
 from model_compression_toolkit.constants import MP_DEFAULT_NUM_SAMPLES, ACT_HESSIAN_DEFAULT_BATCH_SIZE
-from model_compression_toolkit.core.common.mixed_precision.distance_weighting import MpDistanceWeighting
+from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.distance_weighting import MpDistanceWeighting
 class MpMetricNormalization(Enum):

model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_facade.py CHANGED Viewed

@@ -14,10 +14,10 @@
 # ==============================================================================
 from enum import Enum
-from typing import List, Callable, Dict
+from typing import List, Callable
 from model_compression_toolkit.core import MixedPrecisionQuantizationConfig
-from model_compression_toolkit.core.common import Graph, BaseNode
+from model_compression_toolkit.core.common import Graph
 from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
 from model_compression_toolkit.core.common.framework_info import FrameworkInfo
 from model_compression_toolkit.core.common.hessian import HessianInfoService
@@ -25,7 +25,7 @@ from model_compression_toolkit.core.common.mixed_precision.mixed_precision_searc
     MixedPrecisionSearchManager
 from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
     ResourceUtilization
-from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
+from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.sensitivity_evaluation import SensitivityEvaluation
 from model_compression_toolkit.core.common.mixed_precision.solution_refinement_procedure import \
     greedy_solution_refinement_procedure
@@ -79,14 +79,9 @@ def search_bit_width(graph: Graph,
     # Set Sensitivity Evaluator for MP search. It should always work with the original MP graph,
     # even if a virtual graph was created (and is used only for BOPS utilization computation purposes)
-    se = SensitivityEvaluation(
-        graph,
-        mp_config,
-        representative_data_gen=representative_data_gen,
-        fw_info=fw_info,
-        fw_impl=fw_impl,
-        disable_activation_for_metric=disable_activation_for_metric,
-        hessian_info_service=hessian_info_service)
+    se = SensitivityEvaluation(graph, mp_config, representative_data_gen=representative_data_gen, fw_info=fw_info,
+                               fw_impl=fw_impl, disable_activation_for_metric=disable_activation_for_metric,
+                               hessian_info_service=hessian_info_service)
     if search_method != BitWidthSearchMethod.INTEGER_PROGRAMMING:
         raise NotImplementedError()

model_compression_toolkit/core/common/mixed_precision/mixed_precision_search_manager.py CHANGED Viewed

@@ -21,7 +21,7 @@ from collections import defaultdict
 from tqdm import tqdm
-from typing import Dict, List, Tuple, Optional, Set
+from typing import Dict, List, Tuple, Optional
 import numpy as np
@@ -39,7 +39,7 @@ from model_compression_toolkit.core.common.mixed_precision.mixed_precision_ru_he
     MixedPrecisionRUHelper
 from model_compression_toolkit.core.common.mixed_precision.search_methods.linear_programming import \
     MixedPrecisionIntegerLPSolver
-from model_compression_toolkit.core.common.mixed_precision.sensitivity_evaluation import SensitivityEvaluation
+from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.sensitivity_evaluation import SensitivityEvaluation
 from model_compression_toolkit.core.common.substitutions.apply_substitutions import substitute
 from model_compression_toolkit.logger import Logger
 from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \

model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================

model_compression_toolkit/core/common/mixed_precision/{sensitivity_evaluation.py → sensitivity_eval/metric_calculators.py} RENAMED Viewed

@@ -1,4 +1,4 @@
-# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+# Copyright 2025 Sony Semiconductor Israel, Inc. All rights reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,40 +12,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-import contextlib
-import copy
-import itertools
 import numpy as np
-from typing import Callable, Any, List, Tuple, Dict, Optional
+from typing import runtime_checkable, Protocol, Callable, Any, List, Tuple
-from model_compression_toolkit.core import FrameworkInfo, MixedPrecisionQuantizationConfig
+from model_compression_toolkit.core import MixedPrecisionQuantizationConfig, FrameworkInfo
 from model_compression_toolkit.core.common import Graph, BaseNode
-from model_compression_toolkit.core.common.mixed_precision.set_layer_to_bitwidth import \
-    set_activation_quant_layer_to_bitwidth, set_weights_quant_layer_to_bitwidth
-from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode
-from model_compression_toolkit.core.common.similarity_analyzer import compute_kl_divergence
+from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresRequest, HessianMode, \
+    HessianScoresGranularity
 from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
+from model_compression_toolkit.core.common.similarity_analyzer import compute_kl_divergence
 from model_compression_toolkit.logger import Logger
-from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianMode, \
-    HessianScoresGranularity, HessianInfoService
-class SensitivityEvaluation:
-    """
-    Class to wrap and manage the computation on distance metric for Mixed-Precision quantization search.
-    It provides a function that evaluates the sensitivity of a bit-width configuration for the MP model.
-    """
+@runtime_checkable
+class MetricCalculator(Protocol):
+    """ Abstract class for metric calculators. """
+    # all interest points (including graph outputs)
+    all_interest_points: list
+    def compute(self, mp_model) -> float:
+        """ Compute the metric for the given model. """
+        ...
+class CustomMetricCalculator(MetricCalculator):
+    """ Calculate metric with custom function applied on graph outputs. """
+    def __init__(self, graph: Graph, custom_metric_fn: Callable):
+        """
+        Args:
+            graph: input graph.
+            custom_metric_fn: custom metric function, that accepts the model as input and return float scalar metric.
+        """
+        self.all_interest_points = [n.node for n in graph.get_outputs()]
+        self.metric_fn = custom_metric_fn
+    def compute(self, mp_model: Any) -> float:
+        """ Compute the metric for the given model. """
+        sensitivity_metric = self.metric_fn(mp_model)
+        if not isinstance(sensitivity_metric, (float, np.floating)):
+            raise TypeError(
+                f'The custom_metric_fn is expected to return float or numpy float, got {type(sensitivity_metric).__name__}')
+        return sensitivity_metric
+class DistanceMetricCalculator(MetricCalculator):
+    """ Calculator for distance-based metrics. """
     def __init__(self,
                  graph: Graph,
-                 quant_config: MixedPrecisionQuantizationConfig,
+                 mp_config: MixedPrecisionQuantizationConfig,
                  representative_data_gen: Callable,
                  fw_info: FrameworkInfo,
                  fw_impl: Any,
-                 disable_activation_for_metric: bool = False,
-                 hessian_info_service: HessianInfoService = None
-                 ):
+                 hessian_info_service: HessianInfoService = None):
         """
         Initiates all relevant objects to manage a sensitivity evaluation for MP search.
         Create an object that allows to compute the sensitivity metric of an MP model (the sensitivity
@@ -59,23 +78,21 @@ class SensitivityEvaluation:
         Args:
             graph: Graph to search for its MP configuration.
+            mp_config: MP Quantization configuration for how the graph should be quantized.
             fw_info: FrameworkInfo object about the specific framework
                 (e.g., attributes of different layers' weights to quantize).
-            quant_config: MP Quantization configuration for how the graph should be quantized.
-            representative_data_gen: Dataset used for getting batches for inference.
             fw_impl: FrameworkImplementation object with a specific framework methods implementation.
-            disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric.
+            representative_data_gen: Dataset used for getting batches for inference.
             hessian_info_service: HessianInfoService to fetch Hessian approximation information.
         """
         self.graph = graph
-        self.quant_config = quant_config
+        self.mp_config = mp_config
         self.representative_data_gen = representative_data_gen
         self.fw_info = fw_info
         self.fw_impl = fw_impl
-        self.disable_activation_for_metric = disable_activation_for_metric
-        if self.quant_config.use_hessian_based_scores:
-            if not isinstance(hessian_info_service, HessianInfoService):    # pragma: no cover
+        if self.mp_config.use_hessian_based_scores:
+            if not isinstance(hessian_info_service, HessianInfoService):  # pragma: no cover
                 Logger.critical(
                     f"When using Hessian-based approximations for sensitivity evaluation, a valid HessianInfoService object is required; found {type(hessian_info_service)}.")
             self.hessian_info_service = hessian_info_service
@@ -84,42 +101,35 @@ class SensitivityEvaluation:
         # Get interest points and output points set for distance measurement and set other helper datasets
         # We define a separate set of output nodes of the model for the purpose of sensitivity computation.
-        self.interest_points = get_mp_interest_points(graph,
-                                                      fw_impl.count_node_for_mixed_precision_interest_points,
-                                                      quant_config.num_interest_points_factor)
-        # If using a custom metric - return only model outputs
-        if self.quant_config.custom_metric_fn is not None:
-            self.interest_points = []
+        self.interest_points = self.get_mp_interest_points(graph,
+                                                           fw_impl.count_node_for_mixed_precision_interest_points,
+                                                           mp_config.num_interest_points_factor)
         # We use normalized MSE when not running hessian-based. For Hessian-based normalized MSE is not needed
         # because hessian weights already do normalization.
-        use_normalized_mse = self.quant_config.use_hessian_based_scores is False
-        self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points, use_normalized_mse)
-        self.output_points = get_output_nodes_for_metric(graph)
-        # If using a custom metric - return all model outputs
-        if self.quant_config.custom_metric_fn is not None:
-            self.output_points = [n.node for n in graph.get_outputs()]
-        self.out_ps_distance_fns, self.out_ps_axis = self._init_metric_points_lists(self.output_points,
+        use_normalized_mse = self.mp_config.use_hessian_based_scores is False
+        self.ips_distance_fns, self.ips_axis = self._init_metric_points_lists(self.interest_points,
+                                                                              use_normalized_mse)
+        output_points = self.get_output_nodes_for_metric(graph)
+        self.all_interest_points = self.interest_points + output_points
+        self.out_ps_distance_fns, self.out_ps_axis = self._init_metric_points_lists(output_points,
                                                                                     use_normalized_mse)
+        self.ref_model, _ = fw_impl.model_builder(graph, mode=ModelBuilderMode.FLOAT,
+                                                  append2output=self.all_interest_points)
         # Setting lists with relative position of the interest points
         # and output points in the list of all mp model activation tensors
         graph_sorted_nodes = self.graph.get_topo_sorted_nodes()
-        all_out_tensors_indices = [graph_sorted_nodes.index(n) for n in self.interest_points + self.output_points]
+        all_out_tensors_indices = [graph_sorted_nodes.index(n) for n in self.all_interest_points]
         global_ipts_indices = [graph_sorted_nodes.index(n) for n in self.interest_points]
-        global_out_pts_indices = [graph_sorted_nodes.index(n) for n in self.output_points]
+        global_out_pts_indices = [graph_sorted_nodes.index(n) for n in output_points]
         self.ips_act_indices = [all_out_tensors_indices.index(i) for i in global_ipts_indices]
         self.out_ps_act_indices = [all_out_tensors_indices.index(i) for i in global_out_pts_indices]
-        # Build a mixed-precision model which can be configured to use different bitwidth in different layers.
-        # And a baseline model.
-        # Also, returns a mapping between a configurable graph's node and its matching layer(s)
-        # in the new built MP model.
-        self.baseline_model, self.model_mp, self.conf_node2layers = self._build_models()
         # Build images batches for inference comparison and cat to framework type
-        images_batches = self._get_images_batches(quant_config.num_of_images)
+        images_batches = self._get_images_batches(mp_config.num_of_images)
         self.images_batches = [self.fw_impl.to_tensor(img) for img in images_batches]
         # Initiating baseline_tensors_list since it is not initiated in SensitivityEvaluationManager init.
@@ -128,12 +138,28 @@ class SensitivityEvaluation:
         # Computing Hessian-based scores for weighted average distance metric computation (only if requested),
         # and assigning distance_weighting method accordingly.
         self.interest_points_hessians = None
-        if self.quant_config.use_hessian_based_scores is True:
+        if self.mp_config.use_hessian_based_scores is True:
             self.interest_points_hessians = self._compute_hessian_based_scores()
-            self.quant_config.distance_weighting_method = lambda d: self.interest_points_hessians
+            self.mp_config.distance_weighting_method = lambda d: self.interest_points_hessians
-    def _init_metric_points_lists(self, points: List[BaseNode], norm_mse: bool = False) -> Tuple[
-        List[Callable], List[int]]:
+    def compute(self, mp_model) -> float:
+        """
+        Compute the metric for the given model.
+        Args:
+            mp_model: MP configured model.
+        Returns:
+            Computed metric.
+        """
+        ipts_distances, out_pts_distances = self._compute_distance(mp_model)
+        sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances,
+                                                               self.mp_config.distance_weighting_method)
+        return sensitivity_metric
+    def _init_metric_points_lists(self,
+                                  points: List[BaseNode],
+                                  norm_mse: bool = False) -> Tuple[List[Callable], List[int]]:
         """
         Initiates required lists for future use when computing the sensitivity metric.
         Each point on which the metric is computed uses a dedicated distance function based on its type.
@@ -150,101 +176,19 @@ class SensitivityEvaluation:
         axis_list = []
         for n in points:
             distance_fn, axis = self.fw_impl.get_mp_node_distance_fn(n,
-                                                                     compute_distance_fn=self.quant_config.compute_distance_fn,
+                                                                     compute_distance_fn=self.mp_config.compute_distance_fn,
                                                                      norm_mse=norm_mse)
             distance_fns_list.append(distance_fn)
             # Axis is needed only for KL Divergence calculation, otherwise we use per-tensor computation
             axis_list.append(axis if distance_fn == compute_kl_divergence else None)
         return distance_fns_list, axis_list
-    def compute_metric(self, mp_a_cfg: Dict[str, Optional[int]], mp_w_cfg: Dict[str, Optional[int]]) -> float:
-        """
-        Compute the sensitivity metric of the MP model for a given configuration (the sensitivity
-        is computed based on the similarity of the interest points' outputs between the MP model
-        and the float model or a custom metric if given).
-        Quantization for any configurable activation / weight that were not passed is disabled.
-        Args:
-            mp_a_cfg: Bitwidth activations configuration for the MP model.
-            mp_w_cfg: Bitwidth weights configuration for the MP model.
-        Returns:
-            The sensitivity metric of the MP model for a given configuration.
-        """
-        with self._configured_mp_model(mp_a_cfg, mp_w_cfg):
-            sensitivity_metric = self._compute_metric()
-        return sensitivity_metric
-    def _compute_metric(self) -> float:
-        """
-        Compute sensitivity metric on a configured mp model.
-        Returns:
-            Sensitivity metric.
-        """
-        if self.quant_config.custom_metric_fn:
-            sensitivity_metric = self.quant_config.custom_metric_fn(self.model_mp)
-            if not isinstance(sensitivity_metric, (float, np.floating)):
-                raise TypeError(
-                    f'The custom_metric_fn is expected to return float or numpy float, got {type(sensitivity_metric).__name__}')
-            return sensitivity_metric
-        # compute default metric
-        ipts_distances, out_pts_distances = self._compute_distance()
-        sensitivity_metric = self._compute_mp_distance_measure(ipts_distances, out_pts_distances,
-                                                               self.quant_config.distance_weighting_method)
-        return sensitivity_metric
     def _init_baseline_tensors_list(self):
         """
         Evaluates the baseline model on all images and returns the obtained lists of tensors in a list for later use.
         """
-        return [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.baseline_model, images))
-                                      for images in self.images_batches]
-    def _build_models(self) -> Any:
-        """
-        Builds two models - an MP model with configurable layers and a baseline, float model.
-        Returns: A tuple with two models built from the given graph: a baseline model (with baseline configuration) and
-            an MP model (which can be configured for a specific bitwidth configuration).
-            Note that the type of the returned models is dependent on the used framework (TF/Pytorch).
-        """
-        evaluation_graph = copy.deepcopy(self.graph)
-        # Disable quantization for non-configurable nodes, and, if requested, for all activations (quantizers won't
-        # be added to the model).
-        for n in evaluation_graph.get_topo_sorted_nodes():
-            if self.disable_activation_for_metric or not n.has_configurable_activation():
-                for c in n.candidates_quantization_cfg:
-                    c.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.NO_QUANT
-            if not n.has_any_configurable_weight():
-                for c in n.candidates_quantization_cfg:
-                    c.weights_quantization_cfg.disable_all_weights_quantization()
-        model_mp, _, conf_node2layers = self.fw_impl.model_builder(evaluation_graph,
-                                                                   mode=ModelBuilderMode.MIXEDPRECISION,
-                                                                   append2output=self.interest_points + self.output_points,
-                                                                   fw_info=self.fw_info)
-        # Disable all configurable quantizers. They will be activated one at a time during sensitivity evaluation.
-        # Note: from this point mp_model is not in sync with graph quantization configuration for configurable nodes.
-        for layer in itertools.chain(*conf_node2layers.values()):
-            if isinstance(layer, self.fw_impl.activation_quant_layer_cls):
-                set_activation_quant_layer_to_bitwidth(layer, None, self.fw_impl)
-            else:
-                assert isinstance(layer, self.fw_impl.weights_quant_layer_cls)
-                set_weights_quant_layer_to_bitwidth(layer, None, self.fw_impl)
-        # Build a baseline model (to compute distances from).
-        baseline_model, _ = self.fw_impl.model_builder(evaluation_graph,
-                                                       mode=ModelBuilderMode.FLOAT,
-                                                       append2output=self.interest_points + self.output_points)
-        return baseline_model, model_mp, conf_node2layers
+        return [self.fw_impl.to_numpy(self.fw_impl.sensitivity_eval_inference(self.ref_model, images))
+                for images in self.images_batches]
     def _compute_hessian_based_scores(self) -> np.ndarray:
         """
@@ -257,61 +201,21 @@ class SensitivityEvaluation:
         # Create a request for Hessian approximation scores with specific configurations
         # (here we use per-tensor approximation of the Hessian's trace w.r.t the node's activations)
         fw_dataloader = self.fw_impl.convert_data_gen_to_dataloader(self.representative_data_gen,
-                                                                    batch_size=self.quant_config.hessian_batch_size)
+                                                                    batch_size=self.mp_config.hessian_batch_size)
         hessian_info_request = HessianScoresRequest(mode=HessianMode.ACTIVATION,
                                                     granularity=HessianScoresGranularity.PER_TENSOR,
                                                     target_nodes=self.interest_points,
                                                     data_loader=fw_dataloader,
-                                                    n_samples=self.quant_config.num_of_images)
+                                                    n_samples=self.mp_config.num_of_images)
         # Fetch the Hessian approximation scores for the current interest point
         nodes_approximations = self.hessian_info_service.fetch_hessian(request=hessian_info_request)
-        approx_by_image = np.stack([nodes_approximations[n.name] for n in self.interest_points], axis=1)    # samples X nodes
+        approx_by_image = np.stack([nodes_approximations[n.name] for n in self.interest_points],
+                                   axis=1)  # samples X nodes
         # Return the mean approximation value across all images for each interest point
         return np.mean(approx_by_image, axis=0)
-    @contextlib.contextmanager
-    def _configured_mp_model(self, mp_a_cfg: Dict[str, Optional[int]], mp_w_cfg: Dict[str, Optional[int]]):
-        """
-        Context manager to configure specific configurable layers of the mp model. At exit, configuration is
-        automatically restored to un-quantized.
-        Args:
-            mp_a_cfg: Nodes bitwidth indices to configure activation quantizers to.
-            mp_w_cfg: Nodes bitwidth indices to configure weights quantizers to.
-        """
-        if not (mp_a_cfg and any(v is not None for v in mp_a_cfg.values()) or
-                mp_w_cfg and any(v is not None for v in mp_w_cfg.values())):
-            raise ValueError(f'Requested configuration is either empty or contain only None values.')
-        # defined here so that it can't be used directly
-        def apply_bitwidth_config(a_cfg, w_cfg):
-            node_names = set(a_cfg.keys()).union(set(w_cfg.keys()))
-            for n in node_names:
-                node_quant_layers = self.conf_node2layers.get(n)
-                if node_quant_layers is None:    # pragma: no cover
-                    raise ValueError(f"Matching layers for node {n} not found in the mixed precision model configuration.")
-                for qlayer in node_quant_layers:
-                    assert isinstance(qlayer, (self.fw_impl.activation_quant_layer_cls,
-                                               self.fw_impl.weights_quant_layer_cls)), f'Unexpected {type(qlayer)} of node {n}'
-                    if isinstance(qlayer, self.fw_impl.activation_quant_layer_cls) and n in a_cfg:
-                        set_activation_quant_layer_to_bitwidth(qlayer, a_cfg[n], self.fw_impl)
-                        a_cfg.pop(n)
-                    elif isinstance(qlayer, self.fw_impl.weights_quant_layer_cls) and n in w_cfg:
-                        set_weights_quant_layer_to_bitwidth(qlayer, w_cfg[n], self.fw_impl)
-                        w_cfg.pop(n)
-            if a_cfg or w_cfg:
-                raise ValueError(f'Not all mp configs were consumed, remaining activation config {a_cfg}, '
-                                 f'weights config {w_cfg}.')
-        apply_bitwidth_config(mp_a_cfg.copy(), mp_w_cfg.copy())
-        try:
-            yield
-        finally:
-            apply_bitwidth_config({n: None for n in mp_a_cfg}, {n: None for n in mp_w_cfg})
     def _compute_points_distance(self,
                                  baseline_tensors: List[Any],
                                  mp_tensors: List[Any],
@@ -338,7 +242,7 @@ class SensitivityEvaluation:
         return np.asarray(distance_v)
-    def _compute_distance(self) -> Tuple[np.ndarray, np.ndarray]:
+    def _compute_distance(self, mp_model) -> Tuple[np.ndarray, np.ndarray]:
         """
         Computing the interest points distance and the output points distance, and using them to build a
         unified distance vector.
@@ -352,7 +256,7 @@ class SensitivityEvaluation:
         # Compute the distance matrix for num_of_images images.
         for images, baseline_tensors in zip(self.images_batches, self.baseline_tensors_list):
             # when using model.predict(), it does not use the QuantizeWrapper functionality
-            mp_tensors = self.fw_impl.sensitivity_eval_inference(self.model_mp, images)
+            mp_tensors = self.fw_impl.sensitivity_eval_inference(mp_model, images)
             mp_tensors = self.fw_impl.to_numpy(mp_tensors)
             # Compute distance: similarity between the baseline model to the float model
@@ -440,77 +344,78 @@ class SensitivityEvaluation:
             samples_count += batch_size
         else:
             if samples_count < num_of_images:
-                Logger.warning(f'Not enough images in representative dataset to generate {num_of_images} data points, '
-                               f'only {samples_count} were generated')
+                Logger.warning(
+                    f'Not enough images in representative dataset to generate {num_of_images} data points, '
+                    f'only {samples_count} were generated')
         return images_batches
+    @classmethod
+    def get_mp_interest_points(cls, graph: Graph,
+                               interest_points_classifier: Callable,
+                               num_ip_factor: float) -> List[BaseNode]:
+        """
+        Gets a list of interest points for the mixed precision metric computation.
+        The list is constructed from a filtered set of nodes in the graph.
+        Note that the output layers are separated from the interest point set for metric computation purposes.
-def get_mp_interest_points(graph: Graph,
-                           interest_points_classifier: Callable,
-                           num_ip_factor: float) -> List[BaseNode]:
-    """
-    Gets a list of interest points for the mixed precision metric computation.
-    The list is constructed from a filtered set of nodes in the graph.
-    Note that the output layers are separated from the interest point set for metric computation purposes.
-    Args:
-        graph: Graph to search for its MP configuration.
-        interest_points_classifier: A function that indicates whether a given node in considered as a potential
-            interest point for mp metric computation purposes.
-        num_ip_factor: Percentage out of the total set of interest points that we want to actually use.
-    Returns: A list of interest points (nodes in the graph).
-    """
-    sorted_nodes = graph.get_topo_sorted_nodes()
-    ip_nodes = [n for n in sorted_nodes if interest_points_classifier(n)]
-    interest_points_nodes = bound_num_interest_points(ip_nodes, num_ip_factor)
+        Args:
+            graph: Graph to search for its MP configuration.
+            interest_points_classifier: A function that indicates whether a given node in considered as a potential
+                interest point for mp metric computation purposes.
+            num_ip_factor: Percentage out of the total set of interest points that we want to actually use.
-    # We exclude output nodes from the set of interest points since they are used separately in the sensitivity evaluation.
-    output_nodes = [n.node for n in graph.get_outputs()]
+        Returns: A list of interest points (nodes in the graph).
-    interest_points = [n for n in interest_points_nodes if n not in output_nodes]
+        """
+        sorted_nodes = graph.get_topo_sorted_nodes()
+        ip_nodes = [n for n in sorted_nodes if interest_points_classifier(n)]
-    return interest_points
+        interest_points_nodes = cls.bound_num_interest_points(ip_nodes, num_ip_factor)
+        # We exclude output nodes from the set of interest points since they are used separately in the sensitivity evaluation.
+        output_nodes = [n.node for n in graph.get_outputs()]
-def get_output_nodes_for_metric(graph: Graph) -> List[BaseNode]:
-    """
-    Returns a list of output nodes that are also quantized (either kernel weights attribute or activation)
-    to be used as a set of output points in the distance metric computation.
+        interest_points = [n for n in interest_points_nodes if n not in output_nodes]
-    Args:
-        graph: Graph to search for its MP configuration.
+        return interest_points
-    Returns: A list of output nodes.
+    @staticmethod
+    def get_output_nodes_for_metric(graph: Graph) -> List[BaseNode]:
+        """
+        Returns a list of output nodes that are also quantized (either kernel weights attribute or activation)
+        to be used as a set of output points in the distance metric computation.
-    """
+        Args:
+            graph: Graph to search for its MP configuration.
-    return [n.node for n in graph.get_outputs()
-            if (graph.fw_info.is_kernel_op(n.node.type) and
-                n.node.is_weights_quantization_enabled(graph.fw_info.get_kernel_op_attributes(n.node.type)[0])) or
-            n.node.is_activation_quantization_enabled()]
+        Returns: A list of output nodes.
+        """
-def bound_num_interest_points(sorted_ip_list: List[BaseNode], num_ip_factor: float) -> List[BaseNode]:
-    """
-    Filters the list of interest points and returns a shorter list with number of interest points smaller than some
-    default threshold.
+        return [n.node for n in graph.get_outputs()
+                if (graph.fw_info.is_kernel_op(n.node.type) and
+                    n.node.is_weights_quantization_enabled(graph.fw_info.get_kernel_op_attributes(n.node.type)[0])) or
+                n.node.is_activation_quantization_enabled()]
-    Args:
-        sorted_ip_list: List of nodes which are considered as interest points for the metric computation.
-        num_ip_factor: Percentage out of the total set of interest points that we want to actually use.
+    @staticmethod
+    def bound_num_interest_points(sorted_ip_list: List[BaseNode], num_ip_factor: float) -> List[BaseNode]:
+        """
+        Filters the list of interest points and returns a shorter list with number of interest points smaller than some
+        default threshold.
-    Returns: A new list of interest points (list of nodes).
+        Args:
+            sorted_ip_list: List of nodes which are considered as interest points for the metric computation.
+            num_ip_factor: Percentage out of the total set of interest points that we want to actually use.
-    """
-    if num_ip_factor < 1.0:
-        num_interest_points = int(num_ip_factor * len(sorted_ip_list))
-        Logger.info(f'Using {num_interest_points} for mixed-precision metric evaluation out of total '
-                    f'{len(sorted_ip_list)} potential interest points.')
-        # Take num_interest_points evenly spaced interest points from the original list
-        indices = np.round(np.linspace(0, len(sorted_ip_list) - 1, num_interest_points)).astype(int)
-        return [sorted_ip_list[i] for i in indices]
+        Returns: A new list of interest points (list of nodes).
-    return sorted_ip_list
+        """
+        if num_ip_factor < 1.0:
+            num_interest_points = int(num_ip_factor * len(sorted_ip_list))
+            Logger.info(f'Using {num_interest_points} for mixed-precision metric evaluation out of total '
+                        f'{len(sorted_ip_list)} potential interest points.')
+            # Take num_interest_points evenly spaced interest points from the original list
+            indices = np.round(np.linspace(0, len(sorted_ip_list) - 1, num_interest_points)).astype(int)
+            return [sorted_ip_list[i] for i in indices]
+        return sorted_ip_list

model_compression_toolkit/core/common/mixed_precision/sensitivity_eval/sensitivity_evaluation.py ADDED Viewed

@@ -0,0 +1,168 @@
+# Copyright 2022 Sony Semiconductor Israel, Inc. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+import contextlib
+import copy
+import itertools
+from typing import Callable, Any, Tuple, Dict, Optional
+from model_compression_toolkit.core import FrameworkInfo, MixedPrecisionQuantizationConfig
+from model_compression_toolkit.core.common import Graph
+from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.metric_calculators import \
+    CustomMetricCalculator, DistanceMetricCalculator
+from model_compression_toolkit.core.common.mixed_precision.sensitivity_eval.set_layer_to_bitwidth import \
+    set_activation_quant_layer_to_bitwidth, set_weights_quant_layer_to_bitwidth
+from model_compression_toolkit.core.common.quantization.node_quantization_config import ActivationQuantizationMode
+from model_compression_toolkit.core.common.model_builder_mode import ModelBuilderMode
+from model_compression_toolkit.core.common.hessian import HessianInfoService
+class SensitivityEvaluation:
+    """
+    Sensitivity evaluation of a bit-width configuration for Mixed Precision search.
+    A configurable mixed-precision (MP) model is built once at construction time and all of its configurable
+    quantizers are disabled. Each call to compute_metric temporarily configures the requested quantizers,
+    runs the metric calculator on the MP model, and then restores the requested quantizers to the disabled state.
+    """
+    def __init__(self,
+                 graph: Graph,
+                 mp_config: MixedPrecisionQuantizationConfig,
+                 representative_data_gen: Callable,
+                 fw_info: FrameworkInfo,
+                 fw_impl: Any,
+                 disable_activation_for_metric: bool = False,
+                 hessian_info_service: HessianInfoService = None
+                 ):
+        """
+        Selects the metric calculator and builds the configurable MP model.
+        Args:
+            graph: Graph to search for its MP configuration.
+            fw_info: FrameworkInfo object about the specific framework
+                (e.g., attributes of different layers' weights to quantize).
+            mp_config: MP Quantization configuration for how the graph should be quantized.
+                If its custom_metric_fn is set, it is used instead of the distance-based metric.
+            representative_data_gen: Dataset used for getting batches for inference.
+            fw_impl: FrameworkImplementation object with a specific framework methods implementation.
+            disable_activation_for_metric: Whether to disable activation quantization when computing the MP metric.
+            hessian_info_service: HessianInfoService to fetch Hessian approximation information. Defaults to None.
+                It is forwarded only to the distance-based metric calculator.
+        """
+        self.mp_config = mp_config
+        self.representative_data_gen = representative_data_gen
+        self.fw_info = fw_info
+        self.fw_impl = fw_impl
+        # A custom metric function, when provided, takes precedence over the distance-based metric.
+        if self.mp_config.custom_metric_fn:
+            self.metric_calculator = CustomMetricCalculator(graph, self.mp_config.custom_metric_fn)
+        else:
+            self.metric_calculator = DistanceMetricCalculator(graph, mp_config, representative_data_gen,
+                                                              fw_info=fw_info, fw_impl=fw_impl,
+                                                              hessian_info_service=hessian_info_service)
+        # Build a mixed-precision model which can be configured to use different bitwidth in different layers.
+        # Also, returns a mapping between a configurable graph's node and its matching layer(s) in the built MP model.
+        # The interest points of the selected metric calculator are handed to the model builder as append2output.
+        self.mp_model, self.conf_node2layers = self._build_mp_model(graph, self.metric_calculator.all_interest_points,
+                                                                    disable_activation_for_metric)
+    def compute_metric(self, mp_a_cfg: Dict[str, Optional[int]], mp_w_cfg: Dict[str, Optional[int]]) -> float:
+        """
+        Compute the sensitivity metric of the MP model for a given configuration.
+        Quantization for any configurable activation / weight that were not passed is disabled.
+        The requested quantizers are configured only for the duration of the metric computation.
+        Args:
+            mp_a_cfg: Bitwidth activations configuration for the MP model.
+            mp_w_cfg: Bitwidth weights configuration for the MP model.
+        Returns:
+            The sensitivity metric of the MP model for a given configuration.
+        Raises:
+            ValueError: If both configurations are empty or contain only None values, if a configured node
+                has no matching layers in the MP model, or if a configuration entry was not consumed by any layer.
+        """
+        with self._configured_mp_model(mp_a_cfg, mp_w_cfg):
+            sensitivity_metric = self.metric_calculator.compute(self.mp_model)
+        return sensitivity_metric
+    def _build_mp_model(self, graph, outputs, disable_activations: bool) -> Tuple[Any, dict]:
+        """
+        Builds an MP model with configurable layers.
+        The graph is deep-copied first, and the quantization configuration changes are applied to the copy.
+        Args:
+            graph: Graph to build the MP model from.
+            outputs: Passed to the framework model builder as append2output
+                (the metric calculator's interest points when called from __init__).
+            disable_activations: Whether to disable activation quantization for all nodes, including nodes
+                with configurable activation.
+        Returns:
+            MP model and a mapping from configurable graph nodes to their corresponding quantization layer(s)
+            in the MP model.
+        """
+        # Work on a copy, since the candidates' quantization configuration is modified below.
+        evaluation_graph = copy.deepcopy(graph)
+        # Disable quantization for non-configurable nodes, and, if requested, for all activations (quantizers won't
+        # be added to the model).
+        for n in evaluation_graph.get_topo_sorted_nodes():
+            if disable_activations or not n.has_configurable_activation():
+                for c in n.candidates_quantization_cfg:
+                    c.activation_quantization_cfg.quant_mode = ActivationQuantizationMode.NO_QUANT
+            if not n.has_any_configurable_weight():
+                for c in n.candidates_quantization_cfg:
+                    c.weights_quantization_cfg.disable_all_weights_quantization()
+        # The second value returned by the model builder is not used here.
+        model_mp, _, conf_node2layers = self.fw_impl.model_builder(evaluation_graph,
+                                                                   mode=ModelBuilderMode.MIXEDPRECISION,
+                                                                   append2output=outputs,
+                                                                   fw_info=self.fw_info)
+        # Disable all configurable quantizers. They will be activated one at a time during sensitivity evaluation.
+        # Every layer in the mapping is expected to be either an activation or a weights quantization layer.
+        for layer in itertools.chain(*conf_node2layers.values()):
+            if isinstance(layer, self.fw_impl.activation_quant_layer_cls):
+                set_activation_quant_layer_to_bitwidth(layer, None, self.fw_impl)
+            else:
+                assert isinstance(layer, self.fw_impl.weights_quant_layer_cls)
+                set_weights_quant_layer_to_bitwidth(layer, None, self.fw_impl)
+        return model_mp, conf_node2layers
+    @contextlib.contextmanager
+    def _configured_mp_model(self, mp_a_cfg: Dict[str, Optional[int]], mp_w_cfg: Dict[str, Optional[int]]):
+        """
+        Context manager to configure specific configurable layers of the mp model. At exit, configuration is
+        automatically restored to un-quantized.
+        Args:
+            mp_a_cfg: Nodes bitwidth indices to configure activation quantizers to.
+            mp_w_cfg: Nodes bitwidth indices to configure weights quantizers to.
+        Raises:
+            ValueError: If both configurations are empty or contain only None values, if a configured node
+                has no matching layers in the MP model, or if a configuration entry was not consumed by any layer.
+        """
+        # At least one of the two configurations must request an actual (non-None) bitwidth index.
+        if not (mp_a_cfg and any(v is not None for v in mp_a_cfg.values()) or
+                mp_w_cfg and any(v is not None for v in mp_w_cfg.values())):
+            raise ValueError(f'Requested configuration is either empty or contain only None values.')
+        # defined here so that it can't be used directly
+        # Note that a_cfg and w_cfg are consumed (entries are popped) while being applied.
+        def apply_bitwidth_config(a_cfg, w_cfg):
+            # Visit every node that appears in either configuration.
+            node_names = set(a_cfg.keys()).union(set(w_cfg.keys()))
+            for n in node_names:
+                node_quant_layers = self.conf_node2layers.get(n)
+                if node_quant_layers is None:    # pragma: no cover
+                    raise ValueError(f"Matching layers for node {n} not found in the mixed precision model configuration.")
+                for qlayer in node_quant_layers:
+                    assert isinstance(qlayer, (self.fw_impl.activation_quant_layer_cls,
+                                               self.fw_impl.weights_quant_layer_cls)), f'Unexpected {type(qlayer)} of node {n}'
+                    # An entry is popped once applied, so only the first matching layer of each kind is
+                    # configured per node, and leftover entries can be detected after the loop.
+                    if isinstance(qlayer, self.fw_impl.activation_quant_layer_cls) and n in a_cfg:
+                        set_activation_quant_layer_to_bitwidth(qlayer, a_cfg[n], self.fw_impl)
+                        a_cfg.pop(n)
+                    elif isinstance(qlayer, self.fw_impl.weights_quant_layer_cls) and n in w_cfg:
+                        set_weights_quant_layer_to_bitwidth(qlayer, w_cfg[n], self.fw_impl)
+                        w_cfg.pop(n)
+            # Anything left was requested for a node that has no quantization layer of the matching kind.
+            if a_cfg or w_cfg:
+                raise ValueError(f'Not all mp configs were consumed, remaining activation config {a_cfg}, '
+                                 f'weights config {w_cfg}.')
+        # Copies are passed so that the caller's dictionaries are not modified by the pops above.
+        # NOTE(review): this call runs before the try block, so if it raises after some layers were already
+        # configured, those layers are not reset by the finally clause below.
+        apply_bitwidth_config(mp_a_cfg.copy(), mp_w_cfg.copy())
+        try:
+            yield
+        finally:
+            # Restore the requested nodes to the disabled state, also when the body raised.
+            apply_bitwidth_config({n: None for n in mp_a_cfg}, {n: None for n in mp_w_cfg})

model_compression_toolkit/core/common/statistics_correction/compute_activation_bias_correction_of_graph.py CHANGED Viewed

@@ -42,13 +42,9 @@ def get_previous_node_with_activation_quantization(linear_node: BaseNode,
     prev_node = prev_nodes[0]
-    activation_quantization_config = prev_node.final_activation_quantization_cfg
+    prev_quant_node = graph.retrieve_preserved_quantization_node(prev_node)
-    # Search for node with activation quantization
-    if activation_quantization_config.enable_activation_quantization:
-        return prev_node
-    else:
-        return get_previous_node_with_activation_quantization(prev_node, graph)
+    return prev_quant_node if prev_quant_node.is_activation_quantization_enabled() else None
 def calculate_bin_centers(bin_edges: np.ndarray) -> np.ndarray:

{mct_nightly-2.3.0.20250526.601.dist-info → mct_nightly-2.3.0.20250527.555.dist-info}/WHEEL RENAMED Viewed

File without changes

{mct_nightly-2.3.0.20250526.601.dist-info → mct_nightly-2.3.0.20250527.555.dist-info}/licenses/LICENSE.md RENAMED Viewed

File without changes

{mct_nightly-2.3.0.20250526.601.dist-info → mct_nightly-2.3.0.20250527.555.dist-info}/top_level.txt RENAMED Viewed

File without changes

/model_compression_toolkit/core/common/mixed_precision/{distance_weighting.py → sensitivity_eval/distance_weighting.py} RENAMED Viewed

File without changes

/model_compression_toolkit/core/common/mixed_precision/{set_layer_to_bitwidth.py → sensitivity_eval/set_layer_to_bitwidth.py} RENAMED Viewed

File without changes

mct-nightly 2.3.0.20250526.601__py3-none-any.whl → 2.3.0.20250527.555__py3-none-any.whl

mct-nightly 2.3.0.20250526.601__py3-none-any.whl → 2.3.0.20250527.555__py3-none-any.whl