mct-nightly 2.2.0.20241006.532-py3-none-any.whl → 2.2.0.20241008.450-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: mct-nightly
- Version: 2.2.0.20241006.532
+ Version: 2.2.0.20241008.450
  Summary: A Model Compression Toolkit for neural networks
  Home-page: UNKNOWN
  License: UNKNOWN
@@ -1,4 +1,4 @@
- model_compression_toolkit/__init__.py,sha256=kSEKSXjzPQru90gByS3M6uomZGoS5vT50wU-WzVOQEU,1573
+ model_compression_toolkit/__init__.py,sha256=N9yCh68lSsYuGo6DuxotIhOSedwXIAg8XDYshb0Nz4g,1573
  model_compression_toolkit/constants.py,sha256=i4wYheBkIdQmsQA-axIpcT3YiSO1USNc-jaNiNE8w6E,3920
  model_compression_toolkit/defaultdict.py,sha256=LSc-sbZYXENMCw3U9F4GiXuv67IKpdn0Qm7Fr11jy-4,2277
  model_compression_toolkit/logger.py,sha256=3DByV41XHRR3kLTJNbpaMmikL8icd9e1N-nkQAY9oDk,4567
@@ -46,11 +46,11 @@ model_compression_toolkit/core/common/graph/memory_graph/cut.py,sha256=aPdXJPP5a
  model_compression_toolkit/core/common/graph/memory_graph/max_cut_astar.py,sha256=crV2NCLVO8jx9MlryZBYuJKFe_G9HfM7rUR64fDymlw,17045
  model_compression_toolkit/core/common/graph/memory_graph/memory_element.py,sha256=gRmBEFRmyJsNKezQfiwDwQu1cmbGd2wgKCRTH6iw8mw,3961
  model_compression_toolkit/core/common/graph/memory_graph/memory_graph.py,sha256=gw4av_rzn_3oEAPpD3B7PHZDqnxHMjIESevl6ppPnkk,7175
- model_compression_toolkit/core/common/hessian/__init__.py,sha256=6216QgHl7h4DXGn5ForP9Tija-wrBSONNtQ769ikP2s,1025
- model_compression_toolkit/core/common/hessian/hessian_info_service.py,sha256=TfgSIh5pmZcJM9335aAxZriCzMljnk3mYhmKBsK2x5Y,20848
+ model_compression_toolkit/core/common/hessian/__init__.py,sha256=Sj3I9mLBq-yrcBFxpUkOy0Rb5pxJQBPcECvgyOqhHSY,1064
+ model_compression_toolkit/core/common/hessian/hessian_info_service.py,sha256=fUgW-AUhRu609_RSRd1WKaQAfPk2SmLnlkT74v6TZwY,23769
  model_compression_toolkit/core/common/hessian/hessian_info_utils.py,sha256=1axmN0tjJSo_7hUr2d2KMv4y1pBi19cqWSQpi4BbdsA,1458
  model_compression_toolkit/core/common/hessian/hessian_scores_calculator.py,sha256=Pe4uKerx-MeDQPJ7Slr8fvFUHfv02q33w3gbQK5kBKs,4186
- model_compression_toolkit/core/common/hessian/hessian_scores_request.py,sha256=atGJgJBL9uwYRC3t9NnzGgHYxV4XJj4Ai_xPpQH0rhY,3229
+ model_compression_toolkit/core/common/hessian/hessian_scores_request.py,sha256=fYXcOMa2bpbJjQ2S4r021WOvhoDWFa_jy95hofqVBFA,3632
  model_compression_toolkit/core/common/matchers/__init__.py,sha256=sw7LOPN1bM82o3SkMaklyH0jw-TLGK0-fl2Wq73rffI,697
  model_compression_toolkit/core/common/matchers/base_graph_filter.py,sha256=mTk54z0mIbFmPOb4h0xfLtLDookcFyNh8H0pIN5js_M,3091
  model_compression_toolkit/core/common/matchers/base_matcher.py,sha256=JCj-NLAXOJa-GcSX-94PVUTWjooQUd0NemiyNg5uKGQ,2210
@@ -256,7 +256,7 @@ model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/transfo
  model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/virtual_activation_weights_composition.py,sha256=WmEa8Xjji-_tIbthDxlLAGSr69nWk-YKcHNaVqLa7sg,1375
  model_compression_toolkit/core/pytorch/graph_substitutions/substitutions/weights_activation_split.py,sha256=tp78axmUQc0Zpj3KwVmV0PGYHvCf7sAW_sRmXXw7gsY,1616
  model_compression_toolkit/core/pytorch/hessian/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
- model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py,sha256=xc_-utc9_Hq915X02VbT8zXxGqxE4fFz6dhiiZwU3ok,8578
+ model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py,sha256=fKeql1cXOieHTbxQDOIMpFO1sVktqXVCRBgZkv3R13Q,10929
  model_compression_toolkit/core/pytorch/hessian/hessian_scores_calculator_pytorch.py,sha256=vXluX-awgavv7DGihG9HrlvLhak8qIHy837PPTOd4jg,3471
  model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py,sha256=C4-7naBQUh8TN6fEwkyKY6rlY_nvHSAmCnWT4iMBs8E,8497
  model_compression_toolkit/core/pytorch/mixed_precision/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
@@ -345,11 +345,11 @@ model_compression_toolkit/exporter/model_wrapper/pytorch/builder/node_to_quantiz
  model_compression_toolkit/gptq/__init__.py,sha256=pEgkJvmf05KSw70iLDTz_6LI_2Oi5L8sTN0JsEUpnpk,1445
  model_compression_toolkit/gptq/runner.py,sha256=La12JTYjWyJW0YW4Al4TP1_Xi4JWBCEKw6FR_JQsxe0,5982
  model_compression_toolkit/gptq/common/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
- model_compression_toolkit/gptq/common/gptq_config.py,sha256=xogD4mM2825NXyX7rKWBaKBhBFo31bMUmxECREGgtWc,6132
+ model_compression_toolkit/gptq/common/gptq_config.py,sha256=GP4lcDeyVgXA-QFArDW28UucOOKY0zeYJpq2pvyNVM8,6510
  model_compression_toolkit/gptq/common/gptq_constants.py,sha256=QSm6laLkIV0LYmU0BLtmKp3Fi3SqDfbncFQWOGA1cGU,611
  model_compression_toolkit/gptq/common/gptq_framework_implementation.py,sha256=n3mSf4J92kFjekzyGyrJULylI-8Jf5OVWJ5AFoVnEx0,1266
  model_compression_toolkit/gptq/common/gptq_graph.py,sha256=-bL5HhPcKqV8nj4dZPXc5QmQJbFBel6etrioikP0tEo,3039
- model_compression_toolkit/gptq/common/gptq_training.py,sha256=CtSpjG27BQ3rLPGWeBnZYYiGnMREpdBd6dx7SQf_wDk,14965
+ model_compression_toolkit/gptq/common/gptq_training.py,sha256=dRNEjjKdVqlazbGWjZNE9q-MsU0PBffGKHfDpy3NX5Q,16661
  model_compression_toolkit/gptq/keras/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/gptq/keras/gptq_keras_implementation.py,sha256=axBwnCSjq5xk-xGymOwSOqjp39It-CVtGcCTRTf0E_4,1248
  model_compression_toolkit/gptq/keras/gptq_loss.py,sha256=rbRkF15MYd6nq4G49kcjb_dPTa-XNq9cTkrb93mXawo,6241
@@ -368,19 +368,19 @@ model_compression_toolkit/gptq/keras/quantizer/soft_rounding/uniform_soft_quanti
  model_compression_toolkit/gptq/keras/quantizer/ste_rounding/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
  model_compression_toolkit/gptq/keras/quantizer/ste_rounding/symmetric_ste.py,sha256=pgZADwaNWUwm9QTrYaW6yXE3-zfedPZSa9TKBVedNd4,8356
  model_compression_toolkit/gptq/pytorch/__init__.py,sha256=cco4TmeIDIh32nj9ZZXVkws4dd9F2UDrmjKzTN8G0V0,697
- model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=kDuWw-6zh17wZpYWh4Xa94rpoodf82DksgjQCnL7nBc,2719
+ model_compression_toolkit/gptq/pytorch/gptq_loss.py,sha256=nVW3URcCWQywoXfmTOBMxliZVvosshf4-G0Sq7dNwzU,3877
  model_compression_toolkit/gptq/pytorch/gptq_pytorch_implementation.py,sha256=tECPTavxn8EEwgLaP2zvxdJH6Vg9jC0YOIMJ7857Sdc,1268
- model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=bnL4DyPLBz2-pip3RV_jBmExvQKZ4N1vXzQudc1VgMY,17117
+ model_compression_toolkit/gptq/pytorch/gptq_training.py,sha256=j_FZcs8ey_9voI83TrL4q1Mne59zO2_v0MzdhZcxWuY,20071
  model_compression_toolkit/gptq/pytorch/graph_info.py,sha256=4mVM-VvnBaA64ACVdOe6wTGHdMSa2UTLIUe7nACLcdo,4008
- model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=Z1xCEDiRWE6xtjVjgVGpgGazuY9l9IhUOPNiRZegLMQ,15408
+ model_compression_toolkit/gptq/pytorch/quantization_facade.py,sha256=7UPaLBx66mJIlDTpT1uLI9LpHPzOr8EtywZ0aawveDA,16527
  model_compression_toolkit/gptq/pytorch/quantizer/__init__.py,sha256=ZHNHo1yzye44m9_ht4UUZfTpK01RiVR3Tr74-vtnOGI,968
  model_compression_toolkit/gptq/pytorch/quantizer/base_pytorch_gptq_quantizer.py,sha256=fKg-PNOhGBiL-4eySS9Fyw0GkA76Pq8jT_HbJuJ8iZU,4143
  model_compression_toolkit/gptq/pytorch/quantizer/gradual_activation_quantization.py,sha256=nngu2TeXjngkqt_6-wciFmCvo-dbpeh_tJJxBV_cfHk,3686
  model_compression_toolkit/gptq/pytorch/quantizer/quant_utils.py,sha256=OocYYRqvl7rZ37QT0hTzfJnWGiNCPskg7cziTlR7TRk,3893
  model_compression_toolkit/gptq/pytorch/quantizer/quantization_builder.py,sha256=5EyAzvlU01vLyXmMwY_8dNyb7GwYktXmnrvUON8n8WI,4696
- model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py,sha256=lkeEBgAAhC1VHu4DHoqDz8GC7BIU4cU0HIAXFYfgUFU,2098
+ model_compression_toolkit/gptq/pytorch/quantizer/regularization_factory.py,sha256=H6pARLK-jq3cKoaipY0SK9wMGrqy6CSEZTk14KdrKA0,2105
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/__init__.py,sha256=lNJ29DYxaLUPDstRDA1PGI5r9Fulq_hvrZMlhst1Z5g,697
- model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=UZwVCpG8WOw7r0-cmPYXNkJYpTZciW66KWtKG004J6Q,2683
+ model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py,sha256=vlQEhif-R49UstORkXmpMA4ZE82Aqh-mJqKCnB31gag,3005
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/symmetric_soft_quantizer.py,sha256=kLVQC1hXzDpP4Jx7AwnA764oGnY5AMEuvUUhAvhz09M,12347
  model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/uniform_soft_quantizer.py,sha256=FgPSKoV8p8y-gLNz359XdOPD6w_wpDvcJFtTNLWqYb0,9099
  model_compression_toolkit/gptq/pytorch/quantizer/ste_rounding/__init__.py,sha256=Rf1RcYmelmdZmBV5qOKvKWF575ofc06JFQSq83Jz99A,696
@@ -551,8 +551,8 @@ tests_pytest/pytorch/gptq/test_annealing_cfg.py,sha256=hGC7L6mp3N1ygcJ3OctgS_Fz2
  tests_pytest/pytorch/gptq/test_gradual_act_quantization.py,sha256=tI01aFIUaiCILL5Qn--p1E_rLBUelxLdSY3k52lwcx0,4594
  tests_pytest/pytorch/trainable_infrastructure/__init__.py,sha256=RAe8mgIr1V8dRIQtLf_dSG5zTUCKuQzxyybYx1dzEAs,697
  tests_pytest/pytorch/trainable_infrastructure/test_linear_annealing.py,sha256=eNOpSp0GoLxtEdiRypBp8jaujXfdNxBwKh5Rd-P7WLs,1786
- mct_nightly-2.2.0.20241006.532.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
- mct_nightly-2.2.0.20241006.532.dist-info/METADATA,sha256=skhhX9UH3JERO3bWA-6PalAx6JLwSUkVJyWsT07eFrs,20830
- mct_nightly-2.2.0.20241006.532.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
- mct_nightly-2.2.0.20241006.532.dist-info/top_level.txt,sha256=csdfSXhtRnpWYRzjZ-dRLIhOmM2TEdVXUxG05A5fgb8,39
- mct_nightly-2.2.0.20241006.532.dist-info/RECORD,,
+ mct_nightly-2.2.0.20241008.450.dist-info/LICENSE.md,sha256=aYSSIb-5AFPeITTvXm1UAoe0uYBiMmSS8flvXaaFUks,10174
+ mct_nightly-2.2.0.20241008.450.dist-info/METADATA,sha256=J3vzhM5gpeuXgdgaqJRl6bQc17gSCONWmeNCPLYvyTs,20830
+ mct_nightly-2.2.0.20241008.450.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+ mct_nightly-2.2.0.20241008.450.dist-info/top_level.txt,sha256=csdfSXhtRnpWYRzjZ-dRLIhOmM2TEdVXUxG05A5fgb8,39
+ mct_nightly-2.2.0.20241008.450.dist-info/RECORD,,
@@ -27,4 +27,4 @@ from model_compression_toolkit import data_generation
  from model_compression_toolkit import pruning
  from model_compression_toolkit.trainable_infrastructure.keras.load_model import keras_load_quantized_model

- __version__ = "2.2.0.20241006.000532"
+ __version__ = "2.2.0.20241008.000450"
@@ -12,6 +12,8 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from model_compression_toolkit.core.common.hessian.hessian_scores_request import HessianScoresRequest, HessianMode, HessianScoresGranularity
+ from model_compression_toolkit.core.common.hessian.hessian_scores_request import (
+     HessianScoresRequest, HessianMode, HessianScoresGranularity, HessianEstimationDistribution
+ )
  from model_compression_toolkit.core.common.hessian.hessian_info_service import HessianInfoService
  import model_compression_toolkit.core.common.hessian.hessian_info_utils as hessian_utils
@@ -12,16 +12,19 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
+ import hashlib

  import numpy as np
  from functools import partial
  from tqdm import tqdm
- from typing import Callable, List, Dict, Any, Tuple
+ from typing import Callable, List, Dict, Any, Tuple, TYPE_CHECKING

  from model_compression_toolkit.constants import HESSIAN_NUM_ITERATIONS
  from model_compression_toolkit.core.common.hessian.hessian_scores_request import HessianScoresRequest, \
      HessianScoresGranularity, HessianMode
  from model_compression_toolkit.logger import Logger
+ if TYPE_CHECKING:  # pragma: no cover
+     from model_compression_toolkit.core.common import BaseNode


  class HessianInfoService:
@@ -228,6 +231,61 @@ class HessianInfoService:
          return next_iter_remain_samples if next_iter_remain_samples is not None and len(next_iter_remain_samples) > 0 \
              and len(next_iter_remain_samples[0]) > 0 else None

+     def compute_trackable_per_sample_hessian(self,
+                                              hessian_scores_request: HessianScoresRequest,
+                                              inputs_batch: List[np.ndarray]) -> Dict[str, Dict['BaseNode', np.ndarray]]:
+         """
+         Compute the hessian score per image hash. We compute the score directly for images rather than via a data
+         generator, as a data generator might yield different images each time, depending on how it was defined.
+
+         Args:
+             hessian_scores_request: hessian scores request.
+             inputs_batch: a list containing a batch of inputs.
+
+         Returns:
+             A dict of Hessian scores per image hash per layer {image hash: {layer: score}}.
+         """
+         topo_sorted_nodes_names = [x.name for x in self.graph.get_topo_sorted_nodes()]
+         hessian_scores_request.target_nodes.sort(key=lambda x: topo_sorted_nodes_names.index(x.name))
+
+         hessian_score_by_image_hash = {}
+
+         if not inputs_batch or not isinstance(inputs_batch, list):
+             raise TypeError('Expected a non-empty list of inputs')  # pragma: no cover
+         if len(inputs_batch) > 1:
+             raise NotImplementedError('Per-sample hessian computation is not supported for networks with multiple inputs')  # pragma: no cover
+
+         # Get the framework-specific calculator for Hessian-approximation scores
+         fw_hessian_calculator = self.fw_impl.get_hessian_scores_calculator(graph=self.graph,
+                                                                            input_images=inputs_batch,
+                                                                            hessian_scores_request=hessian_scores_request,
+                                                                            num_iterations_for_approximation=self.num_iterations_for_approximation)
+         hessian_scores = fw_hessian_calculator.compute()
+         for i in range(inputs_batch[0].shape[0]):
+             img_hash = self.calc_image_hash(inputs_batch[0][i])
+             hessian_score_by_image_hash[img_hash] = {
+                 node: score[i] for node, score in zip(hessian_scores_request.target_nodes, hessian_scores)
+             }
+
+         return hessian_score_by_image_hash
+
+     @staticmethod
+     def calc_image_hash(image):
+         """
+         Calculates a hash for an input image.
+
+         Args:
+             image: input 3d image (without batch).
+
+         Returns:
+             Image hash.
+         """
+         if not len(image.shape) == 3:  # pragma: no cover
+             raise ValueError(f'Expected 3d image (without batch) for image hash calculation, got {len(image.shape)}')
+         image_bytes = image.astype(np.float32).tobytes()
+         return hashlib.md5(image_bytes).hexdigest()
+
      def fetch_hessian(self,
                        hessian_scores_request: HessianScoresRequest,
                        required_size: int,
@@ -248,7 +306,7 @@ class HessianInfoService:
              OC for per-output-channel when the requested node has OC output-channels, etc.)
          """

-         if len(hessian_scores_request.target_nodes) == 0:
+         if len(hessian_scores_request.target_nodes) == 0:  # pragma: no cover
              return []

          if required_size == 0:
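
The new `compute_trackable_per_sample_hessian` keys Hessian scores by an MD5 digest of the raw `float32` image bytes (`calc_image_hash`), so a score computed once can be looked up again for the same sample regardless of data-loader ordering. A minimal standalone sketch of the hashing scheme (illustrative only, not part of the package):

```python
import hashlib

import numpy as np

def image_hash(image: np.ndarray) -> str:
    """Hash a single 3-d image (no batch dim) by its raw float32 bytes."""
    assert image.ndim == 3, 'expected a single image without a batch dimension'
    return hashlib.md5(image.astype(np.float32).tobytes()).hexdigest()

batch = np.random.rand(8, 3, 32, 32).astype(np.float32)
# One cache entry per sample; values would hold {layer: score} dicts.
scores_by_hash = {image_hash(img): {} for img in batch}
```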
@@ -40,6 +40,14 @@ class HessianScoresGranularity(Enum):
      PER_TENSOR = 2


+ class HessianEstimationDistribution(str, Enum):
+     """
+     Distribution for the Hutchinson estimator's random vectors.
+     """
+     GAUSSIAN = 'gaussian'
+     RADEMACHER = 'rademacher'
+
+
  class HessianScoresRequest:
      """
      Request configuration for the Hessian-approximation scores.
@@ -53,7 +61,8 @@ class HessianScoresRequest:
      def __init__(self,
                   mode: HessianMode,
                   granularity: HessianScoresGranularity,
-                  target_nodes: List):
+                  target_nodes: List,
+                  distribution: HessianEstimationDistribution = HessianEstimationDistribution.GAUSSIAN):
          """
          Attributes:
              mode (HessianMode): Mode of Hessian-approximation score (w.r.t weights or activations).
@@ -64,6 +73,7 @@ class HessianScoresRequest:
          self.mode = mode  # w.r.t activations or weights
          self.granularity = granularity  # per element, per layer, per channel
          self.target_nodes = target_nodes
+         self.distribution = distribution

      def __eq__(self, other):
          # Checks if the other object is an instance of HessianScoresRequest
@@ -71,9 +81,10 @@ class HessianScoresRequest:
          return isinstance(other, HessianScoresRequest) and \
                 self.mode == other.mode and \
                 self.granularity == other.granularity and \
-                self.target_nodes == other.target_nodes
+                self.target_nodes == other.target_nodes and \
+                self.distribution == other.distribution

      def __hash__(self):
          # Computes the hash based on the attributes.
          # The use of a tuple here ensures that the hash is influenced by all the attributes.
-         return hash((self.mode, self.granularity, tuple(self.target_nodes)))
+         return hash((self.mode, self.granularity, tuple(self.target_nodes), self.distribution))
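
With the `distribution` field added to `HessianScoresRequest`, callers opt into Rademacher-based estimation per request. A sketch of constructing such a request (`target_nodes` is assumed to come from the caller's graph):

```python
from model_compression_toolkit.core.common.hessian import (
    HessianScoresRequest, HessianMode, HessianScoresGranularity, HessianEstimationDistribution)

request = HessianScoresRequest(
    mode=HessianMode.ACTIVATION,
    granularity=HessianScoresGranularity.PER_OUTPUT_CHANNEL,
    target_nodes=target_nodes,  # nodes selected from the model graph by the caller
    distribution=HessianEstimationDistribution.RADEMACHER)
```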
@@ -21,7 +21,8 @@ import numpy as np

  from model_compression_toolkit.constants import MIN_HESSIAN_ITER, HESSIAN_COMP_TOLERANCE, HESSIAN_NUM_ITERATIONS
  from model_compression_toolkit.core.common import Graph
- from model_compression_toolkit.core.common.hessian import HessianScoresRequest, HessianScoresGranularity
+ from model_compression_toolkit.core.common.hessian import (HessianScoresRequest, HessianScoresGranularity,
+                                                            HessianEstimationDistribution)
  from model_compression_toolkit.core.pytorch.back2framework.float_model_builder import FloatPyTorchModelBuilder
  from model_compression_toolkit.core.pytorch.hessian.hessian_scores_calculator_pytorch import \
      HessianScoresCalculatorPytorch
@@ -55,6 +56,66 @@ class ActivationHessianScoresCalculatorPytorch(HessianScoresCalculatorPytorch):
                                                           hessian_scores_request=hessian_scores_request,
                                                           num_iterations_for_approximation=num_iterations_for_approximation)

+     def forward_pass(self):
+         model_output_nodes = [ot.node for ot in self.graph.get_outputs()]
+
+         if len([n for n in self.hessian_request.target_nodes if n in model_output_nodes]) > 0:
+             Logger.critical("Activation Hessian approximation cannot be computed for model outputs. "
+                             "Exclude output nodes from Hessian request targets.")
+
+         grad_model_outputs = self.hessian_request.target_nodes + model_output_nodes
+         model, _ = FloatPyTorchModelBuilder(graph=self.graph, append2output=grad_model_outputs).build_model()
+         model.eval()
+
+         # Run model inference
+         # Set inputs to track gradients during inference
+         for input_tensor in self.input_images:
+             input_tensor.requires_grad_()
+             input_tensor.retain_grad()
+
+         outputs = model(*self.input_images)
+
+         if len(outputs) != len(grad_model_outputs):  # pragma: no cover
+             Logger.critical(f"Mismatch in expected and actual model outputs for activation Hessian approximation. "
+                             f"Expected {len(grad_model_outputs)} outputs, received {len(outputs)}.")
+
+         # Extracting the intermediate activation tensors and the model real output.
+         # Note that we do not allow computing Hessian for output nodes, so there shouldn't be an overlap.
+         num_target_nodes = len(self.hessian_request.target_nodes)
+         # Extract activation tensors of nodes for which we want to compute Hessian
+         target_activation_tensors = outputs[:num_target_nodes]
+         # Extract the model outputs
+         output_tensors = outputs[num_target_nodes:]
+         device = output_tensors[0].device
+
+         # Concat outputs
+         # First, we need to unfold all outputs that are given as list, to extract the actual output tensors
+         output = self.concat_tensors(output_tensors)
+         return output, target_activation_tensors
+
+     def _generate_random_vectors_batch(self, shape: tuple, distribution: HessianEstimationDistribution,
+                                        device: torch.device) -> torch.Tensor:
+         """
+         Generate a batch of random vectors for Hutchinson estimation.
+
+         Args:
+             shape: target shape.
+             distribution: distribution to sample from.
+             device: target device.
+
+         Returns:
+             Random tensor.
+         """
+         if distribution == HessianEstimationDistribution.GAUSSIAN:
+             return torch.randn(shape, device=device)
+
+         if distribution == HessianEstimationDistribution.RADEMACHER:
+             v = torch.randint(high=2, size=shape, device=device)
+             v[v == 0] = -1
+             return v
+
+         raise ValueError(f'Unknown distribution {distribution}')  # pragma: no cover
+
      def compute(self) -> List[np.ndarray]:
          """
          Compute the scores that are based on the approximation of the Hessian w.r.t the requested target nodes' activations.
@@ -62,91 +123,79 @@ class ActivationHessianScoresCalculatorPytorch(HessianScoresCalculatorPytorch):
          Returns:
              List[np.ndarray]: Scores based on the approximated Hessian for the requested nodes.
          """
-         if self.hessian_request.granularity == HessianScoresGranularity.PER_TENSOR:
-
-             model_output_nodes = [ot.node for ot in self.graph.get_outputs()]
-
-             if len([n for n in self.hessian_request.target_nodes if n in model_output_nodes]) > 0:
-                 Logger.critical("Activation Hessian approximation cannot be computed for model outputs. "
-                                 "Exclude output nodes from Hessian request targets.")
-
-             grad_model_outputs = self.hessian_request.target_nodes + model_output_nodes
-             model, _ = FloatPyTorchModelBuilder(graph=self.graph, append2output=grad_model_outputs).build_model()
-             model.eval()
-
-             # Run model inference
-             # Set inputs to track gradients during inference
-             for input_tensor in self.input_images:
-                 input_tensor.requires_grad_()
-                 input_tensor.retain_grad()
-
-             outputs = model(*self.input_images)
-
-             if len(outputs) != len(grad_model_outputs):  # pragma: no cover
-                 Logger.critical(f"Mismatch in expected and actual model outputs for activation Hessian approximation. "
-                                 f"Expected {len(grad_model_outputs)} outputs, received {len(outputs)}.")
-
-             # Extracting the intermediate activation tensors and the model real output.
-             # Note that we do not allow computing Hessian for output nodes, so there shouldn't be an overlap.
-             num_target_nodes = len(self.hessian_request.target_nodes)
-             # Extract activation tensors of nodes for which we want to compute Hessian
-             target_activation_tensors = outputs[:num_target_nodes]
-             # Extract the model outputs
-             output_tensors = outputs[num_target_nodes:]
-             device = output_tensors[0].device
-
-             # Concat outputs
-             # First, we need to unfold all outputs that are given as list, to extract the actual output tensors
-             output = self.concat_tensors(output_tensors)
-
-             ipts_hessian_approx_scores = [torch.tensor([0.0],
-                                                        requires_grad=True,
-                                                        device=device)
-                                           for _ in range(len(target_activation_tensors))]
-             prev_mean_results = None
-             for j in tqdm(range(self.num_iterations_for_approximation), "Hessian random iterations"):  # Approximation iterations
-                 # Getting a random vector with normal distribution
-                 v = torch.randn(output.shape, device=device)
-                 f_v = torch.sum(v * output)
-                 for i, ipt_tensor in enumerate(target_activation_tensors):  # Per interest point activation tensor
-                     # Computing the hessian-approximation scores by getting the gradient of (output * v)
-                     hess_v = autograd.grad(outputs=f_v,
-                                            inputs=ipt_tensor,
-                                            retain_graph=True,
-                                            allow_unused=True)[0]
-
-                     if hess_v is None:
-                         # In case we have an output node, which is an interest point, but it is not differentiable,
-                         # we consider its Hessian to be the initial value 0.
-                         continue  # pragma: no cover
-
-                     # Mean over all dims but the batch (CxHxW for conv)
-                     hessian_approx_scores = torch.sum(hess_v ** 2.0, dim=tuple(d for d in range(1, len(hess_v.shape))))
-
-                     # Update node Hessian approximation mean over random iterations
-                     ipts_hessian_approx_scores[i] = (j * ipts_hessian_approx_scores[i] + hessian_approx_scores) / (j + 1)
-
-                 # If the change to the maximal mean Hessian approximation is insignificant we stop the calculation
-                 if j > MIN_HESSIAN_ITER:
-                     if prev_mean_results is not None:
-                         new_mean_res = torch.mean(torch.stack(ipts_hessian_approx_scores), dim=1)
-                         relative_delta_per_node = (torch.abs(new_mean_res - prev_mean_results) /
-                                                    (torch.abs(new_mean_res) + 1e-6))
-                         max_delta = torch.max(relative_delta_per_node)
-                         if max_delta < HESSIAN_COMP_TOLERANCE:
-                             break
-                 prev_mean_results = torch.mean(torch.stack(ipts_hessian_approx_scores), dim=1)
-
-             # Convert results to list of numpy arrays
-             hessian_results = [torch_tensor_to_numpy(h) for h in ipts_hessian_approx_scores]
-             # Extend the Hessian tensors shape to align with expected return type
-             # TODO: currently, only per-tensor Hessian is available for activation.
-             # Once implementing per-channel or per-element, this alignment needs to be verified and handled separately.
-             hessian_results = [h[..., np.newaxis] for h in hessian_results]
-
-             return hessian_results
-
-         else:  # pragma: no cover
-             Logger.critical(f"PyTorch activation Hessian's approximation scores does not support "
-                             f"{self.hessian_request.granularity} granularity.")
+         output, target_activation_tensors = self.forward_pass()

+         if self.hessian_request.granularity == HessianScoresGranularity.PER_TENSOR:
+             hessian_scores = self._compute_per_tensor(output, target_activation_tensors)
+         elif self.hessian_request.granularity == HessianScoresGranularity.PER_OUTPUT_CHANNEL:
+             hessian_scores = self._compute_per_channel(output, target_activation_tensors)
+         else:
+             raise NotImplementedError(f'{self.hessian_request.granularity} is not supported')  # pragma: no cover
+
+         # Convert results to list of numpy arrays
+         hessian_results = [torch_tensor_to_numpy(h) for h in hessian_scores]
+         return hessian_results
+
+     def _compute_per_tensor(self, output, target_activation_tensors):
+         assert self.hessian_request.granularity == HessianScoresGranularity.PER_TENSOR
+         ipts_hessian_approx_scores = [torch.tensor([0.0], requires_grad=True, device=output.device)
+                                       for _ in range(len(target_activation_tensors))]
+         prev_mean_results = None
+         for j in tqdm(range(self.num_iterations_for_approximation), "Hessian random iterations"):  # Approximation iterations
+             # Getting a random vector from the configured distribution
+             v = self._generate_random_vectors_batch(output.shape, self.hessian_request.distribution, output.device)
+             f_v = torch.sum(v * output)
+             for i, ipt_tensor in enumerate(target_activation_tensors):  # Per interest point activation tensor
+                 # Computing the hessian-approximation scores by getting the gradient of (output * v)
+                 hess_v = autograd.grad(outputs=f_v,
+                                        inputs=ipt_tensor,
+                                        retain_graph=True,
+                                        allow_unused=True)[0]
+
+                 if hess_v is None:
+                     # In case we have an output node, which is an interest point, but it is not differentiable,
+                     # we consider its Hessian to be the initial value 0.
+                     continue  # pragma: no cover
+
+                 # Mean over all dims but the batch (CxHxW for conv)
+                 hessian_approx_scores = torch.sum(hess_v ** 2.0, dim=tuple(d for d in range(1, len(hess_v.shape))))
+
+                 # Update node Hessian approximation mean over random iterations
+                 ipts_hessian_approx_scores[i] = (j * ipts_hessian_approx_scores[i] + hessian_approx_scores) / (j + 1)
+
+             # If the change to the maximal mean Hessian approximation is insignificant we stop the calculation
+             if j > MIN_HESSIAN_ITER:
+                 if prev_mean_results is not None:
+                     new_mean_res = torch.mean(torch.stack(ipts_hessian_approx_scores), dim=1)
+                     relative_delta_per_node = (torch.abs(new_mean_res - prev_mean_results) /
+                                                (torch.abs(new_mean_res) + 1e-6))
+                     max_delta = torch.max(relative_delta_per_node)
+                     if max_delta < HESSIAN_COMP_TOLERANCE:
+                         break
+             prev_mean_results = torch.mean(torch.stack(ipts_hessian_approx_scores), dim=1)
+
+         # Add an extra dimension to preserve the previous behaviour
+         ipts_hessian_approx_scores = [torch.unsqueeze(t, -1) for t in ipts_hessian_approx_scores]
+         return ipts_hessian_approx_scores
+
+     def _compute_per_channel(self, output, target_activation_tensors):
+         assert self.hessian_request.granularity == HessianScoresGranularity.PER_OUTPUT_CHANNEL
+         ipts_hessian_approx_scores = [torch.tensor(0.0, requires_grad=True, device=output.device)
+                                       for _ in range(len(target_activation_tensors))]
+
+         for j in tqdm(range(self.num_iterations_for_approximation), "Hessian random iterations"):  # Approximation iterations
+             v = self._generate_random_vectors_batch(output.shape, self.hessian_request.distribution, output.device)
+             f_v = torch.sum(v * output)
+             for i, ipt_tensor in enumerate(target_activation_tensors):  # Per interest point activation tensor
+                 hess_v = autograd.grad(outputs=f_v,
+                                        inputs=ipt_tensor,
+                                        retain_graph=True)[0]
+                 hessian_approx_scores = hess_v ** 2
+                 rank = len(hess_v.shape)
+                 if rank > 2:
+                     hessian_approx_scores = torch.mean(hessian_approx_scores, dim=tuple(range(2, rank)))
+
+                 # Update node Hessian approximation mean over random iterations
+                 ipts_hessian_approx_scores[i] = (j * ipts_hessian_approx_scores[i] + hessian_approx_scores) / (j + 1)
+
+         return ipts_hessian_approx_scores
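
Both `_compute_per_tensor` and `_compute_per_channel` are Hutchinson-style estimators: they probe the Hessian with random vectors v whose entries have zero mean and unit variance, a property both the Gaussian and the new Rademacher (±1) distributions satisfy; Rademacher probes are a common choice because they often give lower estimator variance. A self-contained sketch of the probe-vector generation (mirrors `_generate_random_vectors_batch`):

```python
import torch

def rademacher(shape, device=None):
    # ±1 entries with equal probability: zero mean, unit variance,
    # the same first two moments as a standard Gaussian probe.
    v = torch.randint(high=2, size=shape, device=device)
    v[v == 0] = -1
    return v

v_gauss = torch.randn(4, 10)   # Gaussian probe batch
v_rad = rademacher((4, 10))    # Rademacher probe batch
```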
@@ -17,6 +17,7 @@ from enum import Enum
  from typing import Callable, Any, Dict, Optional

  from model_compression_toolkit.constants import GPTQ_HESSIAN_NUM_SAMPLES, ACT_HESSIAN_DEFAULT_BATCH_SIZE
+ from model_compression_toolkit.core.common.hessian import HessianScoresGranularity, HessianEstimationDistribution
  from model_compression_toolkit.gptq.common.gptq_constants import REG_DEFAULT


@@ -39,17 +40,21 @@ class GPTQHessianScoresConfig:
      Configuration to use for computing the Hessian-based scores for GPTQ loss metric.

      Args:
-         hessians_num_samples (int): Number of samples to use for computing the Hessian-based scores.
+         hessians_num_samples (int|None): Number of samples to use for computing the Hessian-based scores.
+             If None, compute the Hessian for all images.
          norm_scores (bool): Whether to normalize the returned scores of the weighted loss function (to get values between 0 and 1).
          log_norm (bool): Whether to use log normalization for the GPTQ Hessian-based scores.
          scale_log_norm (bool): Whether to scale the final vector of the Hessian-based scores.
          hessian_batch_size (int): The Hessian computation batch size. Used only if using GPTQ with a Hessian-based objective.
+         per_sample (bool): Whether to use per-sample attention scores.
      """
-     hessians_num_samples: int = GPTQ_HESSIAN_NUM_SAMPLES
+     hessians_num_samples: Optional[int] = GPTQ_HESSIAN_NUM_SAMPLES
      norm_scores: bool = True
      log_norm: bool = True
      scale_log_norm: bool = False
      hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE
+     per_sample: bool = False
+     estimator_distribution: HessianEstimationDistribution = HessianEstimationDistribution.GAUSSIAN


  @dataclass
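
Taken together, a sample-layer-attention configuration disables every form of score normalization and computes Hessian scores for all images; this mirrors the defaults wired up in `get_pytorch_gptq_config` further down (sketch):

```python
from model_compression_toolkit.core.common.hessian import HessianEstimationDistribution
from model_compression_toolkit.gptq.common.gptq_config import GPTQHessianScoresConfig

sla_config = GPTQHessianScoresConfig(
    hessians_num_samples=None,   # None = compute Hessian scores for all images
    norm_scores=False,
    log_norm=False,
    scale_log_norm=False,
    hessian_batch_size=32,
    per_sample=True,
    estimator_distribution=HessianEstimationDistribution.RADEMACHER)
```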
@@ -13,6 +13,7 @@
  # limitations under the License.
  # ==============================================================================
  import copy
+ import hashlib
  from abc import ABC, abstractmethod
  import numpy as np
  from typing import Callable, List, Any, Dict
@@ -143,7 +144,11 @@ class GPTQTrainer(ABC):
              return np.asarray([1 / num_nodes for _ in range(num_nodes)])

          # Fetch hessian approximations for each target node
-         compare_point_to_hessian_approx_scores = self._fetch_hessian_approximations()
+         # TODO this smells like a potential bug. In hessian calculation target nodes are topo sorted and results are returned
+         # TODO also target nodes are replaced for reuse. Does this work correctly?
+         approximations = self._fetch_hessian_approximations(HessianScoresGranularity.PER_TENSOR)
+         compare_point_to_hessian_approx_scores = {node: score for node, score in zip(self.compare_points, approximations)}
+
          # Process the fetched hessian approximations to gather them per images
          hessian_approx_score_by_image = (
              self._process_hessian_approximations(compare_point_to_hessian_approx_scores))
@@ -172,29 +177,55 @@ class GPTQTrainer(ABC):
          # If log normalization is not enabled, return the mean of the approximations across images
          return np.mean(hessian_approx_score_by_image, axis=0)

-     def _fetch_hessian_approximations(self) -> Dict[BaseNode, List[List[float]]]:
+     def _compute_sample_layer_attention_scores(self, inputs_batch) -> Dict[str, Dict[BaseNode, np.ndarray]]:
+         """
+         Compute sample-layer attention scores per image hash per layer.
+
+         Args:
+             inputs_batch: a list containing a batch of inputs.
+
+         Returns:
+             A dictionary with the structure {img_hash: {layer: score}}.
+         """
+         request = self._build_hessian_request(HessianScoresGranularity.PER_OUTPUT_CHANNEL)
+         hessian_batch_size = self.gptq_config.hessian_weights_config.hessian_batch_size
+
+         hessian_score_per_image_per_layer = {}
+         # If the hessian batch is smaller than the inputs batch, split the inputs into hessian batches.
+         # If the hessian batch is larger, it is currently ignored (TODO).
+         for i in range(0, inputs_batch[0].shape[0], hessian_batch_size):
+             inputs = [t[i: i+hessian_batch_size] for t in inputs_batch]
+             hessian_score_per_image_per_layer.update(
+                 self.hessian_service.compute_trackable_per_sample_hessian(request, inputs)
+             )
+         for img_hash, v in hessian_score_per_image_per_layer.items():
+             hessian_score_per_image_per_layer[img_hash] = {k: t.max(axis=0) for k, t in v.items()}
+         return hessian_score_per_image_per_layer
+
+     def _fetch_hessian_approximations(self, granularity: HessianScoresGranularity) -> Dict[BaseNode, List[List[float]]]:
          """
          Fetches hessian approximations for each target node.

          Returns:
              Mapping of target nodes to their hessian approximations.
          """
-         approximations = {}
-         hessian_scores_request = HessianScoresRequest(
-             mode=HessianMode.ACTIVATION,
-             granularity=HessianScoresGranularity.PER_TENSOR,
-             target_nodes=self.compare_points
-         )
+         hessian_scores_request = self._build_hessian_request(granularity)
+
          node_approximations = self.hessian_service.fetch_hessian(
              hessian_scores_request=hessian_scores_request,
              required_size=self.gptq_config.hessian_weights_config.hessians_num_samples,
              batch_size=self.gptq_config.hessian_weights_config.hessian_batch_size
          )
+         return node_approximations

-         for i, target_node in enumerate(self.compare_points):
-             approximations[target_node] = node_approximations[i]
-
-         return approximations
+     def _build_hessian_request(self, granularity):
+         return HessianScoresRequest(
+             mode=HessianMode.ACTIVATION,
+             granularity=granularity,
+             target_nodes=self.compare_points,
+             distribution=self.gptq_config.hessian_weights_config.estimator_distribution
+         )

      def _process_hessian_approximations(self, approximations: Dict[BaseNode, List[List[float]]]) -> List:
          """
@@ -13,8 +13,10 @@
  # limitations under the License.
  # ==============================================================================
  from typing import List
+
  import torch

+
  def mse_loss(y: torch.Tensor, x: torch.Tensor, normalized: bool = True) -> torch.Tensor:
      """
      Compute the MSE of two tensors.
@@ -25,7 +27,7 @@ def mse_loss(y: torch.Tensor, x: torch.Tensor, normalized: bool = True) -> torch
      Returns:
          The MSE of two tensors.
      """
-     loss = torch.nn.MSELoss()(x,y)
+     loss = torch.nn.MSELoss()(x, y)
      return loss / torch.mean(torch.square(x)) if normalized else loss


@@ -62,3 +64,36 @@ def multiple_tensors_mse_loss(y_list: List[torch.Tensor],
      else:
          return torch.mean(torch.stack(loss_values_list))

+
+ def sample_layer_attention_loss(y_list: List[torch.Tensor],
+                                 x_list: List[torch.Tensor],
+                                 fxp_w_list,
+                                 flp_w_list,
+                                 act_bn_mean,
+                                 act_bn_std,
+                                 loss_weights: torch.Tensor) -> torch.Tensor:
+     """
+     Compute the Sample Layer Attention loss between two lists of tensors.
+
+     Args:
+         y_list: First list of tensors.
+         x_list: Second list of tensors.
+         fxp_w_list, flp_w_list, act_bn_mean, act_bn_std: unused (needed to comply with the interface).
+         loss_weights: layer-sample weights tensor of shape (layers, batch).
+
+     Returns:
+         Sample Layer Attention loss (a scalar).
+     """
+     loss = 0
+     layers_mean_w = []
+
+     for i, (y, x, w) in enumerate(zip(y_list, x_list, loss_weights)):
+         norm = (y - x).pow(2).sum(1)
+         if len(norm.shape) > 1:
+             norm = norm.flatten(1).mean(1)
+         loss += torch.mean(w * norm)
+         layers_mean_w.append(w.mean())
+
+     loss = loss / torch.stack(layers_mean_w).max()
+     return loss
+
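
The new loss consumes per-layer activation pairs plus a (layers × batch) weight tensor; the four unused arguments only keep the existing loss interface. A toy invocation with random tensors (shapes are illustrative):

```python
import torch

layers, batch = 3, 4
y_list = [torch.randn(batch, 16, 8, 8) for _ in range(layers)]  # quantized-model activations
x_list = [y + 0.01 * torch.randn_like(y) for y in y_list]       # float-model activations
loss_weights = torch.rand(layers, batch)                        # per-layer, per-sample attention scores

loss = sample_layer_attention_loss(y_list, x_list, None, None, None, None, loss_weights)
print(loss)  # scalar tensor
```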
@@ -12,7 +12,7 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import Callable, List, Tuple, Union
+ from typing import Callable, List, Tuple, Union, Dict

  import numpy as np
  from torch.nn import Module
@@ -105,8 +105,18 @@ class PytorchGPTQTrainer(GPTQTrainer):
          self.optimizer_with_param = self.get_optimizer_with_param(trainable_weights,
                                                                    trainable_bias,
                                                                    trainable_threshold)
-
-         self.weights_for_average_loss = to_torch_tensor(self.compute_hessian_based_weights())
+         hessian_cfg = self.gptq_config.hessian_weights_config
+         self.use_sample_layer_attention = hessian_cfg.per_sample
+         self.hessian_score_per_layer = None  # for fixed layer weights
+         self.hessian_score_per_image_per_layer = None  # for sample-layer attention
+         if self.use_sample_layer_attention:
+             # normalization is currently not supported, make sure the config reflects it.
+             if hessian_cfg.norm_scores or hessian_cfg.log_norm or hessian_cfg.scale_log_norm:
+                 raise NotImplementedError()
+             # Per-sample hessian scores are calculated on demand during the training loop
+             self.hessian_score_per_image_per_layer = {}
+         else:
+             self.hessian_score_per_layer = to_torch_tensor(self.compute_hessian_based_weights())

          self.reg_func = get_regularization(self.gptq_config, _get_total_grad_steps)

@@ -210,13 +220,17 @@ class PytorchGPTQTrainer(GPTQTrainer):

      def compute_gradients(self,
                            y_float: List[torch.Tensor],
-                           input_tensors: List[torch.Tensor]) -> Tuple[torch.Tensor, List[np.ndarray]]:
+                           input_tensors: List[torch.Tensor],
+                           distill_loss_weights: torch.Tensor,
+                           round_reg_weights: torch.Tensor) -> Tuple[torch.Tensor, List[np.ndarray]]:
          """
          Get outputs from both teacher and student networks. Compute the observed error,
          and use it to compute the gradients and apply them to the student weights.
          Args:
              y_float: A list of reference tensors from the floating-point network.
              input_tensors: A list of input tensors to pass through the networks.
+             distill_loss_weights: Weights for the distillation loss.
+             round_reg_weights: Weights for the rounding regularization loss.
          Returns:
              Loss and gradients.
          """
@@ -231,9 +245,8 @@ class PytorchGPTQTrainer(GPTQTrainer):
                                             self.flp_weights_list,
                                             self.compare_points_mean,
                                             self.compare_points_std,
-                                            self.weights_for_average_loss)
-
-         reg_value = self.reg_func(self.fxp_model, self.gptq_config.regularization_factor)
+                                            distill_loss_weights)
+         reg_value = self.reg_func(self.fxp_model, self.gptq_config.regularization_factor, round_reg_weights)

          loss_value += reg_value

@@ -261,10 +274,11 @@ class PytorchGPTQTrainer(GPTQTrainer):
          for _ in epochs_pbar:
              with tqdm(data_function(), position=1, leave=False) as data_pbar:
                  for data in data_pbar:
+                     distill_weights, reg_weights = to_torch_tensor(self._get_loss_weights(data))
                      input_data = [d * self.input_scale for d in data]
                      input_tensor = to_torch_tensor(input_data)
                      y_float = self.float_model(input_tensor)  # running float model
-                     loss_value, grads = self.compute_gradients(y_float, input_tensor)
+                     loss_value, grads = self.compute_gradients(y_float, input_tensor, distill_weights, reg_weights)
                      # Run one step of gradient descent by updating the value of the variables to minimize the loss.
                      for (optimizer, _) in self.optimizer_with_param:
                          optimizer.step()
@@ -276,6 +290,42 @@ class PytorchGPTQTrainer(GPTQTrainer):
              self.loss_list.append(loss_value.item())
              Logger.debug(f'last loss value: {self.loss_list[-1]}')

+     def _get_loss_weights(self, input_tensors: List[torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]:
+         """
+         Fetches weights for distillation and round regularization parts of loss.
+
+         Args:
+             input_tensors: list containing a batch of inputs.
+
+         Returns:
+             A tuple of two tensors:
+                 - weights for the distillation loss
+                 - weights for the rounding regularization loss
+         """
+         if self.use_sample_layer_attention is False:
+             return self.hessian_score_per_layer, torch.ones_like(self.hessian_score_per_layer)
+
+         if len(input_tensors) > 1:
+             raise NotImplementedError('Sample-Layer Attention is not currently supported for networks with multiple inputs')
+
+         image_scores = []
+         batch = input_tensors[0]
+         img_hashes = [self.hessian_service.calc_image_hash(img) for img in batch]
+         for img_hash in img_hashes:
+             # If the sample-layer attention score for the image is not found, compute and store it for the whole batch.
+             if img_hash not in self.hessian_score_per_image_per_layer:
+                 score_per_image_per_layer = self._compute_sample_layer_attention_scores(input_tensors)
+                 self.hessian_score_per_image_per_layer.update(score_per_image_per_layer)
+             img_scores_per_layer: Dict[BaseNode, np.ndarray] = self.hessian_score_per_image_per_layer[img_hash]
+             # Fetch the image's scores for all layers and combine them into a single tensor
+             img_scores = np.stack(list(img_scores_per_layer.values()), axis=0)
+             image_scores.append(img_scores)
+
+         layer_sample_weights = np.stack(image_scores, axis=1)  # layers X images
+         layer_weights = layer_sample_weights.mean(axis=1)
+         return layer_sample_weights, layer_weights
+

      def update_graph(self) -> Graph:
          """
          Update a graph using GPTQ after minimizing the loss between the float model's output
@@ -18,6 +18,7 @@ from typing import Callable, Union
  from model_compression_toolkit.constants import ACT_HESSIAN_DEFAULT_BATCH_SIZE, PYTORCH
  from model_compression_toolkit.core import CoreConfig
  from model_compression_toolkit.core.analyzer import analyzer_model_quantization
+ from model_compression_toolkit.core.common.hessian import HessianScoresGranularity, HessianEstimationDistribution
  from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
      MixedPrecisionQuantizationConfig
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
@@ -43,7 +44,7 @@ if FOUND_TORCH:
      from model_compression_toolkit.core.pytorch.default_framework_info import DEFAULT_PYTORCH_INFO
      from model_compression_toolkit.gptq.pytorch.gptq_pytorch_implementation import GPTQPytorchImplemantation
      from model_compression_toolkit.target_platform_capabilities.constants import DEFAULT_TP_MODEL
-     from model_compression_toolkit.gptq.pytorch.gptq_loss import multiple_tensors_mse_loss
+     from model_compression_toolkit.gptq.pytorch.gptq_loss import multiple_tensors_mse_loss, sample_layer_attention_loss
      from model_compression_toolkit.exporter.model_wrapper.pytorch.builder.fully_quantized_model_builder import get_exportable_pytorch_model
      import torch
      from torch.nn import Module
@@ -55,11 +56,12 @@ if FOUND_TORCH:
      def get_pytorch_gptq_config(n_epochs: int,
                                  optimizer: Optimizer = None,
                                  optimizer_rest: Optimizer = None,
-                                 loss: Callable = multiple_tensors_mse_loss,
+                                 loss: Callable = None,
                                  log_function: Callable = None,
                                  use_hessian_based_weights: bool = True,
                                  regularization_factor: float = REG_DEFAULT,
                                  hessian_batch_size: int = ACT_HESSIAN_DEFAULT_BATCH_SIZE,
+                                 use_hessian_sample_attention: bool = False,
                                  gradual_activation_quantization: Union[bool, GradualActivationQuantizationConfig] = False,
                                  ) -> GradientPTQConfig:
          """
@@ -74,6 +76,7 @@ if FOUND_TORCH:
              use_hessian_based_weights (bool): Whether to use Hessian-based weights for weighted average loss.
              regularization_factor (float): A floating point number that defines the regularization factor.
              hessian_batch_size (int): Batch size for Hessian computation in Hessian-based weights GPTQ.
+             use_hessian_sample_attention (bool): Whether to use the Sample-Layer Attention score for the weighted loss.
              gradual_activation_quantization (bool, GradualActivationQuantizationConfig):
                  If False, GradualActivationQuantization is disabled.
                  If True, GradualActivationQuantization is enabled with the default settings.
@@ -105,19 +108,37 @@ if FOUND_TORCH:

          bias_optimizer = torch.optim.SGD([torch.Tensor([])], lr=LR_BIAS_DEFAULT, momentum=GPTQ_MOMENTUM)

+         if use_hessian_sample_attention:
+             if not use_hessian_based_weights:  # pragma: no cover
+                 raise ValueError('use_hessian_based_weights must be set to True in order to use Sample Layer Attention.')
+
+             hessian_weights_config = GPTQHessianScoresConfig(
+                 hessians_num_samples=None,
+                 norm_scores=False,
+                 log_norm=False,
+                 scale_log_norm=False,
+                 hessian_batch_size=hessian_batch_size,
+                 per_sample=True,
+                 estimator_distribution=HessianEstimationDistribution.RADEMACHER
+             )
+             loss = loss or sample_layer_attention_loss
+         else:
+             hessian_weights_config = GPTQHessianScoresConfig(hessian_batch_size=hessian_batch_size)
+             loss = loss or multiple_tensors_mse_loss
+
          if isinstance(gradual_activation_quantization, bool):
              gradual_quant_config = GradualActivationQuantizationConfig() if gradual_activation_quantization else None
          elif isinstance(gradual_activation_quantization, GradualActivationQuantizationConfig):
              gradual_quant_config = gradual_activation_quantization
-         else:
+         else:  # pragma: no cover
              raise TypeError(f'gradual_activation_quantization argument should be bool or '
-                             f'GradualActivationQuantizationConfig, received {type(gradual_activation_quantization)}')  # pragma: no cover
+                             f'GradualActivationQuantizationConfig, received {type(gradual_activation_quantization)}')

          return GradientPTQConfig(n_epochs, optimizer, optimizer_rest=optimizer_rest, loss=loss,
                                   log_function=log_function, train_bias=True, optimizer_bias=bias_optimizer,
                                   use_hessian_based_weights=use_hessian_based_weights,
                                   regularization_factor=regularization_factor,
-                                  hessian_weights_config=GPTQHessianScoresConfig(hessian_batch_size=hessian_batch_size),
+                                  hessian_weights_config=hessian_weights_config,
                                   gradual_activation_quantization_config=gradual_quant_config)

      def pytorch_gradient_post_training_quantization(model: Module,
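
End to end, sample-layer attention is enabled with a single flag; the facade then picks `sample_layer_attention_loss`, the Rademacher estimator, and per-sample scores automatically. A usage sketch (assuming the usual `mct.gptq` entry point):

```python
import model_compression_toolkit as mct

gptq_config = mct.gptq.get_pytorch_gptq_config(n_epochs=5,
                                               use_hessian_sample_attention=True)
```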
@@ -185,11 +206,11 @@ if FOUND_TORCH:

          """

-         if core_config.is_mixed_precision_enabled:
+         if core_config.is_mixed_precision_enabled:  # pragma: no cover
              if not isinstance(core_config.mixed_precision_config, MixedPrecisionQuantizationConfig):
                  Logger.critical("Given quantization config for mixed-precision is not of type 'MixedPrecisionQuantizationConfig'. "
                                  "Ensure usage of the correct API for 'pytorch_gradient_post_training_quantization' "
-                                 "or provide a valid mixed-precision configuration.")  # pragma: no cover
+                                 "or provide a valid mixed-precision configuration.")

          tb_w = init_tensorboard_writer(DEFAULT_PYTORCH_INFO)

@@ -41,4 +41,4 @@ def get_regularization(gptq_config: GradientPTQConfig, get_total_grad_steps_fn:
          scheduler = LinearAnnealingScheduler(t_start=t_start, t_end=total_gradient_steps, initial_val=20, target_val=2)
          return SoftQuantizerRegularization(scheduler)
      else:
-         return lambda m, e_reg: 0
+         return lambda *args, **kwargs: 0
@@ -40,32 +40,34 @@ class SoftQuantizerRegularization:

          self.count_iter = 0

-     def __call__(self, model: nn.Module, entropy_reg: float):
+     def __call__(self, model: nn.Module, entropy_reg: float, layer_weights: torch.Tensor):
          """
          Returns the soft quantizer regularization value for SoftRounding.

          Args:
              model: A model to be quantized with SoftRounding.
              entropy_reg: Entropy value to scale the quantizer regularization.
+             layer_weights: a vector of layer weights.

          Returns: Regularization value.
          """
+         layers = [m for m in model.modules() if isinstance(m, PytorchQuantizationWrapper)]

-         soft_reg_aux: List[torch.Tensor] = []
-         b = self.beta_scheduler(self.count_iter)
-         for layer in model.modules():
-             if isinstance(layer, PytorchQuantizationWrapper):
-                 kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
-                                                                       fw_info=DEFAULT_PYTORCH_INFO)
-
-                 st = layer.weights_quantizers[kernel_attribute].get_soft_targets()
-                 soft_reg_aux.append((1 - torch.pow(torch.abs(st - .5) * 2, b)).sum())
+         if len(layer_weights.shape) != 1 or layer_weights.shape[0] != len(layers):
+             raise ValueError(f'Expected weights to be a vector of length {len(layers)}, received {layer_weights.shape}.')  # pragma: no cover
+         max_w = layer_weights.max()

+         b = self.beta_scheduler(self.count_iter)
          reg = 0
+         for layer, w in zip(layers, layer_weights):
+             kernel_attribute = get_kernel_attribute_name_for_gptq(layer_type=type(layer.layer),
+                                                                   fw_info=DEFAULT_PYTORCH_INFO)

-         for sq in soft_reg_aux:
-             reg += sq
+             st = layer.weights_quantizers[kernel_attribute].get_soft_targets()
+             soft_loss = (1 - torch.pow(torch.abs(st - .5) * 2, b)).sum()
+             reg += w * soft_loss

+         reg = reg / max_w
          self.count_iter += 1

          return entropy_reg * reg
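
The reworked regularizer thus computes reg = Σᵢ wᵢ·sᵢ / max(w) over the per-layer soft-rounding penalties sᵢ; with all-ones weights (the non-attention path in `_get_loss_weights`) it reduces to the previous unweighted sum. An equivalent computation on precomputed values (sketch):

```python
import torch

s = torch.tensor([0.8, 1.2, 0.5])  # per-layer soft-rounding penalties
w = torch.tensor([1.0, 2.0, 0.5])  # per-layer weights (all ones in the non-attention path)
reg = (w * s).sum() / w.max()      # matches the weighted loop above
```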