mct-nightly 2.2.0.20241022.507__py3-none-any.whl → 2.2.0.20241024.501__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. {mct_nightly-2.2.0.20241022.507.dist-info → mct_nightly-2.2.0.20241024.501.dist-info}/METADATA +1 -1
  2. {mct_nightly-2.2.0.20241022.507.dist-info → mct_nightly-2.2.0.20241024.501.dist-info}/RECORD +38 -31
  3. model_compression_toolkit/__init__.py +1 -1
  4. model_compression_toolkit/core/common/framework_implementation.py +43 -29
  5. model_compression_toolkit/core/common/hessian/__init__.py +1 -1
  6. model_compression_toolkit/core/common/hessian/hessian_info_service.py +222 -371
  7. model_compression_toolkit/core/common/hessian/hessian_scores_request.py +27 -41
  8. model_compression_toolkit/core/common/mixed_precision/sensitivity_evaluation.py +8 -10
  9. model_compression_toolkit/core/common/pruning/importance_metrics/lfh_importance_metric.py +11 -9
  10. model_compression_toolkit/core/common/quantization/quantization_params_generation/error_functions.py +10 -6
  11. model_compression_toolkit/core/common/quantization/quantization_params_generation/qparams_computation.py +17 -15
  12. model_compression_toolkit/core/keras/data_util.py +67 -0
  13. model_compression_toolkit/core/keras/keras_implementation.py +7 -1
  14. model_compression_toolkit/core/keras/tf_tensor_numpy.py +1 -1
  15. model_compression_toolkit/core/pytorch/back2framework/pytorch_model_builder.py +1 -1
  16. model_compression_toolkit/core/pytorch/data_util.py +163 -0
  17. model_compression_toolkit/core/pytorch/hessian/activation_hessian_scores_calculator_pytorch.py +6 -31
  18. model_compression_toolkit/core/pytorch/hessian/hessian_scores_calculator_pytorch.py +11 -21
  19. model_compression_toolkit/core/pytorch/hessian/weights_hessian_scores_calculator_pytorch.py +9 -7
  20. model_compression_toolkit/core/pytorch/pytorch_implementation.py +8 -2
  21. model_compression_toolkit/core/pytorch/utils.py +22 -19
  22. model_compression_toolkit/core/quantization_prep_runner.py +2 -1
  23. model_compression_toolkit/core/runner.py +1 -2
  24. model_compression_toolkit/gptq/common/gptq_config.py +0 -2
  25. model_compression_toolkit/gptq/common/gptq_training.py +58 -114
  26. model_compression_toolkit/gptq/keras/gptq_training.py +15 -6
  27. model_compression_toolkit/gptq/pytorch/gptq_loss.py +3 -2
  28. model_compression_toolkit/gptq/pytorch/gptq_training.py +97 -64
  29. model_compression_toolkit/gptq/pytorch/quantization_facade.py +0 -2
  30. model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py +4 -3
  31. tests_pytest/keras/__init__.py +14 -0
  32. tests_pytest/keras/core/__init__.py +14 -0
  33. tests_pytest/keras/core/test_data_util.py +91 -0
  34. tests_pytest/pytorch/core/__init__.py +14 -0
  35. tests_pytest/pytorch/core/test_data_util.py +125 -0
  36. {mct_nightly-2.2.0.20241022.507.dist-info → mct_nightly-2.2.0.20241024.501.dist-info}/LICENSE.md +0 -0
  37. {mct_nightly-2.2.0.20241022.507.dist-info → mct_nightly-2.2.0.20241024.501.dist-info}/WHEEL +0 -0
  38. {mct_nightly-2.2.0.20241022.507.dist-info → mct_nightly-2.2.0.20241024.501.dist-info}/top_level.txt +0 -0
model_compression_toolkit/gptq/pytorch/gptq_training.py
@@ -12,32 +12,35 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
- from typing import Callable, List, Tuple, Union, Dict
+ import copy
+ from typing import Callable, List, Tuple, Union, Generator

  import numpy as np
+ import torch
+ from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
  from torch.nn import Module
+ from torch.utils.data import DataLoader
  from tqdm import tqdm
- import copy
- import torch

- from model_compression_toolkit.core.common.hessian import HessianInfoService
- from model_compression_toolkit.gptq.pytorch.quantizer.gradual_activation_quantization import \
-     get_gradual_activation_quantizer_wrapper_factory
- from model_compression_toolkit.logger import Logger
- from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
- from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
- from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
- from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
  from model_compression_toolkit.core.common import Graph, BaseNode
- from model_compression_toolkit.core.common.framework_info import FrameworkInfo
  from model_compression_toolkit.core.common.framework_implementation import FrameworkImplementation
+ from model_compression_toolkit.core.common.framework_info import FrameworkInfo
+ from model_compression_toolkit.core.common.hessian import HessianInfoService, HessianScoresGranularity
+ from model_compression_toolkit.core.pytorch.back2framework.pytorch_model_builder import PyTorchModelBuilder
  from model_compression_toolkit.core.pytorch.constants import BIAS
+ from model_compression_toolkit.core.pytorch.data_util import FixedDatasetFromGenerator, IterableDatasetFromGenerator, \
+     IterableSampleWithConstInfoDataset, FixedSampleInfoDataset, get_collate_fn_with_extra_outputs
  from model_compression_toolkit.core.pytorch.utils import to_torch_tensor, set_model, torch_tensor_to_numpy
+ from model_compression_toolkit.gptq.common.gptq_config import GradientPTQConfig
+ from model_compression_toolkit.gptq.common.gptq_graph import get_kernel_attribute_name_for_gptq
+ from model_compression_toolkit.gptq.common.gptq_training import GPTQTrainer
  from model_compression_toolkit.gptq.pytorch.graph_info import get_gptq_trainable_parameters, \
      get_weights_for_loss
+ from model_compression_toolkit.gptq.pytorch.quantizer.gradual_activation_quantization import \
+     get_gradual_activation_quantizer_wrapper_factory
  from model_compression_toolkit.gptq.pytorch.quantizer.quantization_builder import quantization_builder
  from model_compression_toolkit.gptq.pytorch.quantizer.regularization_factory import get_regularization
- from mct_quantizers import PytorchQuantizationWrapper, PytorchActivationQuantizationHolder
+ from model_compression_toolkit.logger import Logger
  from model_compression_toolkit.trainable_infrastructure.pytorch.util import get_total_grad_steps

@@ -70,6 +73,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
              hessian_info_service: HessianInfoService to fetch info based on the hessian approximation of the float model.
          """
          def _get_total_grad_steps():
+             # TODO get it from the dataset
              return get_total_grad_steps(representative_data_gen) * gptq_config.n_epochs

          # must be set prior to model building in the base class constructor
@@ -81,6 +85,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
                           gptq_config,
                           fw_impl,
                           fw_info,
+                          representative_data_gen_fn=representative_data_gen,
                           hessian_info_service=hessian_info_service)

          self.loss_list = []
@@ -106,20 +111,87 @@ class PytorchGPTQTrainer(GPTQTrainer):
                                                               trainable_bias,
                                                               trainable_threshold)
          hessian_cfg = self.gptq_config.hessian_weights_config
+
          self.use_sample_layer_attention = hessian_cfg.per_sample
-         self.hessian_score_per_layer = None  # for fixed layer weights
-         self.hessian_score_per_image_per_layer = None  # for sample-layer attention
          if self.use_sample_layer_attention:
              # normalization is currently not supported, make sure the config reflects it.
              if hessian_cfg.norm_scores or hessian_cfg.log_norm or hessian_cfg.scale_log_norm:
                  raise NotImplementedError()
-             # Per sample hessian scores are calculated on-demand during the training loop
-             self.hessian_score_per_image_per_layer = {}
+             self.train_dataloader = self._prepare_train_dataloader_sla(representative_data_gen)
          else:
-             self.hessian_score_per_layer = to_torch_tensor(self.compute_hessian_based_weights())
+             self.train_dataloader = self._prepare_train_dataloader_for_non_sla(representative_data_gen)

          self.reg_func = get_regularization(self.gptq_config, _get_total_grad_steps)

+     def _prepare_train_dataloader_sla(self, data_gen_fn: Callable[[], Generator]) -> DataLoader:
+         """
+         Computes Sample-Layer Attention score and builds a train dataloader.
+
+         Args:
+             data_gen_fn: factory for representative dataset generator.
+
+         Returns:
+             PyTorch dataloader yielding three outputs - samples, weights for the distillation loss and
+             weights for regularization.
+         """
+         fixed_dataset = FixedDatasetFromGenerator(data_gen_fn)
+         orig_batch_size = fixed_dataset.orig_batch_size
+         # compute hessians for the whole dataset
+         hess_data_loader = DataLoader(fixed_dataset,
+                                       batch_size=self.gptq_config.hessian_weights_config.hessian_batch_size,
+                                       shuffle=False)
+         request = self._build_hessian_request(granularity=HessianScoresGranularity.PER_OUTPUT_CHANNEL,
+                                               data_loader=hess_data_loader,
+                                               n_samples=None)
+         layers_hessians = self.hessian_service.fetch_hessian(request, force_compute=True)
+
+         # compute sla score defined as max over channels
+         layers_hessians = {layer: to_torch_tensor(hess.max(1)) for layer, hess in layers_hessians.items()}
+
+         # build train dataset and dataloader
+         hessians_tensor = torch.stack([layers_hessians[layer.name] for layer in self.compare_points], dim=1)  # samples X layers
+         assert hessians_tensor.shape[1] == len(self.compare_points)
+         loss_weights = list(hessians_tensor)
+         sla_train_dataset = FixedSampleInfoDataset(fixed_dataset.samples, loss_weights)
+
+         reg_weights = hessians_tensor.mean(dim=0)
+         # use collate to add a single value to each batch
+         collate_fn = get_collate_fn_with_extra_outputs(reg_weights)
+
+         return DataLoader(sla_train_dataset, batch_size=orig_batch_size, shuffle=True, collate_fn=collate_fn)
+
+     def _prepare_train_dataloader_for_non_sla(self, data_gen_fn: Callable[[], Generator]) -> DataLoader:
+         """
+         Computes loss weights and builds a train dataloader.
+
+         Args:
+             data_gen_fn: factory for representative dataset generator.
+
+         Returns:
+             PyTorch dataloader yielding three outputs - samples, weights for the distillation loss and
+             weights for regularization.
+         """
+         dataset = IterableDatasetFromGenerator(data_gen_fn)
+         num_nodes = len(self.compare_points)
+
+         if self.gptq_config.use_hessian_based_weights:
+             hess_dataloader = DataLoader(dataset, batch_size=self.gptq_config.hessian_weights_config.hessian_batch_size)
+             loss_weights = torch.from_numpy(self.compute_hessian_based_weights(hess_dataloader))
+         else:
+             loss_weights = torch.ones(num_nodes) / num_nodes
+
+         train_dataset = IterableSampleWithConstInfoDataset(dataset, loss_weights)
+
+         reg_weights = torch.ones(num_nodes)
+         # use collate to add a single value to each batch
+         collate_fn = get_collate_fn_with_extra_outputs(reg_weights)
+
+         # NOTE: Don't just increase num_workers! With iterable dataset each worker fetches a full pass, so having
+         # more workers will result in multiple passes within the same epoch. Special handling is needed either
+         # in dataset or in worker_init_fn passed to dataloader, and it might not speed anything up anyway.
+         return DataLoader(train_dataset, batch_size=dataset.orig_batch_size,
+                           collate_fn=collate_fn, num_workers=1)
+
      def _is_gptq_weights_trainable(self,
                                     node: BaseNode) -> bool:
          """
@@ -195,11 +267,10 @@ class PytorchGPTQTrainer(GPTQTrainer):

          return gptq_model, gptq_user_info

-     def train(self, representative_data_gen: Callable):
+     def train(self):
          """
          GPTQ Training using pytorch framework
-         Args:
-             representative_data_gen: Dataset generator to get images.
+
          Returns:
              Graph after GPTQ training
          """
@@ -216,7 +287,7 @@ class PytorchGPTQTrainer(GPTQTrainer):
          # ----------------------------------------------
          # Training loop
          # ----------------------------------------------
-         self.micro_training_loop(representative_data_gen, self.gptq_config.n_epochs)
+         self.micro_training_loop(self.gptq_config.n_epochs)

      def compute_gradients(self,
                            y_float: List[torch.Tensor],
@@ -262,23 +333,21 @@ class PytorchGPTQTrainer(GPTQTrainer):
          return loss_value, grads

      def micro_training_loop(self,
-                             data_function: Callable,
                              n_epochs: int):
          """
          This function run a micro training loop on given set of parameters.
          Args:
-             data_function: A callable function that give a batch of samples.
              n_epochs: Number of update iterations of representative dataset.
          """
          with tqdm(range(n_epochs), "Running GPTQ optimization") as epochs_pbar:
              for _ in epochs_pbar:
-                 with tqdm(data_function(), position=1, leave=False) as data_pbar:
-                     for data in data_pbar:
-                         distill_weights, reg_weights = to_torch_tensor(self._get_loss_weights(data))
+                 with tqdm(self.train_dataloader, position=1, leave=False) as data_pbar:
+                     for sample in data_pbar:
+                         data, loss_weight, reg_weight = to_torch_tensor(sample)
                          input_data = [d * self.input_scale for d in data]
                          input_tensor = to_torch_tensor(input_data)
                          y_float = self.float_model(input_tensor)  # running float model
-                         loss_value, grads = self.compute_gradients(y_float, input_tensor, distill_weights, reg_weights)
+                         loss_value, grads = self.compute_gradients(y_float, input_tensor, loss_weight, reg_weight)
                          # Run one step of gradient descent by updating the value of the variables to minimize the loss.
                          for (optimizer, _) in self.optimizer_with_param:
                              optimizer.step()
@@ -290,42 +359,6 @@ class PytorchGPTQTrainer(GPTQTrainer):
                  self.loss_list.append(loss_value.item())
                  Logger.debug(f'last loss value: {self.loss_list[-1]}')

-     def _get_loss_weights(self, input_tensors: List[torch.Tensor]) -> Tuple[torch.Tensor, torch.Tensor]:
-         """
-         Fetches weights for distillation and round regularization parts of loss.
-
-         Args:
-             input_tensors: list containing a batch of inputs.
-
-         Returns:
-             A tuple of two tensors:
-                 - weights for distillation loss
-                 - weights for rounding regularization loss
-
-         """
-         if self.use_sample_layer_attention is False:
-             return self.hessian_score_per_layer, torch.ones_like(self.hessian_score_per_layer)
-
-         if len(input_tensors) > 1:
-             raise NotImplementedError('Sample-Layer attention is not currently supported for networks with multiple inputs')
-
-         image_scores = []
-         batch = input_tensors[0]
-         img_hashes = [self.hessian_service.calc_image_hash(img) for img in batch]
-         for img_hash in img_hashes:
-             # If sample-layer attention score for the image is not found, compute and store it for the whole batch.
-             if img_hash not in self.hessian_score_per_image_per_layer:
-                 score_per_image_per_layer = self._compute_sample_layer_attention_scores(input_tensors)
-                 self.hessian_score_per_image_per_layer.update(score_per_image_per_layer)
-             img_scores_per_layer: Dict[BaseNode, np.ndarray] = self.hessian_score_per_image_per_layer[img_hash]
-             # fetch image scores for all layers and combine them into a single tensor
-             img_scores = np.stack(list(img_scores_per_layer.values()), axis=0)
-             image_scores.append(img_scores)
-
-         layer_sample_weights = np.stack(image_scores, axis=1)  # layers X images
-         layer_weights = layer_sample_weights.mean(axis=1)
-         return layer_sample_weights, layer_weights
-
      def update_graph(self) -> Graph:
          """
          Update a graph using GPTQ after minimizing the loss between the float model's output
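With the removal above, per-sample Sample-Layer Attention scores are no longer looked up by image hash during training; they are computed once in _prepare_train_dataloader_sla and carried by the dataloader. As a shape-level illustration of the reductions used there (max over channels per layer, stack into a samples-by-layers matrix, mean over samples for the regularization weights), here is a small, self-contained example with made-up values.

# Toy walk-through of the SLA score reductions introduced above (made-up values).
import torch

n_samples, n_channels, n_layers = 4, 3, 2

# Assume the Hessian service returned one (n_samples, n_channels) score array per compare point.
per_layer_scores = {f'layer_{i}': torch.rand(n_samples, n_channels) for i in range(n_layers)}

# SLA score per (sample, layer): max over the channel axis.
per_layer_sla = {name: scores.max(dim=1).values for name, scores in per_layer_scores.items()}

# Stack into a samples X layers matrix; each row is one sample's distillation-loss weight vector.
hessians_tensor = torch.stack(list(per_layer_sla.values()), dim=1)
assert hessians_tensor.shape == (n_samples, n_layers)

# Regularization weights: one value per layer, averaged over samples.
reg_weights = hessians_tensor.mean(dim=0)
assert reg_weights.shape == (n_layers,)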
model_compression_toolkit/gptq/pytorch/quantization_facade.py
@@ -18,7 +18,6 @@ from typing import Callable, Union
  from model_compression_toolkit.constants import ACT_HESSIAN_DEFAULT_BATCH_SIZE, PYTORCH
  from model_compression_toolkit.core import CoreConfig
  from model_compression_toolkit.core.analyzer import analyzer_model_quantization
- from model_compression_toolkit.core.common.hessian import HessianScoresGranularity, HessianEstimationDistribution
  from model_compression_toolkit.core.common.mixed_precision.mixed_precision_quantization_config import \
      MixedPrecisionQuantizationConfig
  from model_compression_toolkit.core.common.mixed_precision.resource_utilization_tools.resource_utilization import \
@@ -119,7 +118,6 @@ if FOUND_TORCH:
                  scale_log_norm=False,
                  hessian_batch_size=hessian_batch_size,
                  per_sample=True,
-                 estimator_distribution=HessianEstimationDistribution.RADEMACHER
              )
          loss = loss or sample_layer_attention_loss
      else:
model_compression_toolkit/gptq/pytorch/quantizer/soft_rounding/soft_quantizer_reg.py
@@ -47,14 +47,15 @@ class SoftQuantizerRegularization:
          Args:
              model: A model to be quantized with SoftRounding.
              entropy_reg: Entropy value to scale the quantizer regularization.
-             layer_weights: a vector of layer weights.
+             layer_weights: a vector of layers weights.

          Returns: Regularization value.
          """
          layers = [m for m in model.modules() if isinstance(m, PytorchQuantizationWrapper)]

-         if len(layer_weights.shape) != 1 or layer_weights.shape[0] != len(layers):
-             raise ValueError(f'Expected weights to be a vector of length {len(layers)}, received {layer_weights.shape}.')  # pragma: no cover
+         if layer_weights.shape[0] != len(layers):
+             raise ValueError(f'Expected weights.shape[0] to be {len(layers)}, '
+                              f'received shape {layer_weights.shape}.')  # pragma: no cover
          max_w = layer_weights.max()

          b = self.beta_scheduler(self.count_iter)
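The check above was relaxed from requiring a strictly one-dimensional weight vector to only constraining the leading dimension, so any tensor whose first dimension matches the number of wrapped layers now passes. A small illustration of the two checks side by side (shapes are hypothetical, not taken from MCT):

# Compares the old and new shape checks from the hunk above (hypothetical shapes).
import torch

num_layers = 5                               # stands in for len(layers)
vec_weights = torch.ones(num_layers)         # 1-D: accepted by both checks
mat_weights = torch.ones(num_layers, 32)     # 2-D: rejected by the old check, accepted by the new one

for w in (vec_weights, mat_weights):
    new_check_ok = w.shape[0] == num_layers
    old_check_ok = len(w.shape) == 1 and w.shape[0] == num_layers
    print(tuple(w.shape), 'old:', old_check_ok, 'new:', new_check_ok)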
tests_pytest/keras/__init__.py
@@ -0,0 +1,14 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================

tests_pytest/keras/core/__init__.py
@@ -0,0 +1,14 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
tests_pytest/keras/core/test_data_util.py
@@ -0,0 +1,91 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ import numpy as np
+ import pytest
+
+ from model_compression_toolkit.core.keras.data_util import data_gen_to_dataloader, TFDatasetFromGenerator
+
+
+ @pytest.fixture(scope='session')
+ def fixed_dataset():
+     # generate 320 images with data1[i] = i and data2[i] = i+10
+     data1 = np.stack([np.full((3, 30, 20), v) for v in range(320)], axis=0)
+     data2 = np.stack([np.full((10,), v + 10) for v in range(320)], axis=0)
+     return data1, data2
+
+
+ @pytest.fixture
+ def fixed_gen(fixed_dataset):
+     def f():
+         for i in range(10):
+             yield [fixed_dataset[0][32 * i: 32 * (i + 1)], fixed_dataset[1][32 * i: 32 * (i + 1)]]
+
+     return f
+
+
+ def get_random_data_gen_fn(seed=42):
+     """ get gen factory for reproducible gen yielding different samples in each epoch """
+     rng = np.random.default_rng(seed)
+
+     def f():
+         for i in range(10):
+             yield [rng.random((32, 3, 20, 30)).astype(np.float32), rng.random((32, 10)).astype(np.float32)]
+     return f
+
+
+ class TestTFDataUtil:
+     create_dataloader_fn = data_gen_to_dataloader
+
+     def test_iterable_dataset_from_fixed_gen(self, fixed_gen):
+         """ tests iterable dataset from fixed gen - same samples are generated in each epoch in the same order """
+         ds = TFDatasetFromGenerator(fixed_gen, batch_size=1)
+         self._validate_ds_from_fixed_gen(ds, 320)
+
+     def test_iterable_dataset_from_random_gen(self):
+         """ test that dataset samples over epochs are identical to the original data generator """
+         ds = TFDatasetFromGenerator(get_random_data_gen_fn(), batch_size=1)
+         pass1 = np.concatenate([t[0] for t in ds], axis=0)
+         pass2 = np.concatenate([t[0] for t in ds], axis=0)
+
+         gen_fn = get_random_data_gen_fn()
+         # one invocation is used for validation and batch size in dataset, so promote the reference gen for comparison
+         next(gen_fn())
+         gen_pass1 = np.concatenate([t[0] for t in gen_fn()], axis=0)
+         gen_pass2 = np.concatenate([t[0] for t in gen_fn()], axis=0)
+         # check that each pass is identical to corresponding pass in the original gen
+         assert np.array_equal(pass1, gen_pass1)
+         assert np.array_equal(pass2, gen_pass2)
+         assert not np.allclose(pass1, pass2)
+
+     def test_dataloader(self, fixed_gen):
+         ds = TFDatasetFromGenerator(fixed_gen, batch_size=25)
+         ds_iter = iter(ds)
+         batch1 = next(ds_iter)
+         assert batch1[0].shape[0] == batch1[1].shape[0] == 25
+         assert np.array_equal(batch1[0][0], np.full((3, 30, 20), 0))
+         assert np.array_equal(batch1[1][0], np.full((10,), 10))
+         assert np.array_equal(batch1[0][-1], np.full((3, 30, 20), 24))
+         assert np.array_equal(batch1[1][-1], np.full((10,), 34))
+         assert len(ds) == 13
+         assert ds.orig_batch_size == 32
+
+     def _validate_ds_from_fixed_gen(self, ds, exp_len):
+         for _ in range(2):
+             for i, sample in enumerate(ds):
+                 assert np.array_equal(sample[0].cpu().numpy(), np.full((1, 3, 30, 20), i))
+                 assert np.array_equal(sample[1].cpu().numpy(), np.full((1, 10,), i + 10))
+             assert i == exp_len - 1
+         assert ds.orig_batch_size == 32
+         assert len(ds) == exp_len
tests_pytest/pytorch/core/__init__.py
@@ -0,0 +1,14 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================

tests_pytest/pytorch/core/test_data_util.py
@@ -0,0 +1,125 @@
+ # Copyright 2024 Sony Semiconductor Israel, Inc. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ # ==============================================================================
+ import pytest
+ import torch
+ import numpy as np
+ from torch.utils.data import IterableDataset, Dataset
+
+ from model_compression_toolkit.core.pytorch.data_util import (data_gen_to_dataloader, IterableDatasetFromGenerator,
+                                                               FixedDatasetFromGenerator, FixedSampleInfoDataset)
+
+
+ @pytest.fixture(scope='session')
+ def fixed_dataset():
+     # generate 320 images with data1[i] = i and data2[i] = i+10
+     data1 = np.stack([np.full((3, 30, 20), v) for v in range(320)], axis=0)
+     data2 = np.stack([np.full((10,), v + 10) for v in range(320)], axis=0)
+     return data1, data2
+
+
+ @pytest.fixture
+ def fixed_gen(fixed_dataset):
+     def f():
+         for i in range(10):
+             yield [fixed_dataset[0][32 * i: 32 * (i + 1)], fixed_dataset[1][32 * i: 32 * (i + 1)]]
+
+     return f
+
+
+ def get_random_data_gen_fn(seed=42):
+     """ get gen factory for reproducible gen yielding different samples in each epoch """
+     rng = np.random.default_rng(seed)
+
+     def f():
+         for i in range(10):
+             yield [rng.random((32, 3, 20, 30)), rng.random((32, 10))]
+     return f
+
+
+ class TestDataUtil:
+     create_dataloader_fn = data_gen_to_dataloader
+
+     def test_iterable_dataset_from_fixed_gen(self, fixed_gen):
+         """ tests iterable dataset from fixed gen - same samples are generated in each epoch in the same order """
+         ds = IterableDatasetFromGenerator(fixed_gen)
+         assert isinstance(ds, IterableDataset)
+         self._validate_ds_from_fixed_gen(ds, 320)
+
+     def test_iterable_dataset_from_random_gen(self):
+         """ test that dataset samples over epochs are identical to the original data generator """
+         ds = IterableDatasetFromGenerator(get_random_data_gen_fn())
+         pass1 = torch.stack([t[0] for t in ds], dim=0)
+         pass2 = torch.stack([t[0] for t in ds], dim=0)
+
+         gen_fn = get_random_data_gen_fn()
+         # one invocation is used for validation and batch size in dataset, so promote the reference gen for comparison
+         next(gen_fn())
+         gen_pass1 = np.concatenate([t[0] for t in gen_fn()], axis=0)
+         gen_pass2 = np.concatenate([t[0] for t in gen_fn()], axis=0)
+         # check that each pass is identical to corresponding pass in the original gen
+         assert np.allclose(pass1.cpu().numpy(), gen_pass1)
+         assert np.allclose(pass2.cpu().numpy(), gen_pass2)
+         assert not torch.equal(pass1, pass2)
+
+     def test_fixed_dataset_from_fixed_gen_full(self, fixed_gen):
+         ds = FixedDatasetFromGenerator(fixed_gen)
+         assert isinstance(ds, Dataset) and not isinstance(ds, IterableDataset)
+         self._validate_ds_from_fixed_gen(ds, 320)
+
+     def test_fixed_dataset_from_const_gen_subset(self, fixed_gen):
+         ds = FixedDatasetFromGenerator(fixed_gen, n_samples=25)
+         self._validate_ds_from_fixed_gen(ds, 25)
+
+     def test_fixed_dataset_from_random_gen_full(self):
+         ds = FixedDatasetFromGenerator(get_random_data_gen_fn())
+         self._validate_fixed_ds(ds, exp_len=320, exp_batch_size=32)
+
+     def test_fixed_dataset_from_random_gen_subset(self):
+         ds = FixedDatasetFromGenerator(get_random_data_gen_fn(), n_samples=123)
+         self._validate_fixed_ds(ds, exp_len=123, exp_batch_size=32)
+
+     def test_not_enough_samples_in_datagen(self):
+         def gen():
+             yield [np.ones((10, 3))]
+         with pytest.raises(ValueError, match='Not enough samples in the data generator'):
+             FixedDatasetFromGenerator(gen, n_samples=11)
+
+     def test_extra_info_mismatch(self, fixed_gen):
+         with pytest.raises(ValueError, match='Mismatch in the number of samples between samples and complementary data'):
+             FixedSampleInfoDataset([1]*10, [2]*10, [3]*11)
+
+     @pytest.mark.parametrize('ds_cls', [FixedDatasetFromGenerator, IterableDatasetFromGenerator])
+     def test_invalid_gen(self, ds_cls):
+         def gen():
+             yield np.ones((10, 3))
+         with pytest.raises(TypeError, match='Data generator is expected to yield a list of tensors'):
+             ds_cls(gen)
+
+     def _validate_ds_from_fixed_gen(self, ds, exp_len):
+         for _ in range(2):
+             for i, sample in enumerate(ds):
+                 assert np.array_equal(sample[0].cpu().numpy(), np.full((3, 30, 20), i))
+                 assert np.array_equal(sample[1].cpu().numpy(), np.full((10,), i + 10))
+             assert i == exp_len - 1
+         assert ds.orig_batch_size == 32
+         assert len(ds) == exp_len
+
+     def _validate_fixed_ds(self, ds, exp_len, exp_batch_size):
+         assert isinstance(ds, torch.utils.data.Dataset) and not isinstance(ds, torch.utils.data.IterableDataset)
+         full_pass1 = torch.concat([t[0] for t in ds], dim=0)
+         full_pass2 = torch.concat([t[0] for t in ds], dim=0)
+         assert torch.equal(full_pass1, full_pass2)
+         assert len(ds) == exp_len
+         assert ds.orig_batch_size == exp_batch_size
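The tests above pin down the contract that the new data utilities expect from a representative dataset: a zero-argument factory whose generator yields lists of equally batched arrays, from which the wrappers infer orig_batch_size and either re-iterate the generator (IterableDatasetFromGenerator) or materialize samples up front (FixedDatasetFromGenerator). A minimal usage sketch based on that contract follows; the generator and shapes are placeholders, not taken from MCT.

# Usage sketch based on the contract exercised by the tests above (placeholder data).
import numpy as np
from torch.utils.data import DataLoader

from model_compression_toolkit.core.pytorch.data_util import IterableDatasetFromGenerator


def representative_data_gen():
    # yields lists of numpy batches that all share the same leading (batch) dimension
    for _ in range(10):
        yield [np.random.rand(32, 3, 224, 224).astype(np.float32)]


dataset = IterableDatasetFromGenerator(representative_data_gen)
# iterating the dataset yields individual samples; the generator's batch size is preserved separately
dataloader = DataLoader(dataset, batch_size=dataset.orig_batch_size)

for batch in dataloader:
    images = batch[0]        # a torch tensor of shape (32, 3, 224, 224)
    break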