GANDLF 0.1.3.dev20250318__py3-none-any.whl → 0.1.4.dev20250502__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of GANDLF might be problematic.

Files changed (55)
  1. GANDLF/cli/deploy.py +2 -2
  2. GANDLF/cli/generate_metrics.py +21 -0
  3. GANDLF/cli/main_run.py +4 -12
  4. GANDLF/compute/__init__.py +0 -2
  5. GANDLF/compute/forward_pass.py +0 -1
  6. GANDLF/compute/generic.py +107 -2
  7. GANDLF/compute/inference_loop.py +4 -4
  8. GANDLF/compute/loss_and_metric.py +1 -2
  9. GANDLF/compute/training_loop.py +10 -10
  10. GANDLF/config_manager.py +26 -716
  11. GANDLF/configuration/__init__.py +0 -0
  12. GANDLF/configuration/default_config.py +73 -0
  13. GANDLF/configuration/differential_privacy_config.py +16 -0
  14. GANDLF/configuration/exclude_parameters.py +1 -0
  15. GANDLF/configuration/model_config.py +82 -0
  16. GANDLF/configuration/nested_training_config.py +25 -0
  17. GANDLF/configuration/optimizer_config.py +121 -0
  18. GANDLF/configuration/parameters_config.py +10 -0
  19. GANDLF/configuration/patch_sampler_config.py +11 -0
  20. GANDLF/configuration/post_processing_config.py +10 -0
  21. GANDLF/configuration/pre_processing_config.py +94 -0
  22. GANDLF/configuration/scheduler_config.py +90 -0
  23. GANDLF/configuration/user_defined_config.py +131 -0
  24. GANDLF/configuration/utils.py +96 -0
  25. GANDLF/configuration/validators.py +479 -0
  26. GANDLF/data/__init__.py +14 -16
  27. GANDLF/data/lightning_datamodule.py +119 -0
  28. GANDLF/entrypoints/run.py +29 -35
  29. GANDLF/inference_manager.py +69 -25
  30. GANDLF/losses/__init__.py +23 -1
  31. GANDLF/losses/loss_calculators.py +79 -0
  32. GANDLF/losses/segmentation.py +3 -2
  33. GANDLF/metrics/__init__.py +26 -0
  34. GANDLF/metrics/generic.py +1 -1
  35. GANDLF/metrics/metric_calculators.py +102 -0
  36. GANDLF/metrics/panoptica_config_brats.yaml +50 -0
  37. GANDLF/metrics/segmentation_panoptica.py +35 -0
  38. GANDLF/models/__init__.py +8 -3
  39. GANDLF/models/lightning_module.py +2102 -0
  40. GANDLF/optimizers/__init__.py +4 -8
  41. GANDLF/privacy/opacus/opacus_anonymization_manager.py +243 -0
  42. GANDLF/schedulers/__init__.py +6 -2
  43. GANDLF/training_manager.py +159 -69
  44. GANDLF/utils/__init__.py +4 -3
  45. GANDLF/utils/imaging.py +121 -2
  46. GANDLF/utils/modelio.py +9 -7
  47. GANDLF/utils/pred_target_processors.py +71 -0
  48. GANDLF/utils/write_parse.py +1 -1
  49. GANDLF/version.py +1 -1
  50. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/METADATA +14 -8
  51. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/RECORD +55 -32
  52. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/WHEEL +1 -1
  53. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/entry_points.txt +0 -0
  54. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info/licenses}/LICENSE +0 -0
  55. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/top_level.txt +0 -0
GANDLF/cli/deploy.py CHANGED
@@ -246,7 +246,7 @@ def get_metrics_mlcube_config(
         mlcube_config = yaml.safe_load(f)
     if entrypoint_script:
         # modify the entrypoint to run a custom script
-        mlcube_config["tasks"]["evaluate"]["entrypoint"] = "python3.9 /entrypoint.py"
+        mlcube_config["tasks"]["evaluate"]["entrypoint"] = "python3.11 /entrypoint.py"
     mlcube_config["docker"]["build_strategy"] = "auto"
     return mlcube_config
 
@@ -315,7 +315,7 @@ def get_model_mlcube_config(
     device = "cuda" if requires_gpu else "cpu"
     mlcube_config["tasks"]["infer"][
         "entrypoint"
-    ] = f"python3.9 /entrypoint.py --device {device}"
+    ] = f"python3.11 /entrypoint.py --device {device}"
 
     return mlcube_config
 # Duplicate training task into one from reset (must be explicit) and one that resumes with new data
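The two hunks above only swap the interpreter baked into the MLCube entrypoints. A minimal standalone sketch of the same dict manipulation, with a hypothetical pre-loaded mlcube_config (key names taken from the diff):

    # Hypothetical, minimal stand-in for the config loaded from mlcube.yaml.
    mlcube_config = {"tasks": {"evaluate": {}}, "docker": {}}
    entrypoint_script = "/path/to/custom_entrypoint.py"  # placeholder

    if entrypoint_script:
        # 0.1.4 targets the python3.11 interpreter inside the container
        mlcube_config["tasks"]["evaluate"]["entrypoint"] = "python3.11 /entrypoint.py"
    mlcube_config["docker"]["build_strategy"] = "auto"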
GANDLF/cli/generate_metrics.py CHANGED
@@ -20,6 +20,7 @@ from GANDLF.metrics import (
     mean_squared_log_error,
     mean_absolute_error,
     ncc_metrics,
+    generate_instance_segmentation,
 )
 from GANDLF.losses.segmentation import dice
 from GANDLF.metrics.segmentation import (
@@ -259,6 +260,26 @@ def generate_metrics_dict(
                     "volumeSimilarity_" + str(class_index)
                 ] = label_overlap_filter.GetVolumeSimilarity()
 
+    elif problem_type == "segmentation_brats":
+        for _, row in tqdm(input_df.iterrows(), total=input_df.shape[0]):
+            current_subject_id = row["SubjectID"]
+            overall_stats_dict[current_subject_id] = {}
+            label_image = torchio.LabelMap(row["Target"])
+            pred_image = torchio.LabelMap(row["Prediction"])
+            label_tensor = label_image.data
+            pred_tensor = pred_image.data
+            spacing = label_image.spacing
+            if label_tensor.data.shape[-1] == 1:
+                spacing = spacing[0:2]
+            # add dimension for batch
+            parameters["subject_spacing"] = torch.Tensor(spacing).unsqueeze(0)
+            label_array = label_tensor.unsqueeze(0).numpy()
+            pred_array = pred_tensor.unsqueeze(0).numpy()
+
+            overall_stats_dict[current_subject_id] = generate_instance_segmentation(
+                prediction=pred_array, target=label_array
+            )
+
     elif problem_type == "synthesis":
 
         def __fix_2d_tensor(input_tensor):
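The new segmentation_brats branch only reads three columns per row of the input table. A hedged sketch of what that input could look like (subject IDs and file names are placeholders):

    import pandas as pd

    # Hypothetical input_df; Target and Prediction must point to label images
    # that torchio.LabelMap can read (e.g. NIfTI files).
    input_df = pd.DataFrame(
        {
            "SubjectID": ["sub-001", "sub-002"],
            "Target": ["sub-001_seg.nii.gz", "sub-002_seg.nii.gz"],
            "Prediction": ["sub-001_pred.nii.gz", "sub-002_pred.nii.gz"],
        }
    )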
GANDLF/cli/main_run.py CHANGED
@@ -16,10 +16,9 @@ def main_run(
     config_file: str,
     model_dir: str,
     train_mode: bool,
-    device: str,
     resume: bool,
     reset: bool,
-    _profile: Optional[bool] = False,
+    profile: Optional[bool] = False,
     output_dir: Optional[str] = None,
 ) -> None:
     """
@@ -30,9 +29,9 @@ def main_run(
         config_file (str): The YAML file of the training configuration.
         model_dir (str): The model directory; for training, model is written out here, and for inference, trained model is expected here.
         train_mode (bool): Whether to train or infer.
-        device (str): The device type.
         resume (bool): Whether the previous run will be resumed or not.
         reset (bool): Whether the previous run will be reset or not.
+        profile (bool): Whether to profile the training or not. Defaults to False.
         output_dir (str): The output directory for the inference session. Defaults to None.
 
     Returns:
@@ -40,9 +39,7 @@
     """
     file_data_full = data_csv
     model_parameters = config_file
-    device = device
     parameters = ConfigManager(model_parameters)
-    parameters["device_id"] = -1
 
     if train_mode:
         if resume:
@@ -60,9 +57,6 @@
         parameters["output_dir"] = model_dir
     Path(parameters["output_dir"]).mkdir(parents=True, exist_ok=True)
 
-    if "-1" in device:
-        device = "cpu"
-
     # parse training CSV
     if "," in file_data_full:
         # training and validation pre-split
@@ -96,10 +90,9 @@
             dataframe_testing=data_testing,
            outputDir=parameters["output_dir"],
            parameters=parameters,
-            device=device,
            resume=resume,
            reset=reset,
-            _profile=_profile,
+            profile=profile,
        )
    else:
        data_full, headers = parseTrainingCSV(file_data_full, train=train_mode)
@@ -109,9 +102,9 @@
             dataframe=data_full,
             outputDir=parameters["output_dir"],
             parameters=parameters,
-            device=device,
             resume=resume,
             reset=reset,
+            profile=profile,
         )
     else:
         _, data_full, headers = parseTestingCSV(
@@ -122,5 +115,4 @@
             modelDir=model_dir,
             outputDir=output_dir,
             parameters=parameters,
-            device=device,
         )
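After this change the device argument is gone and the profiling flag loses its leading underscore. A hypothetical call with the updated signature (keyword names mirror the hunks above; all paths are placeholders):

    # Hypothetical invocation with the 0.1.4 signature of main_run.
    main_run(
        data_csv="train_data.csv",   # placeholder path
        config_file="config.yaml",   # placeholder path
        model_dir="model_output",    # placeholder path
        train_mode=True,
        resume=False,
        reset=False,
        profile=False,
        output_dir=None,
    )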
GANDLF/compute/__init__.py CHANGED
@@ -1,3 +1 @@
-from .training_loop import training_loop
-from .inference_loop import inference_loop
 from .generic import create_pytorch_objects
GANDLF/compute/forward_pass.py CHANGED
@@ -150,7 +150,6 @@ def validate_network(
                 tensor=subject[key]["data"].squeeze(0),
                 affine=subject[key]["affine"].squeeze(0),
             )
-
         # regression/classification problem AND label is present
         if (params["problem_type"] != "segmentation") and label_present:
             sampler = torchio.data.LabelSampler(params["patch_size"])
GANDLF/compute/generic.py CHANGED
@@ -2,17 +2,122 @@ from typing import Optional, Tuple
 from pandas.util import hash_pandas_object
 import torch
 from torch.utils.data import DataLoader
-
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from GANDLF.models import get_model
 from GANDLF.schedulers import get_scheduler
 from GANDLF.optimizers import get_optimizer
-from GANDLF.data import get_train_loader, get_validation_loader
+from GANDLF.data import get_train_loader, get_validation_loader, ImagesFromDataFrame
 from GANDLF.utils import (
     populate_header_in_parameters,
+    populate_channel_keys_in_params,
     parseTrainingCSV,
     send_model_to_device,
     get_class_imbalance_weights,
 )
+from GANDLF.utils.write_parse import get_dataframe
+from torchio import SubjectsDataset, Queue
+from typing import Union
+
+
+@dataclass
+class AbstractSubsetDataParser(ABC):
+    """
+    Interface for subset data parsers, needed to separate the dataset creation
+    from construction of the dataloaders.
+    """
+
+    subset_csv_path: str
+    parameters_dict: dict
+
+    @abstractmethod
+    def create_subset_dataset(self) -> Union[SubjectsDataset, Queue]:
+        """
+        Method to create the subset dataset based on the subset CSV file
+        and the parameters dict.
+
+        Returns:
+            Union[SubjectsDataset, Queue]: The subset dataset.
+        """
+        pass
+
+    def get_params_extended_with_subset_data(self) -> dict:
+        """
+        Trick to get around the fact that parameters dict need to be modified
+        during this parsing procedure. This method should be called after
+        create_subset_dataset(), as this method will populate the parameters
+        dict with the headers from the subset data.
+        """
+        return self.parameters_dict
+
+
+class TrainingSubsetDataParser(AbstractSubsetDataParser):
+    def create_subset_dataset(self) -> Union[SubjectsDataset, Queue]:
+        (
+            self.parameters_dict["training_data"],
+            headers_to_populate_train,
+        ) = parseTrainingCSV(self.subset_csv_path, train=True)
+
+        self.parameters_dict = populate_header_in_parameters(
+            self.parameters_dict, headers_to_populate_train
+        )
+
+        (
+            self.parameters_dict["penalty_weights"],
+            self.parameters_dict["sampling_weights"],
+            self.parameters_dict["class_weights"],
+        ) = get_class_imbalance_weights(
+            self.parameters_dict["training_data"], self.parameters_dict
+        )
+
+        print("Penalty weights : ", self.parameters_dict["penalty_weights"])
+        print("Sampling weights: ", self.parameters_dict["sampling_weights"])
+        print("Class weights : ", self.parameters_dict["class_weights"])
+
+        return ImagesFromDataFrame(
+            get_dataframe(self.parameters_dict["training_data"]),
+            self.parameters_dict,
+            train=True,
+            loader_type="train",
+        )
+
+
+class ValidationSubsetDataParser(AbstractSubsetDataParser):
+    def create_subset_dataset(self) -> Union[SubjectsDataset, Queue]:
+        (self.parameters_dict["validation_data"], _) = parseTrainingCSV(
+            self.subset_csv_path, train=False
+        )
+        validation_dataset = ImagesFromDataFrame(
+            get_dataframe(self.parameters_dict["validation_data"]),
+            self.parameters_dict,
+            train=False,
+            loader_type="validation",
+        )
+        self.parameters_dict = populate_channel_keys_in_params(
+            validation_dataset, self.parameters_dict
+        )
+        return validation_dataset
+
+
+class TestSubsetDataParser(AbstractSubsetDataParser):
+    def create_subset_dataset(self) -> Union[SubjectsDataset, Queue]:
+        testing_dataset = ImagesFromDataFrame(
+            get_dataframe(self.subset_csv_path),
+            self.parameters_dict,
+            train=False,
+            loader_type="testing",
+        )
+        if not ("channel_keys" in self.parameters_dict):
+            self.parameters_dict = populate_channel_keys_in_params(
+                testing_dataset, self.parameters_dict
+            )
+        return testing_dataset
+
+
+class InferenceSubsetDataParserRadiology(TestSubsetDataParser):
+    """Simple wrapper for name coherency, functionally this is the same as TestSubsetDataParser"""
+
+    pass
 
 
 def create_pytorch_objects(
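The new parser classes separate dataset construction from dataloader construction. A hedged sketch of how they might be wired up, following the call order the docstrings prescribe (the CSV paths and the parameters dict are placeholders):

    # Hypothetical usage; `parameters` is assumed to be a GANDLF parameters dict.
    train_parser = TrainingSubsetDataParser("train_subset.csv", parameters)
    train_dataset = train_parser.create_subset_dataset()
    # create_subset_dataset() updates the parser's parameters, so re-read them afterwards
    parameters = train_parser.get_params_extended_with_subset_data()

    val_parser = ValidationSubsetDataParser("val_subset.csv", parameters)
    val_dataset = val_parser.create_subset_dataset()
    parameters = val_parser.get_params_extended_with_subset_data()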
GANDLF/compute/inference_loop.py CHANGED
@@ -18,8 +18,8 @@ from torch.cuda.amp import autocast
 import openslide
 from GANDLF.data import get_testing_loader
 from GANDLF.utils import (
-    best_model_path_end,
-    latest_model_path_end,
+    BEST_MODEL_PATH_END,
+    LATEST_MODEL_PATH_END,
     load_ov_model,
     print_model_summary,
     applyCustomColorMap,
@@ -72,11 +72,11 @@ def inference_loop(
     files_to_check = [
         os.path.join(
             modelDir,
-            str(parameters["model"]["architecture"]) + best_model_path_end,
+            str(parameters["model"]["architecture"]) + BEST_MODEL_PATH_END,
         ),
         os.path.join(
             modelDir,
-            str(parameters["model"]["architecture"]) + latest_model_path_end,
+            str(parameters["model"]["architecture"]) + LATEST_MODEL_PATH_END,
         ),
     ]
 
GANDLF/compute/loss_and_metric.py CHANGED
@@ -1,4 +1,3 @@
-import sys
 import warnings
 from typing import Dict, Tuple, Union
 from GANDLF.losses import global_losses_dict
@@ -134,7 +133,7 @@ def get_loss_and_metrics(
     # Metrics should be a list
     for metric in params["metrics"]:
         metric_lower = metric.lower()
-        metric_output[metric] = 0
+        metric_output[metric] = 0.0
         if metric_lower not in global_metrics_dict:
             warnings.warn("WARNING: Could not find the requested metric '" + metric)
             continue
GANDLF/compute/training_loop.py CHANGED
@@ -13,9 +13,9 @@ from GANDLF.grad_clipping.grad_scaler import GradScaler, model_parameters_exclud
 from GANDLF.grad_clipping.clip_gradients import dispatch_clip_grad_
 from GANDLF.utils import (
     get_date_time,
-    best_model_path_end,
-    latest_model_path_end,
-    initial_model_path_end,
+    BEST_MODEL_PATH_END,
+    LATEST_MODEL_PATH_END,
+    INITIAL_MODEL_PATH_END,
     save_model,
     optimize_and_save_model,
     load_model,
@@ -281,13 +281,13 @@ def training_loop(
     first_model_saved = False
     model_paths = {
         "best": os.path.join(
-            output_dir, params["model"]["architecture"] + best_model_path_end
+            output_dir, params["model"]["architecture"] + BEST_MODEL_PATH_END
         ),
         "initial": os.path.join(
-            output_dir, params["model"]["architecture"] + initial_model_path_end
+            output_dir, params["model"]["architecture"] + INITIAL_MODEL_PATH_END
         ),
         "latest": os.path.join(
-            output_dir, params["model"]["architecture"] + latest_model_path_end
+            output_dir, params["model"]["architecture"] + LATEST_MODEL_PATH_END
         ),
     }
 
@@ -481,14 +481,14 @@ def training_loop(
             + str(mem[3])
         )
         if params["device"] == "cuda":
-            mem_cuda = torch.cuda.memory_stats()
+            cuda_memory_stats = torch.cuda.memory_stats()
             outputToWrite_mem += (
                 ","
-                + str(mem_cuda["active.all.peak"])
+                + str(cuda_memory_stats["active.all.peak"])
                 + ","
-                + str(mem_cuda["active.all.current"])
+                + str(cuda_memory_stats["active.all.current"])
                 + ","
-                + str(mem_cuda["active.all.allocated"])
+                + str(cuda_memory_stats["active.all.allocated"])
             )
         outputToWrite_mem += ",\n"
         file_mem.write(outputToWrite_mem)
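The renamed cuda_memory_stats variable still reads the same counters from torch.cuda.memory_stats(). A small standalone sketch of those counters, guarded because the keys only appear once CUDA memory has actually been allocated:

    import torch

    if torch.cuda.is_available():
        cuda_memory_stats = torch.cuda.memory_stats()
        # use .get() with a default, since the keys are absent before the first allocation
        peak = cuda_memory_stats.get("active.all.peak", 0)
        current = cuda_memory_stats.get("active.all.current", 0)
        allocated = cuda_memory_stats.get("active.all.allocated", 0)
        print(peak, current, allocated)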