GANDLF 0.1.3.dev20250318__py3-none-any.whl → 0.1.4.dev20250502__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of GANDLF might be problematic.

Files changed (55)
  1. GANDLF/cli/deploy.py +2 -2
  2. GANDLF/cli/generate_metrics.py +21 -0
  3. GANDLF/cli/main_run.py +4 -12
  4. GANDLF/compute/__init__.py +0 -2
  5. GANDLF/compute/forward_pass.py +0 -1
  6. GANDLF/compute/generic.py +107 -2
  7. GANDLF/compute/inference_loop.py +4 -4
  8. GANDLF/compute/loss_and_metric.py +1 -2
  9. GANDLF/compute/training_loop.py +10 -10
  10. GANDLF/config_manager.py +26 -716
  11. GANDLF/configuration/__init__.py +0 -0
  12. GANDLF/configuration/default_config.py +73 -0
  13. GANDLF/configuration/differential_privacy_config.py +16 -0
  14. GANDLF/configuration/exclude_parameters.py +1 -0
  15. GANDLF/configuration/model_config.py +82 -0
  16. GANDLF/configuration/nested_training_config.py +25 -0
  17. GANDLF/configuration/optimizer_config.py +121 -0
  18. GANDLF/configuration/parameters_config.py +10 -0
  19. GANDLF/configuration/patch_sampler_config.py +11 -0
  20. GANDLF/configuration/post_processing_config.py +10 -0
  21. GANDLF/configuration/pre_processing_config.py +94 -0
  22. GANDLF/configuration/scheduler_config.py +90 -0
  23. GANDLF/configuration/user_defined_config.py +131 -0
  24. GANDLF/configuration/utils.py +96 -0
  25. GANDLF/configuration/validators.py +479 -0
  26. GANDLF/data/__init__.py +14 -16
  27. GANDLF/data/lightning_datamodule.py +119 -0
  28. GANDLF/entrypoints/run.py +29 -35
  29. GANDLF/inference_manager.py +69 -25
  30. GANDLF/losses/__init__.py +23 -1
  31. GANDLF/losses/loss_calculators.py +79 -0
  32. GANDLF/losses/segmentation.py +3 -2
  33. GANDLF/metrics/__init__.py +26 -0
  34. GANDLF/metrics/generic.py +1 -1
  35. GANDLF/metrics/metric_calculators.py +102 -0
  36. GANDLF/metrics/panoptica_config_brats.yaml +50 -0
  37. GANDLF/metrics/segmentation_panoptica.py +35 -0
  38. GANDLF/models/__init__.py +8 -3
  39. GANDLF/models/lightning_module.py +2102 -0
  40. GANDLF/optimizers/__init__.py +4 -8
  41. GANDLF/privacy/opacus/opacus_anonymization_manager.py +243 -0
  42. GANDLF/schedulers/__init__.py +6 -2
  43. GANDLF/training_manager.py +159 -69
  44. GANDLF/utils/__init__.py +4 -3
  45. GANDLF/utils/imaging.py +121 -2
  46. GANDLF/utils/modelio.py +9 -7
  47. GANDLF/utils/pred_target_processors.py +71 -0
  48. GANDLF/utils/write_parse.py +1 -1
  49. GANDLF/version.py +1 -1
  50. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/METADATA +14 -8
  51. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/RECORD +55 -32
  52. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/WHEEL +1 -1
  53. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/entry_points.txt +0 -0
  54. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info/licenses}/LICENSE +0 -0
  55. {gandlf-0.1.3.dev20250318.dist-info → gandlf-0.1.4.dev20250502.dist-info}/top_level.txt +0 -0
GANDLF/cli/deploy.py CHANGED
@@ -246,7 +246,7 @@ def get_metrics_mlcube_config(
         mlcube_config = yaml.safe_load(f)
     if entrypoint_script:
         # modify the entrypoint to run a custom script
-        mlcube_config["tasks"]["evaluate"]["entrypoint"] = "python3.9 /entrypoint.py"
+        mlcube_config["tasks"]["evaluate"]["entrypoint"] = "python3.11 /entrypoint.py"
     mlcube_config["docker"]["build_strategy"] = "auto"
     return mlcube_config
 
@@ -315,7 +315,7 @@ def get_model_mlcube_config(
     device = "cuda" if requires_gpu else "cpu"
     mlcube_config["tasks"]["infer"][
         "entrypoint"
-    ] = f"python3.9 /entrypoint.py --device {device}"
+    ] = f"python3.11 /entrypoint.py --device {device}"
 
     return mlcube_config
 # Duplicate training task into one from reset (must be explicit) and one that resumes with new data
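The two hunks above only swap the interpreter baked into the MLCube entrypoints. A minimal standalone sketch of the same dict manipulation, with a hypothetical pre-loaded mlcube_config (key names taken from the diff):

    # Hypothetical, minimal stand-in for the config loaded from mlcube.yaml.
    mlcube_config = {"tasks": {"evaluate": {}}, "docker": {}}
    entrypoint_script = "/path/to/custom_entrypoint.py"  # placeholder

    if entrypoint_script:
        # 0.1.4 targets the python3.11 interpreter inside the container
        mlcube_config["tasks"]["evaluate"]["entrypoint"] = "python3.11 /entrypoint.py"
    mlcube_config["docker"]["build_strategy"] = "auto"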
GANDLF/cli/generate_metrics.py CHANGED
@@ -20,6 +20,7 @@ from GANDLF.metrics import (
     mean_squared_log_error,
     mean_absolute_error,
     ncc_metrics,
+    generate_instance_segmentation,
 )
 from GANDLF.losses.segmentation import dice
 from GANDLF.metrics.segmentation import (
@@ -259,6 +260,26 @@ def generate_metrics_dict(
                     "volumeSimilarity_" + str(class_index)
                 ] = label_overlap_filter.GetVolumeSimilarity()
 
+    elif problem_type == "segmentation_brats":
+        for _, row in tqdm(input_df.iterrows(), total=input_df.shape[0]):
+            current_subject_id = row["SubjectID"]
+            overall_stats_dict[current_subject_id] = {}
+            label_image = torchio.LabelMap(row["Target"])
+            pred_image = torchio.LabelMap(row["Prediction"])
+            label_tensor = label_image.data
+            pred_tensor = pred_image.data
+            spacing = label_image.spacing
+            if label_tensor.data.shape[-1] == 1:
+                spacing = spacing[0:2]
+            # add dimension for batch
+            parameters["subject_spacing"] = torch.Tensor(spacing).unsqueeze(0)
+            label_array = label_tensor.unsqueeze(0).numpy()
+            pred_array = pred_tensor.unsqueeze(0).numpy()
+
+            overall_stats_dict[current_subject_id] = generate_instance_segmentation(
+                prediction=pred_array, target=label_array
+            )
+
     elif problem_type == "synthesis":
 
         def __fix_2d_tensor(input_tensor):
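The new segmentation_brats branch only reads three columns per row of the input table. A hedged sketch of what that input could look like (subject IDs and file names are placeholders):

    import pandas as pd

    # Hypothetical input_df; Target and Prediction must point to label images
    # that torchio.LabelMap can read (e.g. NIfTI files).
    input_df = pd.DataFrame(
        {
            "SubjectID": ["sub-001", "sub-002"],
            "Target": ["sub-001_seg.nii.gz", "sub-002_seg.nii.gz"],
            "Prediction": ["sub-001_pred.nii.gz", "sub-002_pred.nii.gz"],
        }
    )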
GANDLF/cli/main_run.py CHANGED
@@ -16,10 +16,9 @@ def main_run(
     config_file: str,
     model_dir: str,
     train_mode: bool,
-    device: str,
     resume: bool,
     reset: bool,
-    _profile: Optional[bool] = False,
+    profile: Optional[bool] = False,
     output_dir: Optional[str] = None,
 ) -> None:
     """
@@ -30,9 +29,9 @@ def main_run(
         config_file (str): The YAML file of the training configuration.
         model_dir (str): The model directory; for training, model is written out here, and for inference, trained model is expected here.
         train_mode (bool): Whether to train or infer.
-        device (str): The device type.
         resume (bool): Whether the previous run will be resumed or not.
         reset (bool): Whether the previous run will be reset or not.
+        profile (bool): Whether to profile the training or not. Defaults to False.
         output_dir (str): The output directory for the inference session. Defaults to None.
 
     Returns:
@@ -40,9 +39,7 @@
     """
     file_data_full = data_csv
     model_parameters = config_file
-    device = device
     parameters = ConfigManager(model_parameters)
-    parameters["device_id"] = -1
 
     if train_mode:
         if resume:
@@ -60,9 +57,6 @@
         parameters["output_dir"] = model_dir
     Path(parameters["output_dir"]).mkdir(parents=True, exist_ok=True)
 
-    if "-1" in device:
-        device = "cpu"
-
     # parse training CSV
     if "," in file_data_full:
         # training and validation pre-split
@@ -96,10 +90,9 @@
             dataframe_testing=data_testing,
            outputDir=parameters["output_dir"],
            parameters=parameters,
-            device=device,
            resume=resume,
            reset=reset,
-            _profile=_profile,
+            profile=profile,
        )
    else:
        data_full, headers = parseTrainingCSV(file_data_full, train=train_mode)
@@ -109,9 +102,9 @@
             dataframe=data_full,
             outputDir=parameters["output_dir"],
             parameters=parameters,
-            device=device,
             resume=resume,
             reset=reset,
+            profile=profile,
         )
     else:
         _, data_full, headers = parseTestingCSV(
@@ -122,5 +115,4 @@
             modelDir=model_dir,
             outputDir=output_dir,
             parameters=parameters,
-            device=device,
         )
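After this change the device argument is gone and the profiling flag loses its leading underscore. A hypothetical call with the updated signature (keyword names mirror the hunks above; all paths are placeholders):

    # Hypothetical invocation with the 0.1.4 signature of main_run.
    main_run(
        data_csv="train_data.csv",   # placeholder path
        config_file="config.yaml",   # placeholder path
        model_dir="model_output",    # placeholder path
        train_mode=True,
        resume=False,
        reset=False,
        profile=False,
        output_dir=None,
    )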
GANDLF/compute/__init__.py CHANGED
@@ -1,3 +1 @@
-from .training_loop import training_loop
-from .inference_loop import inference_loop
 from .generic import create_pytorch_objects
GANDLF/compute/forward_pass.py CHANGED
@@ -150,7 +150,6 @@ def validate_network(
                 tensor=subject[key]["data"].squeeze(0),
                 affine=subject[key]["affine"].squeeze(0),
             )
-
         # regression/classification problem AND label is present
         if (params["problem_type"] != "segmentation") and label_present:
             sampler = torchio.data.LabelSampler(params["patch_size"])
GANDLF/compute/generic.py CHANGED
@@ -2,17 +2,122 @@ from typing import Optional, Tuple
 from pandas.util import hash_pandas_object
 import torch
 from torch.utils.data import DataLoader
-
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
 from GANDLF.models import get_model
 from GANDLF.schedulers import get_scheduler
 from GANDLF.optimizers import get_optimizer
-from GANDLF.data import get_train_loader, get_validation_loader
+from GANDLF.data import get_train_loader, get_validation_loader, ImagesFromDataFrame
 from GANDLF.utils import (
     populate_header_in_parameters,
+    populate_channel_keys_in_params,
     parseTrainingCSV,
     send_model_to_device,
     get_class_imbalance_weights,
 )
+from GANDLF.utils.write_parse import get_dataframe
+from torchio import SubjectsDataset, Queue
+from typing import Union
+
+
+@dataclass
+class AbstractSubsetDataParser(ABC):
+    """
+    Interface for subset data parsers, needed to separate the dataset creation
+    from construction of the dataloaders.
+    """
+
+    subset_csv_path: str
+    parameters_dict: dict
+
+    @abstractmethod
+    def create_subset_dataset(self) -> Union[SubjectsDataset, Queue]:
+        """
+        Method to create the subset dataset based on the subset CSV file
+        and the parameters dict.
+
+        Returns:
+            Union[SubjectsDataset, Queue]: The subset dataset.
+        """
+        pass
+
+    def get_params_extended_with_subset_data(self) -> dict:
+        """
+        Trick to get around the fact that parameters dict need to be modified
+        during this parsing procedure. This method should be called after
+        create_subset_dataset(), as this method will populate the parameters
+        dict with the headers from the subset data.
+        """
+        return self.parameters_dict
+
+
+class TrainingSubsetDataParser(AbstractSubsetDataParser):
+    def create_subset_dataset(self) -> Union[SubjectsDataset, Queue]:
+        (
+            self.parameters_dict["training_data"],
+            headers_to_populate_train,
+        ) = parseTrainingCSV(self.subset_csv_path, train=True)
+
+        self.parameters_dict = populate_header_in_parameters(
+            self.parameters_dict, headers_to_populate_train
+        )
+
+        (
+            self.parameters_dict["penalty_weights"],
+            self.parameters_dict["sampling_weights"],
+            self.parameters_dict["class_weights"],
+        ) = get_class_imbalance_weights(
+            self.parameters_dict["training_data"], self.parameters_dict
+        )
+
+        print("Penalty weights : ", self.parameters_dict["penalty_weights"])
+        print("Sampling weights: ", self.parameters_dict["sampling_weights"])
+        print("Class weights : ", self.parameters_dict["class_weights"])
+
+        return ImagesFromDataFrame(
+            get_dataframe(self.parameters_dict["training_data"]),
+            self.parameters_dict,
+            train=True,
+            loader_type="train",
+        )
+
+
+class ValidationSubsetDataParser(AbstractSubsetDataParser):
+    def create_subset_dataset(self) -> Union[SubjectsDataset, Queue]:
+        (self.parameters_dict["validation_data"], _) = parseTrainingCSV(
+            self.subset_csv_path, train=False
+        )
+        validation_dataset = ImagesFromDataFrame(
+            get_dataframe(self.parameters_dict["validation_data"]),
+            self.parameters_dict,
+            train=False,
+            loader_type="validation",
+        )
+        self.parameters_dict = populate_channel_keys_in_params(
+            validation_dataset, self.parameters_dict
+        )
+        return validation_dataset
+
+
+class TestSubsetDataParser(AbstractSubsetDataParser):
+    def create_subset_dataset(self) -> Union[SubjectsDataset, Queue]:
+        testing_dataset = ImagesFromDataFrame(
+            get_dataframe(self.subset_csv_path),
+            self.parameters_dict,
+            train=False,
+            loader_type="testing",
+        )
+        if not ("channel_keys" in self.parameters_dict):
+            self.parameters_dict = populate_channel_keys_in_params(
+                testing_dataset, self.parameters_dict
+            )
+        return testing_dataset
+
+
+class InferenceSubsetDataParserRadiology(TestSubsetDataParser):
+    """Simple wrapper for name coherency, functionally this is the same as TestSubsetDataParser"""
+
+    pass
 
 
 def create_pytorch_objects(
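The new parser classes separate dataset construction from dataloader construction. A hedged sketch of how they might be wired up, following the call order the docstrings prescribe (the CSV paths and the parameters dict are placeholders):

    # Hypothetical usage; `parameters` is assumed to be a GANDLF parameters dict.
    train_parser = TrainingSubsetDataParser("train_subset.csv", parameters)
    train_dataset = train_parser.create_subset_dataset()
    # create_subset_dataset() updates the parser's parameters, so re-read them afterwards
    parameters = train_parser.get_params_extended_with_subset_data()

    val_parser = ValidationSubsetDataParser("val_subset.csv", parameters)
    val_dataset = val_parser.create_subset_dataset()
    parameters = val_parser.get_params_extended_with_subset_data()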
GANDLF/compute/inference_loop.py CHANGED
@@ -18,8 +18,8 @@ from torch.cuda.amp import autocast
 import openslide
 from GANDLF.data import get_testing_loader
 from GANDLF.utils import (
-    best_model_path_end,
-    latest_model_path_end,
+    BEST_MODEL_PATH_END,
+    LATEST_MODEL_PATH_END,
     load_ov_model,
     print_model_summary,
     applyCustomColorMap,
@@ -72,11 +72,11 @@ def inference_loop(
     files_to_check = [
         os.path.join(
             modelDir,
-            str(parameters["model"]["architecture"]) + best_model_path_end,
+            str(parameters["model"]["architecture"]) + BEST_MODEL_PATH_END,
         ),
         os.path.join(
             modelDir,
-            str(parameters["model"]["architecture"]) + latest_model_path_end,
+            str(parameters["model"]["architecture"]) + LATEST_MODEL_PATH_END,
         ),
     ]
 
GANDLF/compute/loss_and_metric.py CHANGED
@@ -1,4 +1,3 @@
-import sys
 import warnings
 from typing import Dict, Tuple, Union
 from GANDLF.losses import global_losses_dict
@@ -134,7 +133,7 @@ def get_loss_and_metrics(
     # Metrics should be a list
     for metric in params["metrics"]:
         metric_lower = metric.lower()
-        metric_output[metric] = 0
+        metric_output[metric] = 0.0
         if metric_lower not in global_metrics_dict:
             warnings.warn("WARNING: Could not find the requested metric '" + metric)
             continue
GANDLF/compute/training_loop.py CHANGED
@@ -13,9 +13,9 @@ from GANDLF.grad_clipping.grad_scaler import GradScaler, model_parameters_exclud
 from GANDLF.grad_clipping.clip_gradients import dispatch_clip_grad_
 from GANDLF.utils import (
     get_date_time,
-    best_model_path_end,
-    latest_model_path_end,
-    initial_model_path_end,
+    BEST_MODEL_PATH_END,
+    LATEST_MODEL_PATH_END,
+    INITIAL_MODEL_PATH_END,
     save_model,
     optimize_and_save_model,
     load_model,
@@ -281,13 +281,13 @@ def training_loop(
     first_model_saved = False
     model_paths = {
         "best": os.path.join(
-            output_dir, params["model"]["architecture"] + best_model_path_end
+            output_dir, params["model"]["architecture"] + BEST_MODEL_PATH_END
         ),
         "initial": os.path.join(
-            output_dir, params["model"]["architecture"] + initial_model_path_end
+            output_dir, params["model"]["architecture"] + INITIAL_MODEL_PATH_END
         ),
         "latest": os.path.join(
-            output_dir, params["model"]["architecture"] + latest_model_path_end
+            output_dir, params["model"]["architecture"] + LATEST_MODEL_PATH_END
         ),
     }
 
@@ -481,14 +481,14 @@ def training_loop(
             + str(mem[3])
         )
         if params["device"] == "cuda":
-            mem_cuda = torch.cuda.memory_stats()
+            cuda_memory_stats = torch.cuda.memory_stats()
             outputToWrite_mem += (
                 ","
-                + str(mem_cuda["active.all.peak"])
+                + str(cuda_memory_stats["active.all.peak"])
                 + ","
-                + str(mem_cuda["active.all.current"])
+                + str(cuda_memory_stats["active.all.current"])
                 + ","
-                + str(mem_cuda["active.all.allocated"])
+                + str(cuda_memory_stats["active.all.allocated"])
             )
         outputToWrite_mem += ",\n"
         file_mem.write(outputToWrite_mem)
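The renamed cuda_memory_stats variable still reads the same counters from torch.cuda.memory_stats(). A small standalone sketch of those counters, guarded because the keys only appear once CUDA memory has actually been allocated:

    import torch

    if torch.cuda.is_available():
        cuda_memory_stats = torch.cuda.memory_stats()
        # use .get() with a default, since the keys are absent before the first allocation
        peak = cuda_memory_stats.get("active.all.peak", 0)
        current = cuda_memory_stats.get("active.all.current", 0)
        allocated = cuda_memory_stats.get("active.all.allocated", 0)
        print(peak, current, allocated)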