simcats-datasets 2.4.0__tar.gz → 2.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/PKG-INFO +1 -1
  2. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/pyproject.toml +1 -1
  3. simcats_datasets-2.5.0/simcats_datasets/__init__.py +2 -0
  4. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/generation/_create_dataset.py +68 -30
  5. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/loading/_load_dataset.py +43 -14
  6. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/loading/load_ground_truth.py +7 -0
  7. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/loading/pytorch.py +69 -27
  8. simcats_datasets-2.5.0/simcats_datasets/support_functions/pytorch_format_output.py +169 -0
  9. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets.egg-info/PKG-INFO +1 -1
  10. simcats_datasets-2.4.0/simcats_datasets/__init__.py +0 -2
  11. simcats_datasets-2.4.0/simcats_datasets/support_functions/pytorch_format_output.py +0 -170
  12. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/LICENSE +0 -0
  13. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/README.md +0 -0
  14. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/setup.cfg +0 -0
  15. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/setup.py +0 -0
  16. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/generation/__init__.py +0 -0
  17. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/generation/_create_simulated_dataset.py +0 -0
  18. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/loading/__init__.py +0 -0
  19. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/support_functions/__init__.py +0 -0
  20. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/support_functions/_json_encoders.py +0 -0
  21. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/support_functions/clip_line_to_rectangle.py +0 -0
  22. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/support_functions/convert_lines.py +0 -0
  23. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/support_functions/data_preprocessing.py +0 -0
  24. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/support_functions/get_lead_transition_labels.py +0 -0
  25. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets.egg-info/SOURCES.txt +0 -0
  26. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets.egg-info/dependency_links.txt +0 -0
  27. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets.egg-info/requires.txt +0 -0
  28. {simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets.egg-info/top_level.txt +0 -0
{simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: simcats-datasets
-Version: 2.4.0
+Version: 2.5.0
 Summary: SimCATS-Datasets is a Python package that simplifies the creation and loading of SimCATS datasets.
 Author-email: Fabian Hader <f.hader@fz-juelich.de>, Fabian Fuchs <f.fuchs@fz-juelich.de>, Karin Havemann <k.havemann@fz-juelich.de>, Sarah Fleitmann <s.fleitmann@fz-juelich.de>, Jan Vogelbruch <j.vogelbruch@fz-juelich.de>
 License: GNU GENERAL PUBLIC LICENSE
{simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "simcats-datasets"
-version = "2.4.0" # also change in docs/source/conf.py and __init__
+version = "2.5.0" # also change in docs/source/conf.py and __init__
 license = { file="LICENSE" }
 authors = [
     { name="Fabian Hader", email="f.hader@fz-juelich.de" },
simcats_datasets-2.5.0/simcats_datasets/__init__.py
@@ -0,0 +1,2 @@
+__all__ = []
+__version__ = "2.5.0"
{simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/generation/_create_dataset.py
@@ -16,7 +16,8 @@ __all__ = []
 
 
 def create_dataset(dataset_path: str,
-                   csds: List[np.ndarray],
+                   csds: Optional[List[np.ndarray]] = None,
+                   sensor_scans: Optional[List[np.ndarray]] = None,
                    occupations: Optional[List[np.ndarray]] = None,
                    tct_masks: Optional[List[np.ndarray]] = None,
                    ct_by_dot_masks: Optional[List[np.ndarray]] = None,
@@ -27,6 +28,7 @@ def create_dataset(dataset_path: str,
                    max_len_line_labels_chunk: Optional[int] = None,
                    max_len_metadata_chunk: Optional[int] = None,
                    dtype_csd: np.dtype = np.float32,
+                   dtype_sensor_scan: np.dtype = np.float32,
                    dtype_occ: np.dtype = np.float32,
                    dtype_tct: np.dtype = np.uint8,
                    dtype_ct_by_dot: np.dtype = np.uint8,
@@ -35,7 +37,10 @@
 
     Args:
         dataset_path: The path where the new (v2) HDF5 dataset will be stored.
-        csds: The list of CSDs to use for creating the dataset.
+        csds: The list of CSDs to use for creating the dataset. A dataset can have either CSDs or sensor scans, but
+            never both. Default is None.
+        sensor_scans: The list of sensor scans to use for creating the dataset. A dataset can have either CSDs or sensor
+            scans, but never both. Default is None.
         occupations: List of occupations to use for creating the dataset. Defaults to None.
         tct_masks: List of TCT masks to use for creating the dataset. Defaults to None.
         ct_by_dot_masks: List of CT by dot masks to use for creating the dataset. Defaults to None.
@@ -43,15 +48,17 @@
         line_labels: List of line labels to use for creating the dataset. Defaults to None.
         metadata: List of metadata to use for creating the dataset. Defaults to None.
         max_len_line_coordinates_chunk: The expected maximal length for line coordinates in number of float values (each
-            line requires 4 floats). If None, it is set to the largest value of the CSD shape. Default is None.
+            line requires 4 floats). If None, it is set to the largest value of the CSD (or sensor scan) shape. Default
+            is None.
         max_len_line_labels_chunk: The expected maximal length for line labels in number of uint8/char values (each line
             label, encoded as utf-8 json, should require at most 80 chars). If None, it is set to the largest value of
-            the CSD shape * 20 (matching with allowed number of line coords). Default is None.
+            the CSD (or sensor scan) shape * 20 (matching with allowed number of line coords). Default is None.
         max_len_metadata_chunk: The expected maximal length for metadata in number of uint8/char values (each metadata
             dict, encoded as utf-8 json, should require at most 8000 chars, expected rather something like 4000, but
             could get larger for dot jumps metadata of high resolution scans). If None, it is set to 8000. Default is
             None.
         dtype_csd: Specifies the dtype to be used for saving CSDs. Default is np.float32.
+        dtype_sensor_scan: Specifies the dtype to be used for saving sensor scans. Default is np.float32.
         dtype_occ: Specifies the dtype to be used for saving Occupations. Default is np.float32.
         dtype_tct: Specifies the dtype to be used for saving TCTs. Default is np.uint8.
         dtype_ct_by_dot: Specifies the dtype to be used for saving CT by dot masks. Default is np.uint8.
@@ -60,27 +67,57 @@
     # Create path where the dataset will be saved (if folder doesn't exist already)
     Path(dirname(dataset_path)).mkdir(parents=True, exist_ok=True)
 
+    # check if the dataset to be created is a csd or sensor_scan dataset
+    if csds is not None and sensor_scans is None:
+        csd_dataset = True
+    elif csds is None and sensor_scans is not None:
+        csd_dataset = False
+    else:
+        raise ValueError("A dataset can contain either CSDs or sensor scans but never both! Exactly one of the two has "
+                         "to be None.")
+
     with h5py.File(dataset_path, "a") as hdf5_file:
         # get the number of total ids. This is especially required if a large dataset is loaded and saved step by step
-        num_ids = len(csds)
+        if csd_dataset:
+            num_ids = len(csds)
+        else:
+            num_ids = len(sensor_scans)
 
-        # process CSDs
-        # save an example CSD to get shape and dtype
-        temp_csd = csds[0].copy()
-        # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to load
-        # one image at a time during training)
-        ds = hdf5_file.require_dataset(name='csds', shape=(0, *temp_csd.shape), dtype=dtype_csd,
-                                       maxshape=(None, *temp_csd.shape))
+        # get a temp copy of a csd or sensor scan (to get the shape) and retrieve the corresponding HDF5 dataset
+        if csd_dataset:
+            # process CSDs
+            # save an example CSD to get shape and dtype
+            temp_data = csds[0].copy()
+            # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
+            # load one image at a time during training)
+            ds = hdf5_file.require_dataset(name='csds',
+                                           shape=(0, *temp_data.shape),
+                                           dtype=dtype_csd,
+                                           maxshape=(None, *temp_data.shape))
+        else:
+            # process sensor scans
+            # save an example sensor scan to get shape and dtype
+            temp_data = sensor_scans[0].copy()
+            # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
+            # load one image at a time during training)
+            ds = hdf5_file.require_dataset(name='sensor_scans',
+                                           shape=(0, *temp_data.shape),
+                                           dtype=dtype_sensor_scan,
+                                           maxshape=(None, *temp_data.shape))
         # determine index offset if there is already data in the dataset
         id_offset = ds.shape[0]
         # resize datasets to fit new data
         ds.resize(ds.shape[0] + num_ids, axis=0)
-        ds[id_offset:] = np.array(csds).astype(dtype_csd)
+        # Add new CSDs or sensor scans to the dataset
+        if csd_dataset:
+            ds[id_offset:] = np.array(csds).astype(dtype_csd)
+        else:
+            ds[id_offset:] = np.array(sensor_scans).astype(dtype_sensor_scan)
         if occupations is not None:
            if len(occupations) != num_ids:
                raise ValueError(
-                    f"Number of new occupation arrays ({len(occupations)}) does not match the number of new CSDs "
-                    f"({num_ids}).")
+                    f"Number of new occupation arrays ({len(occupations)}) does not match the number of new CSDs or "
+                    f"sensor scans ({num_ids}).")
            # process Occupations
            # save an example occ to get shape
            temp_occ = occupations[0].copy()
@@ -91,15 +128,15 @@ def create_dataset(dataset_path: str,
            if ds.shape[0] != id_offset:
                raise ValueError(
                    f"Number of already stored occupation arrays ({ds.shape[0]}) does not match the number of already "
-                    f"stored CSDs ({id_offset}).")
+                    f"stored CSDs or sensor scans ({id_offset}).")
            # resize datasets to fit new data
            ds.resize(ds.shape[0] + num_ids, axis=0)
            ds[id_offset:] = np.array(occupations).astype(dtype_occ)
        if tct_masks is not None:
            if len(tct_masks) != num_ids:
                raise ValueError(
-                    f"Number of new TCT mask arrays ({len(tct_masks)}) does not match the number of new CSDs "
-                    f"({num_ids}).")
+                    f"Number of new TCT mask arrays ({len(tct_masks)}) does not match the number of new CSDs or sensor "
+                    f"scans ({num_ids}).")
            # process tct masks
            # save an example tct to get shape and dtype
            temp_tct = tct_masks[0].copy()
@@ -110,7 +147,7 @@
            if ds.shape[0] != id_offset:
                raise ValueError(
                    f"Number of already stored TCT mask arrays ({ds.shape[0]}) does not match the number of already "
-                    f"stored CSDs ({id_offset}).")
+                    f"stored CSDs or sensor scans ({id_offset}).")
            # resize datasets to fit new data
            ds.resize(ds.shape[0] + num_ids, axis=0)
            ds[id_offset:] = np.array(tct_masks).astype(dtype_tct)
@@ -118,7 +155,7 @@
            if len(ct_by_dot_masks) != num_ids:
                raise ValueError(
                    f"Number of new CT by dot mask arrays ({len(ct_by_dot_masks)}) does not match the number of new "
-                    f"CSDs ({num_ids}).")
+                    f"CSDs or sensor scans ({num_ids}).")
            # process tct masks
            # save an example tct to get shape and dtype
            temp_ct_by_dot = ct_by_dot_masks[0].copy()
@@ -129,7 +166,7 @@
            if ds.shape[0] != id_offset:
                raise ValueError(
                    f"Number of already stored CT by dot mask arrays ({ds.shape[0]}) does not match the number of "
-                    f"already stored CSDs ({id_offset}).")
+                    f"already stored CSDs or sensor scans ({id_offset}).")
            # resize datasets to fit new data
            ds.resize(ds.shape[0] + num_ids, axis=0)
            ds[id_offset:] = np.array(ct_by_dot_masks).astype(dtype_tct)
@@ -137,11 +174,11 @@
            if len(line_coordinates) != num_ids:
                raise ValueError(
                    f"Number of new line coordinates ({len(line_coordinates)}) does not match the number of new "
-                    f"CSDs ({num_ids}).")
+                    f"CSDs or sensor scans ({num_ids}).")
            # retrieve fixed length for chunks
            if max_len_line_coordinates_chunk is None:
                # calculate max expected length (max_number_of_lines * 4 entries, max number estimated as max(shape)/4)
-                max_len = max(temp_csd.shape)
+                max_len = max(temp_data.shape)
            else:
                max_len = max_len_line_coordinates_chunk
            # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
@@ -151,7 +188,7 @@
            if ds.shape[0] != id_offset:
                raise ValueError(
                    f"Number of already stored line coordinates ({ds.shape[0]}) does not match the number of already "
-                    f"stored CSDs ({id_offset}).")
+                    f"stored CSDs or sensor scans ({id_offset}).")
            # resize datasets to fit new data
            ds.resize(ds.shape[0] + num_ids, axis=0)
            # process line coordinates
@@ -163,13 +200,13 @@
        if line_labels is not None:
            if len(line_labels) != num_ids:
                raise ValueError(
-                    f"Number of new line labels ({len(line_labels)}) does not match the number of new CSDs "
-                    f"({num_ids}).")
+                    f"Number of new line labels ({len(line_labels)}) does not match the number of new CSDs or sensor "
+                    f"scans ({num_ids}).")
            # retrieve fixed length for chunks
            if max_len_line_labels_chunk is None:
                # calculate max expected length (max_number_of_lines * 80 uint8 numbers, max number estimated as
                # max(shape)/4)
-                max_len = max(temp_csd.shape) * 20
+                max_len = max(temp_data.shape) * 20
            else:
                max_len = max_len_line_labels_chunk
            # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
@@ -179,7 +216,7 @@
            if ds.shape[0] != id_offset:
                raise ValueError(
                    f"Number of already stored line labels ({ds.shape[0]}) does not match the number of already stored "
-                    f"CSDs ({id_offset}).")
+                    f"CSDs or sensor scans ({id_offset}).")
            # resize datasets to fit new data
            ds.resize(ds.shape[0] + num_ids, axis=0)
            # process line labels
@@ -193,7 +230,8 @@
        if metadata is not None:
            if len(metadata) != num_ids:
                raise ValueError(
-                    f"Number of new metadata ({len(metadata)}) does not match the number of new CSDs ({num_ids}).")
+                    f"Number of new metadata ({len(metadata)}) does not match the number of new CSDs or sensor scans "
+                    f"({num_ids}).")
            # retrieve fixed length for chunks
            if max_len_metadata_chunk is None:
                # set len to 8000 uint8 numbers, that should already include some extra safety (expected smth. like
@@ -208,7 +246,7 @@
            if ds.shape[0] != id_offset:
                raise ValueError(
                    f"Number of already stored metadata ({ds.shape[0]}) does not match the number of already stored "
-                    f"CSDs ({id_offset}).")
+                    f"CSDs or sensor scans ({id_offset}).")
            # resize datasets to fit new data
            ds.resize(ds.shape[0] + num_ids, axis=0)
            # process metadata
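
Taken together, these hunks make csds and sensor_scans mutually exclusive inputs. A minimal usage sketch (the file path and array shapes are made up for illustration, and it is assumed that create_dataset is re-exported from simcats_datasets.generation, as the private module name _create_dataset suggests):

    import numpy as np
    from simcats_datasets.generation import create_dataset

    # ten hypothetical 100x100 sensor scans
    scans = [np.random.rand(100, 100) for _ in range(10)]

    # stored under the "sensor_scans" HDF5 dataset; passing both csds and
    # sensor_scans (or neither) raises the ValueError introduced above
    create_dataset(dataset_path="example_sensor_scans.h5", sensor_scans=scans)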
{simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/loading/_load_dataset.py
@@ -13,11 +13,11 @@ from typing import List, Tuple, Union
 
 import h5py
 import numpy as np
-from tqdm import tqdm
 
 
 def load_dataset(file: Union[str, h5py.File],
-                 load_csds=True,
+                 load_csds: bool = True,
+                 load_sensor_scans: bool = False,
                  load_occupations: bool = False,
                  load_tct_masks: bool = False,
                  load_ct_by_dot_masks: bool = False,
@@ -34,12 +34,15 @@ def load_dataset(file: Union[str, h5py.File],
            dataset. If a path is supplied, load_dataset will open the file itself. If you want to do multiple
            consecutive loads from the same file (e.g. for using th PyTorch SimcatsDataset without preloading), consider
            initializing the file object yourself and passing it, to improve the performance.
-        load_csds: Determines if csds should be loaded. Default is True.
+        load_csds: Determines if CSDs should be loaded. A dataset can have either CSDs or sensor scans, but never both.
+            Default is True.
+        load_sensor_scans: Determines if sensor scans should be loaded. A dataset can have either CSDs or sensor scans,
+            but never both. Default is False.
        load_occupations: Determines if occupation data should be loaded. Default is False.
        load_tct_masks: Determines if lead transition masks should be loaded. Default is False.
        load_ct_by_dot_masks: Determines if charge transition labeled by affected dot masks should be loaded. This
            requires that ct_by_dot_masks have been added to the dataset. If a dataset has been created using
-            create_simulated_dataset, these masks can be added afterwards using add_ct_by_dot_masks_to_dataset, mainly
+            create_simulated_dataset, these masks can be added afterward using add_ct_by_dot_masks_to_dataset, mainly
            to avoid recalculating them multiple times (for example for machine learning purposes). Default is False.
        load_line_coords: Determines if lead transition definitions using start and end points should be loaded. Default
            is False.
@@ -56,13 +59,15 @@
 
    Returns:
        namedtuple: The namedtuple can be unpacked like every normal tuple, or instead accessed by field names. \n
-        Depending on what has been enabled, the following data is included in the named tuple: \n
-        - field 'csds': List containing all CSDs as numpy arrays. The list is sorted by the id of the CSDs (if no
-          specific_ids are provided, else the order is given by specific_ids).
+        Depending on what has been enabled, the following data is included in the named tuple (all lists are sorted by
+        the id of the CSDs or sensor_scans if no specific_ids are provided, else the order is given by specific_ids): \n
+        - field 'csds': List containing all CSDs as numpy arrays.
+        - field 'sensor_scans': List containing all sensor scans as numpy arrays.
        - field 'occupations': List containing numpy arrays with occupations.
        - field 'tct_masks': List containing numpy arrays of TCT masks.
        - field 'ct_by_dot_masks': List containing numpy arrays of CT_by_dot masks.
-        - field 'line_coordinates': List containing numpy arrays of line coordinates.
+        - field 'line_coordinates': List containing numpy arrays of line coordinates. Each row of the array specifies
+          the start and end points of one line.
        - field 'line_labels': List containing a list of dictionaries (one dict for each line specified as line
          coordinates).
        - field 'metadata': List containing dictionaries with all metadata (simcats configs) for each CSD.
@@ -72,6 +77,8 @@
    fieldnames = []
    if load_csds:
        fieldnames.append("csds")
+    if load_sensor_scans:
+        fieldnames.append("sensor_scans")
    if load_occupations:
        fieldnames.append("occupations")
    if load_tct_masks:
@@ -86,10 +93,17 @@
        fieldnames.append("metadata")
    if load_ids:
        fieldnames.append("ids")
-    CSDDataset = namedtuple(typename="CSDDataset", field_names=fieldnames)
+    SimcatsDataset = namedtuple(typename="SimcatsDataset", field_names=fieldnames)
 
    # use nullcontext to catch the case where a file is passed instead of the string
    with h5py.File(file, "r") if isinstance(file, str) else nullcontext(file) as _file:
+        # check if the dataset contains csd or sensor_scans
+        if "csds" in _file:
+            csd_dataset = True
+        elif "sensor_scans" in _file:
+            csd_dataset = False
+        else:
+            raise KeyError("The dataset that should be loaded does not contain any csds or sensor_scans!")
        # if only specific ids should be loaded, check if all ids are available
        if specific_ids is not None:
            if isinstance(specific_ids, list) or isinstance(specific_ids, np.ndarray):
@@ -104,18 +118,31 @@
        # Dataset with non-existing specific IDs (which else would only crash as soon as a non-existent ID is
        # requested during training). We can't check this on loading CSDs etc. as it massively slows down loading.
        if specific_ids is not None:
-            if np.min(specific_ids) < 0 or np.max(specific_ids) >= len(_file["csds"]):
-                msg = "Not all ids specified by 'specific_ids' are available in the dataset!"
-                raise IndexError(msg)
+            if csd_dataset:
+                if np.min(specific_ids) < 0 or np.max(specific_ids) >= len(_file["csds"]):
+                    msg = "Not all ids specified by 'specific_ids' are available in the dataset!"
+                    raise IndexError(msg)
+            else:
+                if np.min(specific_ids) < 0 or np.max(specific_ids) >= len(_file["sensor_scans"]):
+                    msg = "Not all ids specified by 'specific_ids' are available in the dataset!"
+                    raise IndexError(msg)
            available_ids = specific_ids
        else:
-            available_ids = range(len(_file["csds"]))
+            if csd_dataset:
+                available_ids = range(len(_file["csds"]))
+            else:
+                available_ids = range(len(_file["sensor_scans"]))
 
        if load_csds:
            if specific_ids is not None:
                csds = _file["csds"][specific_ids]
            else:
                csds = _file["csds"][:]
+        if load_sensor_scans:
+            if specific_ids is not None:
+                sensor_scans = _file["sensor_scans"][specific_ids]
+            else:
+                sensor_scans = _file["sensor_scans"][:]
        if load_occupations:
            if specific_ids is not None:
                occupations = _file["occupations"][specific_ids]
@@ -155,6 +182,8 @@
    return_data = []
    if load_csds:
        return_data.append(csds)
+    if load_sensor_scans:
+        return_data.append(sensor_scans)
    if load_occupations:
        return_data.append(occupations)
    if load_tct_masks:
@@ -174,4 +203,4 @@
    if specific_ids is not None and undo_sort_ids is not None:
        return_data = [[x[i] for i in undo_sort_ids] for x in return_data]
 
-    return CSDDataset._make(tuple(return_data))
+    return SimcatsDataset._make(tuple(return_data))
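
On the loading side, the matching flag selects which of the two mutually exclusive HDF5 datasets is read. A sketch against the hypothetical file from above:

    from simcats_datasets.loading import load_dataset

    # a file contains either "csds" or "sensor_scans", never both, so CSD
    # loading is disabled here; note that the returned namedtuple is now
    # called SimcatsDataset instead of CSDDataset
    data = load_dataset(file="example_sensor_scans.h5", load_csds=False,
                        load_sensor_scans=True, load_ids=True)
    print(data.sensor_scans[0].shape, list(data.ids[:3]))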
{simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/loading/load_ground_truth.py
@@ -30,6 +30,13 @@ from simcats_datasets.loading import load_dataset
 from simcats.support_functions import rotate_points
 
 
+# Lists defining which ground truth type is supported for CSD and sensor scan datasets, respectively
+_csd_ground_truths = ["load_zeros_masks", "load_tct_masks", "load_tct_by_dot_masks", "load_idt_masks", "load_ct_masks",
+                      "load_ct_by_dot_masks", "load_tc_region_masks", "load_tc_region_minus_tct_masks",
+                      "load_c_region_masks"]
+_sensor_scan_ground_truths = ["load_zeros_masks", "load_tct_masks"]
+
+
 def load_zeros_masks(file: Union[str, h5py.File],
                      specific_ids: Union[range, List[int], np.ndarray, None] = None,
                      progress_bar: bool = True) -> List[np.ndarray]:
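
Per these lists, load_zeros_masks and load_tct_masks are the only ground truth loaders valid for both dataset types. A small sketch (the file name is the hypothetical one from above, it assumes TCT masks were stored, and the signature is assumed to match load_zeros_masks shown here):

    from simcats_datasets.loading.load_ground_truth import load_tct_masks

    # TCT masks work for CSD and sensor scan datasets alike
    tct = load_tct_masks(file="example_sensor_scans.h5", specific_ids=[0, 1],
                         progress_bar=False)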
{simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets/loading/pytorch.py
@@ -28,7 +28,8 @@ class SimcatsDataset(Dataset):
                 ground_truth_preprocessors: Union[List[Union[str, Callable]], None] = None,
                 format_output: Union[Callable, str, None] = None, preload: bool = True,
                 max_concurrent_preloads: int = 100000,
-                 progress_bar: bool = False, ):
+                 progress_bar: bool = False,
+                 sensor_scan_dataset: bool = False,):
        """Initializes an object for providing simcats_datasets data to pytorch.
 
        Args:
@@ -77,8 +78,11 @@
                loading them step by step and for example converting the CSDs to float32 with a corresponding data
                preprocessor. Default is 100,000.
            progress_bar: Determines whether to display a progress bar while loading data. Default is False.
+            sensor_scan_dataset: Determines whether the dataset is a sensor scan dataset (contains sensor scans instead
+                of CSDs). Default is False.
        """
        self.__h5_path = h5_path
+        self.__sensor_scan_dataset = sensor_scan_dataset
        self.__specific_ids = specific_ids
        # set up the load ground truth function. Could be None, function referenced by string, or callable
        if load_ground_truth is None:
@@ -88,6 +92,20 @@
            self.__load_ground_truth = getattr(simcats_datasets.loading.load_ground_truth, load_ground_truth)
        else:
            self.__load_ground_truth = load_ground_truth
+        # check if it is possible to load the desired ground truth from the given dataset
+        try:
+            _ = self.load_ground_truth(file=self.__h5_path, specific_ids=[0], progress_bar=False)
+        except:
+            raise ValueError(
+                f"The specified ground truth ({self.load_ground_truth.__name__}) can't be loaded for the given "
+                f"dataset ({self.h5_path}). Please make sure to select a supported ground truth type.\n"
+                f"Supported ground truth types for CSD datasets created using "
+                f"simcats_datasets.generation.create_simulated_dataset are:\n"
+                f"{', '.join(simcats_datasets.loading.load_ground_truth._csd_ground_truths)}\n"
+                f"Supported ground truth types for sensor scan datasets created using "
+                f"simcats_datasets.generation.create_simulated_dataset are:\n"
+                f"{', '.join(simcats_datasets.loading.load_ground_truth._sensor_scan_ground_truths)}"
+            )
        # set up the data preprocessors. Could be None, functions referenced by strings, or callables
        if data_preprocessors is None:
            self.__data_preprocessors = data_preprocessors
@@ -121,31 +139,37 @@
                load_dataset(file=h5_file, load_csds=False, load_ids=True, specific_ids=self.specific_ids,
                             progress_bar=self.progress_bar, ).ids)
            # preprocess an exemplary image to get final shape (some preprocessors might adjust the shape)
-            _temp_csd = \
-                load_dataset(file=h5_file, load_csds=True, specific_ids=[0], progress_bar=self.progress_bar, ).csds[0]
+            _temp_measurement = load_dataset(file=h5_file,
+                                             load_csds=not self.__sensor_scan_dataset,
+                                             load_sensor_scans=self.__sensor_scan_dataset,
+                                             specific_ids=[0],
+                                             progress_bar=self.progress_bar, )[0][0]
            if self.data_preprocessors is not None:
                for processor in self.data_preprocessors:
-                    _temp_csd = processor(_temp_csd)
-            self.__shape = (self.__num_ids, *np.squeeze(_temp_csd).shape)
-        # preload all data if requested
+                    _temp_measurement = processor(_temp_measurement)
+            self.__shape = (self.__num_ids, *np.squeeze(_temp_measurement).shape)
+        # preload all measurements if requested
        if self.preload:
-            self.__csds = []
+            self.__measurements = []
            self.__ground_truths = []
-            # load and save data, at most max_concurrent_ids at a time
+            # load and save measurements, at most max_concurrent_ids at a time
            for i in range(math.ceil(self.__num_ids / max_concurrent_preloads)):
                _ids = range(i * max_concurrent_preloads,
                             np.min([(i + 1) * max_concurrent_preloads, self.__num_ids]))
                if self.specific_ids is not None:
                    _ids = [self.specific_ids[i] for i in _ids]
                # load
-                _temp_csds = [csd for csd in
-                              load_dataset(file=h5_file, specific_ids=_ids, progress_bar=self.progress_bar, ).csds]
-                # preprocess data
+                _temp_measurements = [data for data in load_dataset(file=h5_file,
+                                                                    load_csds=not self.__sensor_scan_dataset,
+                                                                    load_sensor_scans=self.__sensor_scan_dataset,
+                                                                    specific_ids=_ids,
+                                                                    progress_bar=self.progress_bar, )[0]]
+                # preprocess measurements
                if self.data_preprocessors is not None:
                    for processor in self.data_preprocessors:
-                        _temp_csds = processor(_temp_csds)
-                self.__csds.extend(_temp_csds)
-                del _temp_csds
+                        _temp_measurements = processor(_temp_measurements)
+                self.__measurements.extend(_temp_measurements)
+                del _temp_measurements
                try:
                    _temp_ground_truths = [gt for gt in
                                           self.load_ground_truth(file=h5_file, specific_ids=_ids, progress_bar=self.progress_bar, )]
@@ -162,6 +186,10 @@
    def h5_path(self) -> str:
        return self.__h5_path
 
+    @property
+    def sensor_scan_dataset(self) -> bool:
+        return self.__sensor_scan_dataset
+
    @property
    def specific_ids(self) -> Union[range, List[int], np.ndarray, None]:
        return self.__specific_ids
@@ -196,19 +224,19 @@
 
    def __len__(self):
        """
-        Returns the number of CSDs in the dataset.
+        Returns the number of measurements in the dataset.
        """
        return self.__num_ids
 
    def __getitem__(self, idx: int):
        """
-        Retrieves a csd and the corresponding ground truth at given index idx.
+        Retrieves a measurement and the corresponding ground truth at given index idx.
 
        Args:
            idx: The id of the csd and ground truth to be returned.
        """
        if self.preload:
-            csd = self.__csds[idx]
+            measurement = self.__measurements[idx]
            try:
                ground_truth = self.__ground_truths[idx]
            except IndexError:
@@ -220,12 +248,16 @@
                self.__h5_file = h5py.File(self.h5_path, mode="r")
            if self.specific_ids is not None:
                idx = self.specific_ids[idx]
-            # load data
-            csd = load_dataset(file=self.__h5_file, specific_ids=[idx], progress_bar=self.progress_bar).csds[0]
-            # preprocess data
+            # load measurement
+            measurement = load_dataset(file=self.__h5_file,
+                                       load_csds=not self.__sensor_scan_dataset,
+                                       load_sensor_scans=self.__sensor_scan_dataset,
+                                       specific_ids=[idx],
+                                       progress_bar=self.progress_bar)[0][0]
+            # preprocess measurement
            if self.data_preprocessors is not None:
                for processor in self.data_preprocessors:
-                    csd = processor(csd)
+                    measurement = processor(measurement)
            # load ground truth
            try:
                ground_truth = \
@@ -236,7 +268,7 @@
                        ground_truth = processor(ground_truth)
            except TypeError:
                ground_truth = None
-        return self.format_output(csd=csd, ground_truth=ground_truth, idx=idx)
+        return self.format_output(measurement=measurement, ground_truth=ground_truth, idx=idx)
 
    def __repr__(self):
        return (f"{self.__class__.__name__}(\n"
@@ -247,7 +279,8 @@
                f"\tground_truth_preprocessors=[{[', '.join([func.__name__ for func in self.ground_truth_preprocessors]) if self.ground_truth_preprocessors is not None else None][0]}],\n"
                f"\tformat_output={self.format_output.__name__},\n"
                f"\tpreload={self.preload},\n"
-                f"\tprogress_bar={self.progress_bar}\n"
+                f"\tprogress_bar={self.progress_bar},\n"
+                f"\tsensor_scan_dataset={self.sensor_scan_dataset}\n"
                f")")
 
    def __del__(self):
@@ -264,7 +297,8 @@ class SimcatsConcatDataset(ConcatDataset):
                 ground_truth_preprocessors: Union[List[Union[str, Callable]], None] = None,
                 format_output: Union[Callable, str, None] = None, preload: bool = True,
                 max_concurrent_preloads: int = 100000,
-                 progress_bar: bool = False, ):
+                 progress_bar: bool = False,
+                 sensor_scan_dataset: bool = False,):
        """Initializes an object for providing concatenated simcats_datasets data to pytorch.
 
        Args:
@@ -313,6 +347,8 @@
                loading them step by step and for example converting the CSDs to float32 with a corresponding data
                preprocessor. Default is 100.000.
            progress_bar: Determines whether to display a progress bar while loading data. Default is False.
+            sensor_scan_dataset: Determines whether the datasets are sensor scan datasets (contain sensor scans instead
+                of CSDs). Default is False.
        """
        _datasets = list()
        if specific_ids is not None and len(specific_ids) != len(h5_paths):
@@ -328,9 +364,10 @@
                                           data_preprocessors=data_preprocessors,
                                           ground_truth_preprocessors=ground_truth_preprocessors, format_output=format_output,
                                           preload=preload, max_concurrent_preloads=max_concurrent_preloads,
-                                           progress_bar=progress_bar))
+                                           progress_bar=progress_bar, sensor_scan_dataset=sensor_scan_dataset))
        super().__init__(_datasets)
        self.__h5_paths = h5_paths
+        self.__sensor_scan_dataset = sensor_scan_dataset
        self.__specific_ids = specific_ids
        # set up the load ground truth function. Could be None, function referenced by string, or callable
        if load_ground_truth is None:
@@ -373,7 +410,7 @@
            if shape is None:
                shape = dataset.shape[1:]
            elif dataset.shape[1:] != shape:
-                raise ValueError(f"The shape of the SimcatsDataset CSDs should be identical but found shapes "
+                raise ValueError(f"The shape of the SimcatsDataset Measurements should be identical but found shapes "
                                 f"{[dataset.shape[1:] for dataset in _datasets]}")
        self.__shape = (len(self), *shape)
 
@@ -381,6 +418,10 @@
    def h5_paths(self) -> List[str]:
        return self.__h5_paths
 
+    @property
+    def sensor_scan_dataset(self) -> bool:
+        return self.__sensor_scan_dataset
+
    @property
    def specific_ids(self) -> Union[List[Union[range, List[int], np.ndarray, None]], None]:
        return self.__specific_ids
@@ -422,5 +463,6 @@
                f"\tground_truth_preprocessors=[{[', '.join([func.__name__ for func in self.ground_truth_preprocessors]) if self.ground_truth_preprocessors is not None else None][0]}],\n"
                f"\tformat_output={self.format_output.__name__},\n"
                f"\tpreload={self.preload},\n"
-                f"\tprogress_bar={self.progress_bar}\n"
+                f"\tprogress_bar={self.progress_bar},\n"
+                f"\tsensor_scan_dataset={self.sensor_scan_dataset},\n"
                f")")
simcats_datasets-2.5.0/simcats_datasets/support_functions/pytorch_format_output.py
@@ -0,0 +1,169 @@
+"""Functions for formatting the output of the **Pytorch Dataset class**.
+
+Every function must accept a measurement (as array), a ground truth (e.g. TCT mask as array) and the image id as input.
+Output type depends on the ground truth type and the required pytorch datatype (tensor as long, float, ...). Ground
+truth could for example be a pixel mask or defined start end points of lines.
+**Please look at format_dict_csd_float_ground_truth_long for a reference.**
+
+@author: f.hader
+"""
+
+from __future__ import annotations
+
+from typing import Tuple
+
+import numpy as np
+import torch
+
+def format_dict_csd_float_ground_truth_long(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[
+    str, torch.Tensor]:
+    """Format the output of the Pytorch Dataset class to be a dict with entries 'csd' and 'ground_truth' of dtype float and long, respectively. (default of Pytorch Dataset class.)
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask.
+        idx: index of the measurement. Not used in this format.
+
+    Returns:
+        Dict with 'csd' and 'ground_truth' of dtype float and long, respectively.
+    """
+    assert (measurement.size == ground_truth.size), \
+        f"Image and mask should be the same size, but are {measurement.size=} and {ground_truth.size=}"
+    return {"csd": torch.as_tensor(measurement.copy(), dtype=torch.float).contiguous(),
+            "ground_truth": torch.as_tensor(ground_truth.copy(), dtype=torch.long, ).contiguous(), }
+
+
+def format_dict_csd_float16_ground_truth_long(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[
+    str, torch.Tensor]:
+    """Format the output of the Pytorch Dataset class to be a dict with entries 'csd' and 'ground_truth' of dtype float16 and long, respectively.
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask.
+        idx: index of the measurement. Not used in this format.
+
+    Returns:
+        Dict with 'csd' and 'ground_truth' of dtype float16 and long, respectively.
+    """
+    assert (measurement.size == ground_truth.size), \
+        f"Image and mask should be the same size, but are {measurement.size=} and {ground_truth.size=}"
+    return {"csd": torch.as_tensor(measurement.copy(), dtype=torch.float16).contiguous(),
+            "ground_truth": torch.as_tensor(ground_truth.copy(), dtype=torch.long, ).contiguous(), }
+
+
+def format_dict_csd_float_ground_truth_float(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[
+    str, torch.Tensor]:
+    """Format the output of the Pytorch Dataset class to be a dict with entries 'csd' and 'ground_truth' of dtype float and float, respectively.
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask.
+        idx: index of the measurement. Not used in this format.
+
+    Returns:
+        Dict with 'csd' and 'ground_truth' of dtype float and float, respectively.
+    """
+    assert (measurement.size == ground_truth.size), \
+        f"Image and mask should be the same size, but are {measurement.size=} and {ground_truth.size=}"
+    return {"csd": torch.as_tensor(measurement.copy(), dtype=torch.float).contiguous(),
+            "ground_truth": torch.as_tensor(ground_truth.copy(), dtype=torch.float).contiguous(), }
+
+
+def format_mmsegmentation(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[str, torch.Tensor]:
+    """Format the output of the Pytorch Dataset class to be conform to the MMSegmentation CustomDataset of version 0.6.0, see https://github.com/open-mmlab/mmsegmentation/blob/v0.6.0/mmseg/datasets/custom.py.
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask.
+        idx: index of the measurement.
+
+    Returns:
+        Dict with data conform to the MMSegmentation CustomDataset of version 0.6.0, see https://github.com/open-mmlab/mmsegmentation/blob/v0.6.0/mmseg/datasets/custom.py.
+    """
+    assert (measurement.size == ground_truth.size), \
+        f"Image and mask should be the same size, but are {measurement.size=} and {ground_truth.size=}"
+    return {"img": torch.as_tensor(measurement.copy()).float().contiguous(),
+            "gt_semantic_seg": torch.as_tensor(ground_truth.copy()).float().contiguous(),
+            "img_metas": {"filename": f"{idx}.jpg", "ori_filename": f"{idx}_ori.jpg", "ori_shape": measurement.shape[::-1],
+                          # we want (100, 100, 1) not (1, 100, 100)
+                          "img_shape": measurement.shape[::-1], "pad_shape": measurement.shape[::-1],  # image shape after padding
+                          "scale_factor": 1.0, "img_norm_cfg": {"mean": np.mean(measurement, axis=(-2, -1)),  # mean for each channel
+                                                                "std": np.std(measurement, axis=(-2, -1)),  # std for each channel
+                                                                "to_rgb": False, }, "img_id": f"{idx}", }, }
+
+
+def format_csd_only(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> torch.Tensor:
+    """Format the output of the Pytorch Dataset class to be just a measurement.
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask. Not used in this format.
+        idx: Index of the measurement. Not used in this format.
+
+    Returns:
+        The measurement as tensor.
+    """
+    return torch.as_tensor(measurement.copy(), dtype=torch.float).contiguous()
+
+
+def format_csd_float16_only(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> torch.Tensor:
+    """Format the output of the Pytorch Dataset class to be just a float16 (half precision) measurement.
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask. Not used in this format.
+        idx: Index of the measurement. Not used in this format.
+
+    Returns:
+        The float 16 (half precision) measurement as tensor.
+    """
+    return torch.as_tensor(measurement.copy(), dtype=torch.float16).contiguous()
+
+
+def format_csd_bfloat16_only(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> torch.Tensor:
+    """Format the output of the Pytorch Dataset class to be just a bfloat16 (half precision) measurement.
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask. Not used in this format.
+        idx: Index of the measurement. Not used in this format.
+
+    Returns:
+        The brain float 16 (half precision) measurement as tensor.
+    """
+    return torch.as_tensor(measurement.copy(), dtype=torch.bfloat16).contiguous()
+
+
+def format_csd_class_index(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> Tuple[
+    torch.Tensor, torch.Tensor, int]:
+    """Format the output of the Pytorch Dataset class to be the measurement, a class index (which is always 0 as we have no classes) and the index.
+
+    This is needed to be conform to the datasets used in DeepSVDD, see https://github.com/lukasruff/Deep-SVDD-PyTorch.
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask. Not used in this format.
+        idx: Index of the measurement.
+
+    Returns:
+        A tuple of measurement, class index, and the index.
+    """
+    return torch.as_tensor(measurement.copy(), dtype=torch.float).unsqueeze(0).contiguous(), torch.tensor(0), idx
+
+
+def format_tuple_csd_float_ground_truth_float(measurement: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[
+    str, torch.Tensor]:
+    """Format the output of the Pytorch Dataset class to be a tuple of the measurement and the ground_truth.
+
+    Args:
+        measurement: The measurement array.
+        ground_truth: Ground truth as pixel mask.
+        idx: index of the measurement. Not used in this format.
+
+    Returns:
+        Tuple with measurement and ground_truth of dtype float and float, respectively.
+    """
+    assert (measurement.size == ground_truth.size), \
+        f"Image and mask should be the same size, but are {measurement.size=} and {ground_truth.size=}"
+    return (torch.as_tensor(measurement.copy(), dtype=torch.float).contiguous(),
+            torch.as_tensor(ground_truth.copy(), dtype=torch.float).contiguous(),)
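
As the module docstring states, any formatter just has to accept (measurement, ground_truth, idx); a hypothetical custom formatter in the same spirit could look like this:

    import numpy as np
    import torch

    def format_tuple_measurement_idx(measurement: np.ndarray,
                                     ground_truth: np.ndarray,
                                     idx: int) -> tuple[torch.Tensor, int]:
        # ground_truth is ignored; return the measurement together with its index
        return (torch.as_tensor(measurement.copy(), dtype=torch.float).contiguous(),
                idx)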
{simcats_datasets-2.4.0 → simcats_datasets-2.5.0}/simcats_datasets.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: simcats-datasets
-Version: 2.4.0
+Version: 2.5.0
 Summary: SimCATS-Datasets is a Python package that simplifies the creation and loading of SimCATS datasets.
 Author-email: Fabian Hader <f.hader@fz-juelich.de>, Fabian Fuchs <f.fuchs@fz-juelich.de>, Karin Havemann <k.havemann@fz-juelich.de>, Sarah Fleitmann <s.fleitmann@fz-juelich.de>, Jan Vogelbruch <j.vogelbruch@fz-juelich.de>
 License: GNU GENERAL PUBLIC LICENSE
simcats_datasets-2.4.0/simcats_datasets/__init__.py
@@ -1,2 +0,0 @@
-__all__ = []
-__version__ = "2.4.0"
simcats_datasets-2.4.0/simcats_datasets/support_functions/pytorch_format_output.py
@@ -1,170 +0,0 @@
-"""Functions for formatting the output of the **Pytorch Dataset class**.
-
-Every function must accept a CSD (as array), a ground truth (e.g. TCT mask as array) and the image id as input.
-Output type depends on the ground truth type and the required pytorch datatype (tensor as long, float, ...). Ground
-truth could for example be a pixel mask or defined start end points of lines.
-**Please look at format_dict_csd_float_ground_truth_long for a reference.**
-
-@author: f.hader
-"""
-
-from __future__ import annotations
-
-from typing import Tuple
-
-import numpy as np
-import torch
-
-
-def format_dict_csd_float_ground_truth_long(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[
-    str, torch.Tensor]:
-    """Format the output of the Pytorch Dataset class to be a dict with entries 'csd' and 'ground_truth' of dtype float and long, respectively. (default of Pytorch Dataset class.)
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask.
-        idx: index of the csd. Not used in this format.
-
-    Returns:
-        Dict with 'csd' and 'ground_truth' of dtype float and long, respectively.
-    """
-    assert (
-        csd.size == ground_truth.size), f"Image and mask should be the same size, but are {csd.size=} and {ground_truth.size=}"
-    return {"csd": torch.as_tensor(csd.copy(), dtype=torch.float).contiguous(),
-            "ground_truth": torch.as_tensor(ground_truth.copy(), dtype=torch.long, ).contiguous(), }
-
-
-def format_dict_csd_float16_ground_truth_long(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[
-    str, torch.Tensor]:
-    """Format the output of the Pytorch Dataset class to be a dict with entries 'csd' and 'ground_truth' of dtype float16 and long, respectively.
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask.
-        idx: index of the csd. Not used in this format.
-
-    Returns:
-        Dict with 'csd' and 'ground_truth' of dtype float16 and long, respectively.
-    """
-    assert (
-        csd.size == ground_truth.size), f"Image and mask should be the same size, but are {csd.size=} and {ground_truth.size=}"
-    return {"csd": torch.as_tensor(csd.copy(), dtype=torch.float16).contiguous(),
-            "ground_truth": torch.as_tensor(ground_truth.copy(), dtype=torch.long, ).contiguous(), }
-
-
-def format_dict_csd_float_ground_truth_float(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[
-    str, torch.Tensor]:
-    """Format the output of the Pytorch Dataset class to be a dict with entries 'csd' and 'ground_truth' of dtype float and float, respectively.
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask.
-        idx: index of the csd. Not used in this format.
-
-    Returns:
-        Dict with 'csd' and 'ground_truth' of dtype float and float, respectively.
-    """
-    assert (
-        csd.size == ground_truth.size), f"Image and mask should be the same size, but are {csd.size=} and {ground_truth.size=}"
-    return {"csd": torch.as_tensor(csd.copy(), dtype=torch.float).contiguous(),
-            "ground_truth": torch.as_tensor(ground_truth.copy(), dtype=torch.float).contiguous(), }
-
-
-def format_mmsegmentation(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[str, torch.Tensor]:
-    """Format the output of the Pytorch Dataset class to be conform to the MMSegmentation CustomDataset of version 0.6.0, see https://github.com/open-mmlab/mmsegmentation/blob/v0.6.0/mmseg/datasets/custom.py.
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask.
-        idx: index of the csd.
-
-    Returns:
-        Dict with data conform to the MMSegmentation CustomDataset of version 0.6.0, see https://github.com/open-mmlab/mmsegmentation/blob/v0.6.0/mmseg/datasets/custom.py.
-    """
-    assert (
-        csd.size == ground_truth.size), f"Image and mask should be the same size, but are {csd.size=} and {ground_truth.size=}"
-    return {"img": torch.as_tensor(csd.copy()).float().contiguous(),
-            "gt_semantic_seg": torch.as_tensor(ground_truth.copy()).float().contiguous(),
-            "img_metas": {"filename": f"{idx}.jpg", "ori_filename": f"{idx}_ori.jpg", "ori_shape": csd.shape[::-1],
-                          # we want (100, 100, 1) not (1, 100, 100)
-                          "img_shape": csd.shape[::-1], "pad_shape": csd.shape[::-1],  # image shape after padding
-                          "scale_factor": 1.0, "img_norm_cfg": {"mean": np.mean(csd, axis=(-2, -1)),  # mean for each channel
-                                                                "std": np.std(csd, axis=(-2, -1)),  # std for each channel
-                                                                "to_rgb": False, }, "img_id": f"{idx}", }, }
-
-
-def format_csd_only(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> torch.Tensor:
-    """Format the output of the Pytorch Dataset class to be just a CSD.
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask. Not used in this format.
-        idx: Index of the csd. Not used in this format.
-
-    Returns:
-        The CSD as tensor.
-    """
-    return torch.as_tensor(csd.copy(), dtype=torch.float).contiguous()
-
-
-def format_csd_float16_only(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> torch.Tensor:
-    """Format the output of the Pytorch Dataset class to be just a float16 (half precision) CSD.
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask. Not used in this format.
-        idx: Index of the csd. Not used in this format.
-
-    Returns:
-        The float 16 (half precision) CSD as tensor.
-    """
-    return torch.as_tensor(csd.copy(), dtype=torch.float16).contiguous()
-
-
-def format_csd_bfloat16_only(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> torch.Tensor:
-    """Format the output of the Pytorch Dataset class to be just a bfloat16 (half precision) CSD.
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask. Not used in this format.
-        idx: Index of the csd. Not used in this format.
-
-    Returns:
-        The brain float 16 (half precision) CSD as tensor.
-    """
-    return torch.as_tensor(csd.copy(), dtype=torch.bfloat16).contiguous()
-
-
-def format_csd_class_index(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> Tuple[
-    torch.Tensor, torch.Tensor, int]:
-    """Format the output of the Pytorch Dataset class to be the CSD, a class index (which is always 0 as we have no classes) and the index.
-
-    This is needed to be conform to the datasets used in DeepSVDD, see https://github.com/lukasruff/Deep-SVDD-PyTorch.
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask. Not used in this format.
-        idx: Index of the csd.
-
-    Returns:
-        A tuple of CSD, class index, and the index.
-    """
-    return torch.as_tensor(csd.copy(), dtype=torch.float).unsqueeze(0).contiguous(), torch.tensor(0), idx
-
-
-def format_tuple_csd_float_ground_truth_float(csd: np.ndarray, ground_truth: np.ndarray, idx: int, ) -> dict[
-    str, torch.Tensor]:
-    """Format the output of the Pytorch Dataset class to be a tuple of the csd and the ground_truth.
-
-    Args:
-        csd: The CSD array.
-        ground_truth: Ground truth as pixel mask.
-        idx: index of the csd. Not used in this format.
-
-    Returns:
-        Tuple with csd and ground_truth of dtype float and float, respectively.
-    """
-    assert (
-        csd.size == ground_truth.size), f"Image and mask should be the same size, but are {csd.size=} and {ground_truth.size=}"
-    return (torch.as_tensor(csd.copy(), dtype=torch.float).contiguous(),
-            torch.as_tensor(ground_truth.copy(), dtype=torch.float).contiguous(),)