PyPI - simcats-datasets - Versions diffs - 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl - Mend

simcats-datasets 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

simcats_datasets/__init__.py CHANGED Viewed

@@ -1,2 +1,2 @@
 __all__ = []
-__version__ = "2.5.0"
+__version__ = "2.6.0"

simcats_datasets/generation/_create_dataset.py CHANGED Viewed

@@ -21,6 +21,8 @@ def create_dataset(dataset_path: str,
                    occupations: Optional[List[np.ndarray]] = None,
                    tct_masks: Optional[List[np.ndarray]] = None,
                    ct_by_dot_masks: Optional[List[np.ndarray]] = None,
+                   sensor_regime_masks: Optional[List[np.ndarray]] = None,
+                   sensor_peak_center_masks: Optional[List[np.ndarray]] = None,
                    line_coordinates: Optional[List[np.ndarray]] = None,
                    line_labels: Optional[List[dict]] = None,
                    metadata: Optional[List[dict]] = None,
@@ -32,6 +34,8 @@ def create_dataset(dataset_path: str,
                    dtype_occ: np.dtype = np.float32,
                    dtype_tct: np.dtype = np.uint8,
                    dtype_ct_by_dot: np.dtype = np.uint8,
+                   dtype_sensor_regime_masks: np.dtype = np.uint8,
+                   dtype_sensor_peak_center_masks: np.dtype = np.uint8,
                    dtype_line_coordinates: np.dtype = np.float32) -> None:
     """Function for creating simcats_datasets v2 format datasets from given data.
@@ -44,6 +48,8 @@ def create_dataset(dataset_path: str,
         occupations: List of occupations to use for creating the dataset. Defaults to None.
         tct_masks: List of TCT masks to use for creating the dataset. Defaults to None.
         ct_by_dot_masks: List of CT by dot masks to use for creating the dataset. Defaults to None.
+        sensor_regime_masks: List of sensor regime masks to use for creating the dataset. Defaults to None.
+        sensor_peak_center_masks: List of sensor peak center masks to use for creating the dataset. Defaults to None.
         line_coordinates: List of line coordinates to use for creating the dataset. Defaults to None.
         line_labels: List of line labels to use for creating the dataset. Defaults to None.
         metadata: List of metadata to use for creating the dataset. Defaults to None.
@@ -62,6 +68,9 @@ def create_dataset(dataset_path: str,
         dtype_occ: Specifies the dtype to be used for saving Occupations. Default is np.float32.
         dtype_tct: Specifies the dtype to be used for saving TCTs. Default is np.uint8.
         dtype_ct_by_dot: Specifies the dtype to be used for saving CT by dot masks. Default is np.uint8.
+        dtype_sensor_regime_masks: Specifies the dtype to be used for saving sensor regime masks. Default is np.uint8.
+        dtype_sensor_peak_center_masks: Specifies the dtype to be used for saving sensor peak center masks. Default is
+            np.uint8.
         dtype_line_coordinates: Specifies the dtype to be used for saving line coordinates. Default is np.float32.
     """
     # Create path where the dataset will be saved (if folder doesn't exist already)
@@ -170,6 +179,47 @@ def create_dataset(dataset_path: str,
             # resize datasets to fit new data
             ds.resize(ds.shape[0] + num_ids, axis=0)
             ds[id_offset:] = np.array(ct_by_dot_masks).astype(dtype_tct)
+        if sensor_regime_masks is not None:
+            if len(sensor_regime_masks) != num_ids:
+                raise ValueError(
+                    f"Number of new sensor regime mask arrays ({len(sensor_regime_masks)}) does not match the number "
+                    f"of new CSDs or sensor scans ({num_ids}).")
+            # process sensor regime masks
+            # save an example sensor regime mask to get shape and dtype
+            temp_sensor_regime_mask = sensor_regime_masks[0].copy()
+            # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
+            # load one image at a time during training)
+            ds = hdf5_file.require_dataset(name='sensor_regime_masks', shape=(0, *temp_sensor_regime_mask.shape),
+                                           dtype=dtype_sensor_regime_masks,
+                                           maxshape=(None, *temp_sensor_regime_mask.shape))
+            if ds.shape[0] != id_offset:
+                raise ValueError(
+                    f"Number of already stored sensor regime mask arrays ({ds.shape[0]}) does not match the number of "
+                    f"already stored CSDs or sensor scans ({id_offset}).")
+            # resize datasets to fit new data
+            ds.resize(ds.shape[0] + num_ids, axis=0)
+            ds[id_offset:] = np.array(sensor_regime_masks).astype(dtype_sensor_regime_masks)
+        if sensor_peak_center_masks is not None:
+            if len(sensor_peak_center_masks) != num_ids:
+                raise ValueError(
+                    f"Number of new sensor peak center mask arrays ({len(sensor_peak_center_masks)}) does not match "
+                    f"the number of new CSDs or sensor scans ({num_ids}).")
+            # process sensor peak center masks
+            # save an example sensor peak center mask to get shape and dtype
+            temp_sensor_peak_center_mask = sensor_peak_center_masks[0].copy()
+            # use chunks as this will speed up reading later! One chunk is set to be exactly one image (optimized to
+            # load one image at a time during training)
+            ds = hdf5_file.require_dataset(name='sensor_peak_center_masks',
+                                           shape=(0, *temp_sensor_peak_center_mask.shape),
+                                           dtype=dtype_sensor_peak_center_masks,
+                                           maxshape=(None, *temp_sensor_peak_center_mask.shape))
+            if ds.shape[0] != id_offset:
+                raise ValueError(
+                    f"Number of already stored sensor peak center mask arrays ({ds.shape[0]}) does not match the "
+                    f"number of already stored CSDs or sensor scans ({id_offset}).")
+            # resize datasets to fit new data
+            ds.resize(ds.shape[0] + num_ids, axis=0)
+            ds[id_offset:] = np.array(sensor_peak_center_masks).astype(dtype_sensor_peak_center_masks)
         if line_coordinates is not None:
             if len(line_coordinates) != num_ids:
                 raise ValueError(

simcats_datasets/generation/_create_simulated_dataset.py CHANGED Viewed

@@ -16,6 +16,14 @@ import numpy as np
 # parallel
 from parallelbar import progress_imap
+from tqdm import tqdm
+from simcats_datasets.loading import load_dataset
+from simcats_datasets.loading.load_ground_truth import load_ct_by_dot_masks
+# label creation based on line intersection
+from simcats_datasets.support_functions.get_lead_transition_labels import get_lead_transition_labels
+from simcats_datasets.support_functions.get_coulomb_oscillation_area_boundaries import get_coulomb_oscillation_area_boundaries
+from simcats_datasets.support_functions._json_encoders import NumpyEncoder
 # for SimCATS simulation
 from simcats import Simulation, default_configs
@@ -25,27 +33,22 @@ from simcats.support_functions import (
     NormalSamplingRange,
     UniformSamplingRange, ExponentialSamplingRange,
 )
-from tqdm import tqdm
-from simcats_datasets.loading import load_dataset
-from simcats_datasets.loading.load_ground_truth import load_ct_by_dot_masks
-# label creation based on line intersection
-from simcats_datasets.support_functions.get_lead_transition_labels import get_lead_transition_labels
-from simcats_datasets.support_functions._json_encoders import NumpyEncoder
 __all__ = []
 def _simulate(args: Tuple) -> Tuple:
-    """Method to simulate a csd with the given args. Required for parallel simulation in create_cimulated_dataset.
+    """Method to simulate a CSD or sensor scan with the given args. Required for parallel simulation in create_cimulated_dataset.
     Args:
-        args: Tuple of sample_range_g1, sample_range_g2, volt_range, simcats_config, resolution.
+        args: Tuple of sample_range_g1, sample_range_g2, sample_range_sensor_g1, sample_range_sensor_g2, volt_range,
+            simcats_config, resolution, sensor_scan_dataset, reset_sensor_offset_mu_sens_in_csds.
     Returns:
-        Tuple of csd, occ, lead_trans, metadata, line_points, labels.
+        Tuple of measurement, occupation_mask, lead_transition_mask, metadata, line_points, labels.
     """
-    sample_range_g1, sample_range_g2, volt_range, simcats_config, resolution = args
+    (sample_range_g1, sample_range_g2, sample_range_sensor_g1, sample_range_sensor_g2, volt_range, simcats_config,
+     resolution, sensor_scan_dataset, reset_sensor_offset_mu_sens_in_csds) = args
     # random number generator used for sampling volt ranges.
     # !Must be generated here! Else same for every process!
@@ -75,23 +78,53 @@ def _simulate(args: Tuple) -> Tuple:
     sim = Simulation(**simcats_config)
     # sample voltage ranges
-    g1_start = rng.uniform(low=sample_range_g1[0], high=sample_range_g1[1])
-    g2_start = rng.uniform(low=sample_range_g2[0], high=sample_range_g2[1])
-    g1_range = np.array([g1_start, g1_start + volt_range[0]])
-    g2_range = np.array([g2_start, g2_start + volt_range[1]])
+    g1_start = rng.uniform(low=sample_range_g1[0], high=sample_range_g1[1]) if sample_range_g1 is not None else None
+    g2_start = rng.uniform(low=sample_range_g2[0], high=sample_range_g2[1]) if sample_range_g2 is not None else None
+    sensor_g1_start = rng.uniform(low=sample_range_sensor_g1[0], high=sample_range_sensor_g1[1]) if sample_range_sensor_g1 is not None else None
+    sensor_g2_start = rng.uniform(low=sample_range_sensor_g2[0], high=sample_range_sensor_g2[1]) if sample_range_sensor_g2 is not None else None
     # perform simulation
-    csd, occ, lead_trans, metadata = sim.measure(
-        sweep_range_g1=g1_range, sweep_range_g2=g2_range, resolution=resolution
-    )
-    # calculate lead_transition labels
-    ideal_csd_conf = metadata["ideal_csd_config"]
-    line_points, labels = get_lead_transition_labels(
-        sweep_range_g1=g1_range,
-        sweep_range_g2=g2_range,
-        ideal_csd_config=ideal_csd_conf,
-        lead_transition_mask=lead_trans,
-    )
-    return csd, occ, lead_trans, metadata, line_points, labels
+    if not sensor_scan_dataset:
+        g1_range = np.array([g1_start, g1_start + volt_range[0]])
+        g2_range = np.array([g2_start, g2_start + volt_range[1]])
+        if reset_sensor_offset_mu_sens_in_csds:
+            # calculate potential to reset offset_mu_sens
+            occupations, _ = sim.ideal_csd_config.get_csd_data(volt_limits_g1=g1_range,
+                                                               volt_limits_g2=g2_range,
+                                                               resolution=2)
+            potentials = simcats_config["sensor"].sensor_potential(occupations=occupations,
+                                                                   volt_limits_g1=g1_range,
+                                                                   volt_limits_g2=g2_range)
+            # the new offset is calculated as follows: offset - (potentials[0] - offset)
+            sim.sensor.offset_mu_sens = 2 * simcats_config["sensor"].offset_mu_sens - potentials[0]
+        measurement, occ, lead_trans, metadata = sim.measure(
+            sweep_range_g1=g1_range,
+            sweep_range_g2=g2_range,
+            volt_sensor_g1=sensor_g1_start,
+            volt_sensor_g2=sensor_g2_start,
+            resolution=resolution
+        )
+        # calculate lead_transition labels
+        ideal_csd_conf = metadata["ideal_csd_config"]
+        line_points, labels = get_lead_transition_labels(
+            sweep_range_g1=g1_range,
+            sweep_range_g2=g2_range,
+            ideal_csd_config=ideal_csd_conf,
+            lead_transition_mask=lead_trans,
+        )
+    else:
+        sensor_g1_range = np.array([sensor_g1_start, sensor_g1_start + volt_range[0]])
+        sensor_g2_range = np.array([sensor_g2_start, sensor_g2_start + volt_range[1]])
+        measurement, occ, lead_trans, metadata = sim.measure_sensor_scan(
+            sweep_range_sensor_g1=sensor_g1_range,
+            sweep_range_sensor_g2=sensor_g2_range,
+            volt_g1=g1_start,
+            volt_g2=g2_start,
+            resolution=resolution
+        )
+        line_points, labels = get_coulomb_oscillation_area_boundaries(metadata)
+    return measurement, occ, lead_trans, metadata, line_points, labels
 def create_simulated_dataset(
@@ -107,28 +140,36 @@ def create_simulated_dataset(
     max_len_line_labels_chunk: int = 2000,
     max_len_metadata_chunk: int = 8000,
     dtype_csd: np.dtype = np.float32,
+    dtype_sensor_scan: np.dtype = np.float32,
     dtype_occ: np.dtype = np.float32,
     dtype_tct: np.dtype = np.uint8,
+    dtype_sensor_regime_masks: np.dtype = np.uint8,
+    dtype_sensor_peak_center_masks: np.dtype = np.uint8,
     dtype_line_coordinates: np.dtype = np.float32,
+    sensor_scan_dataset: bool = False,
+    reset_sensor_offset_mu_sens_in_csds: bool = False,
 ) -> None:
     """Function for generating simulated datasets using SimCATS for simulations.
-    **Warning**: This function expects that the simulation config uses IdealCSDGeometric from SimCATS. Other
-    implementations are not guaranteed to work.
+    Datasets can either contain CSDs or sensor scans.
+    **Warning**: This function expects that the simulation config uses IdealCSDGeometric (from SimCATS) for CSD datasets
+    and SensorScanSensorGeneric (from SimCATS) for sensor scan datasets. Other implementations are not guaranteed to
+    work.
     Args:
         dataset_path: The path where the dataset will be stored. Can also be an already existing dataset, to which new
             data is added.
-        simcats_config: Configuration for simcats simulation class. Default is the GaAs_v1 config provided by simcats.
-        n_runs: Number of CSDs to be generated. Default is 10000.
-        resolution: Pixel resolution for both axis of the CSDs, first number of columns (x), then number of rows (y).
-            Default is np.array([100, 100]). \n
+        simcats_config: Configuration for SimCATS simulation class. Default is the GaAs_v1 config provided by SimCATS.
+        n_runs: Number of CSDs or sensor scans to be generated. Default is 10000.
+        resolution: Pixel resolution for both axis of the measurements, first number of columns (x), then number of rows
+            (y). Default is np.array([100, 100]). \n
             Example: \n
             [res_g1, res_g2]
-        volt_range: Volt range for both axis of the CSDs. Individual CSDs with the specified size are randomly sampled
-            in the voltage space. Default is np.array([0.03, 0.03]) (usually the scans from RWTH GaAs offler sample are
-            30mV x 30mV).
-        tags: Additional tags for the data to be simulated, which will be added to the dataset DataFrame. Default is
+        volt_range: Volt range for both axis of the measurements. Individual measurements with the specified size are
+            randomly sampled in the voltage space (defined by the volt_limits in the SimCATS config). Default is
+            np.array([0.03, 0.03]) (usually the scans from RWTH GaAs offler sample are 30mV x 30mV).
+        tags: Additional tags for the data to be simulated, which will be added to the dataset metadata. Default is
             None. \n
             Example: \n
             {"tags": "shifted sensor, no noise", "sample": "GaAs"}.
@@ -139,9 +180,21 @@ def create_simulated_dataset(
         max_len_line_labels_chunk:  Maximum number of chars for the line label dict. Default is 2000.
         max_len_metadata_chunk: Maximum number of chars for the metadata dict. Default is 8000.
         dtype_csd: Specifies the dtype to be used for saving CSDs. Default is np.float32.
+        dtype_sensor_scan: Specifies the dtype to be used for saving sensor scans. Default is np.float32.
         dtype_occ: Specifies the dtype to be used for saving Occupations. Default is np.float32.
         dtype_tct: Specifies the dtype to be used for saving TCTs. Default is np.uint8.
+        dtype_sensor_regime_masks: Specifies the dtype to be used for saving sensor regime masks. Default is np.uint8.
+        dtype_sensor_peak_center_masks: Specifies the dtype to be used for saving sensor peak center masks. Default is
+            np.uint8.
         dtype_line_coordinates: Specifies the dtype to be used for saving line coordinates. Default is np.float32.
+        sensor_scan_dataset: Determines whether to generate a sensor scan dataset (contains sensor scans instead of
+            CSDs). Default is False.
+        reset_sensor_offset_mu_sens_in_csds: Specifies whether to reset the sensor offset_mu_sens parameter before CSD
+            measurements. If this is activated, the offset of the sensor potential is reset so that the first pixel of
+            the CSD is exactly at the previously defined offset_mu_sens. Thus, this effectively resets the sensor to
+            start at the position defined by offset_mu_sens before starting to measure. It is intended to simulate that
+            the sensor is retuned to the defined position before each CSD. It has no effect for sensor scan datasets.
+            Default is False.
     """
     # set tags to an empty dict if none were supplied
     if tags is None:
@@ -150,36 +203,81 @@ def create_simulated_dataset(
     # Create path where the dataset will be saved (if folder doesn't exist already)
     Path(Path(dataset_path).parent).mkdir(parents=True, exist_ok=True)
+    # retrieve the allowed sampling ranges from the config and copy them (else we would change the config itself)
+    sample_range_g1 = simcats_config.get("volt_limits_g1", None)
+    sample_range_g1 = sample_range_g1.astype(np.float32) if sample_range_g1 is not None else None
+    sample_range_g2 = simcats_config.get("volt_limits_g2", None)
+    sample_range_g2 = sample_range_g2.astype(np.float32) if sample_range_g2 is not None else None
+    sample_range_sensor_g1 = simcats_config.get("volt_limits_sensor_g1", None)
+    sample_range_sensor_g1 = sample_range_sensor_g1.astype(np.float32) if sample_range_sensor_g1 is not None else None
+    sample_range_sensor_g2 = simcats_config.get("volt_limits_sensor_g2", None)
+    sample_range_sensor_g2 = sample_range_sensor_g2.astype(np.float32) if sample_range_sensor_g2 is not None else None
     # arange volt limits so that random sampling gives us a starting point that is at least the defined volt_range below
     # the maximum
-    sample_range_g1 = simcats_config["volt_limits_g1"].copy()
-    sample_range_g1[-1] -= volt_range[0]
-    sample_range_g2 = simcats_config["volt_limits_g2"].copy()
-    sample_range_g2[-1] -= volt_range[1]
+    if not sensor_scan_dataset:
+        measurement_type = "csds"
+        sample_range_g1[-1] -= volt_range[0]
+        sample_range_g2[-1] -= volt_range[1]
+    else:
+        measurement_type = "sensor_scans"
+        sample_range_sensor_g1[-1] -= volt_range[0]
+        sample_range_sensor_g2[-1] -= volt_range[1]
     with h5py.File(dataset_path, "a") as hdf5_file:
         # load datasets or create them if not already there
-        csds = hdf5_file.require_dataset(
-            name="csds",
-            shape=(0, resolution[1], resolution[0]),
-            chunks=(1, resolution[1], resolution[0]),
-            dtype=dtype_csd,
-            maxshape=(None, resolution[1], resolution[0]),
-        )
-        occupations = hdf5_file.require_dataset(
-            name="occupations",
-            shape=(0, resolution[1], resolution[0], 2),
-            chunks=(1, resolution[1], resolution[0], 2),
-            dtype=dtype_occ,
-            maxshape=(None, resolution[1], resolution[0], 2),
-        )
-        tct_masks = hdf5_file.require_dataset(
-            name="tct_masks",
-            shape=(0, resolution[1], resolution[0]),
-            chunks=(1, resolution[1], resolution[0]),
-            dtype=dtype_tct,
-            maxshape=(None, resolution[1], resolution[0]),
-        )
+        if isinstance(resolution, int):
+            measurements = hdf5_file.require_dataset(
+                name=measurement_type,
+                shape=(0, resolution),
+                chunks=(1, resolution),
+                dtype=dtype_csd if not sensor_scan_dataset else dtype_sensor_scan,
+                maxshape=(None, resolution),
+            )
+            occupations = hdf5_file.require_dataset(
+                name="occupations" if not sensor_scan_dataset else "sensor_regime_masks",
+                shape=(0, resolution, 2) if not sensor_scan_dataset else (0, resolution),
+                chunks=(1, resolution, 2) if not sensor_scan_dataset else (1, resolution),
+                dtype=dtype_occ if not sensor_scan_dataset else dtype_sensor_regime_masks,
+                maxshape=(None, resolution, 2) if not sensor_scan_dataset else (None, resolution),
+            )
+            tct_masks = hdf5_file.require_dataset(
+                name="tct_masks" if not sensor_scan_dataset else "sensor_peak_center_masks",
+                shape=(0, resolution),
+                chunks=(1, resolution),
+                dtype=dtype_tct if not sensor_scan_dataset else dtype_sensor_peak_center_masks,
+                maxshape=(None, resolution),
+            )
+        elif len(resolution) == 2:
+            measurements = hdf5_file.require_dataset(
+                name=measurement_type,
+                shape=(0, resolution[1], resolution[0]),
+                chunks=(1, resolution[1], resolution[0]),
+                dtype=dtype_csd if not sensor_scan_dataset else dtype_sensor_scan,
+                maxshape=(None, resolution[1], resolution[0]),
+            )
+            occupations = hdf5_file.require_dataset(
+                name="occupations" if not sensor_scan_dataset else "sensor_regime_masks",
+                shape=(0, resolution[1], resolution[0], 2) if not sensor_scan_dataset else (
+                    0, resolution[1], resolution[0]),
+                chunks=(1, resolution[1], resolution[0], 2) if not sensor_scan_dataset else (
+                    1, resolution[1], resolution[0]),
+                dtype=dtype_occ if not sensor_scan_dataset else dtype_sensor_regime_masks,
+                maxshape=(None, resolution[1], resolution[0], 2) if not sensor_scan_dataset else (
+                    None, resolution[1], resolution[0]),
+            )
+            tct_masks = hdf5_file.require_dataset(
+                name="tct_masks" if not sensor_scan_dataset else "sensor_peak_center_masks",
+                shape=(0, resolution[1], resolution[0]),
+                chunks=(1, resolution[1], resolution[0]),
+                dtype=dtype_tct if not sensor_scan_dataset else dtype_sensor_peak_center_masks,
+                maxshape=(None, resolution[1], resolution[0]),
+            )
+        else:
+            raise ValueError(
+                "An invalid resolution was given. The resolution should either be an integer or a one dimensional numpy"
+                " array with two elements.")
         line_coords = hdf5_file.require_dataset(
             name="line_coordinates",
             shape=(0, max_len_line_coordinates_chunk),
@@ -202,10 +300,10 @@ def create_simulated_dataset(
             maxshape=(None, max_len_metadata_chunk),
         )
         # determine index offset if there is already data in the dataset
-        id_offset = csds.shape[0]
+        id_offset = measurements.shape[0]
         # resize datasets to fit new data
-        csds.resize(csds.shape[0] + n_runs, axis=0)
+        measurements.resize(measurements.shape[0] + n_runs, axis=0)
         occupations.resize(occupations.shape[0] + n_runs, axis=0)
         tct_masks.resize(tct_masks.shape[0] + n_runs, axis=0)
         line_coords.resize(line_coords.shape[0] + n_runs, axis=0)
@@ -215,10 +313,11 @@ def create_simulated_dataset(
         # simulate and save data
         indices = range(id_offset, n_runs + id_offset)
         arguments = itertools.repeat(
-            (sample_range_g1, sample_range_g2, volt_range, simcats_config, resolution),
+            (sample_range_g1, sample_range_g2, sample_range_sensor_g1, sample_range_sensor_g2, volt_range,
+             simcats_config, resolution, sensor_scan_dataset, reset_sensor_offset_mu_sens_in_csds),
             times=len(indices),
         )
-        for index, (csd, occ, lead_trans, metadata, line_points, labels) in zip(
+        for index, (measurement, occ, lead_trans, metadata, line_points, labels) in zip(
             indices,
             progress_imap(
                 func=_simulate,
@@ -230,9 +329,9 @@ def create_simulated_dataset(
             ),
         ):
             # save data
-            csds[index] = csd.astype(dtype_csd)
-            occupations[index] = occ.astype(dtype_occ)
-            tct_masks[index] = lead_trans.astype(dtype_tct)
+            measurements[index] = measurement.astype(dtype_csd if not sensor_scan_dataset else dtype_sensor_scan)
+            occupations[index] = occ.astype(dtype_occ if not sensor_scan_dataset else dtype_sensor_regime_masks)
+            tct_masks[index] = lead_trans.astype(dtype_tct if not sensor_scan_dataset else dtype_sensor_peak_center_masks)
             line_coords[index] = np.pad(
                 line_points.flatten(),
                 ((0, max_len_line_coordinates_chunk - line_points.size)),

simcats_datasets/loading/_load_dataset.py CHANGED Viewed

@@ -21,6 +21,8 @@ def load_dataset(file: Union[str, h5py.File],
                  load_occupations: bool = False,
                  load_tct_masks: bool = False,
                  load_ct_by_dot_masks: bool = False,
+                 load_sensor_regime_masks: bool = False,
+                 load_sensor_peak_center_masks: bool = False,
                  load_line_coords: bool = False,
                  load_line_labels: bool = False,
                  load_metadata: bool = False,
@@ -44,6 +46,10 @@ def load_dataset(file: Union[str, h5py.File],
             requires that ct_by_dot_masks have been added to the dataset. If a dataset has been created using
             create_simulated_dataset, these masks can be added afterward using add_ct_by_dot_masks_to_dataset, mainly
             to avoid recalculating them multiple times (for example for machine learning purposes). Default is False.
+        load_sensor_regime_masks: Determines if sensor regime masks should be loaded. Only sensor scan datasets contain
+            sensor regime masks. Default is False.
+        load_sensor_peak_center_masks: Determines if sensor peak center masks should be loaded. Only sensor scan
+            datasets contain sensor peak center masks. Default is False.
         load_line_coords: Determines if lead transition definitions using start and end points should be loaded. Default
             is False.
         load_line_labels: Determines if labels for lead transitions defined using start and end points should be loaded.
@@ -85,6 +91,10 @@ def load_dataset(file: Union[str, h5py.File],
         fieldnames.append("tct_masks")
     if load_ct_by_dot_masks:
         fieldnames.append("ct_by_dot_masks")
+    if load_sensor_regime_masks:
+        fieldnames.append("sensor_regime_masks")
+    if load_sensor_peak_center_masks:
+        fieldnames.append("sensor_peak_center_masks")
     if load_line_coords:
         fieldnames.append("line_coordinates")
     if load_line_labels:
@@ -158,6 +168,16 @@ def load_dataset(file: Union[str, h5py.File],
                 ct_by_dot_masks = _file["ct_by_dot_masks"][specific_ids]
             else:
                 ct_by_dot_masks = _file["ct_by_dot_masks"][:]
+        if load_sensor_regime_masks:
+            if specific_ids is not None:
+                sensor_regime_masks = _file["sensor_regime_masks"][specific_ids]
+            else:
+                sensor_regime_masks = _file["sensor_regime_masks"][:]
+        if load_sensor_peak_center_masks:
+            if specific_ids is not None:
+                sensor_peak_center_masks = _file["sensor_peak_center_masks"][specific_ids]
+            else:
+                sensor_peak_center_masks = _file["sensor_peak_center_masks"][:]
         if load_line_coords:
             if specific_ids is not None:
                 # remove padded nan values
@@ -190,6 +210,10 @@ def load_dataset(file: Union[str, h5py.File],
         return_data.append(tct_masks)
     if load_ct_by_dot_masks:
         return_data.append(ct_by_dot_masks)
+    if load_sensor_regime_masks:
+        return_data.append(sensor_regime_masks)
+    if load_sensor_peak_center_masks:
+        return_data.append(sensor_peak_center_masks)
     if load_line_coords:
         return_data.append(line_coords)
     if load_line_labels:

simcats-datasets 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl

simcats-datasets 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl