simcats-datasets 2.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- simcats_datasets/__init__.py +2 -0
- simcats_datasets/generation/__init__.py +6 -0
- simcats_datasets/generation/_create_dataset.py +221 -0
- simcats_datasets/generation/_create_simulated_dataset.py +372 -0
- simcats_datasets/loading/__init__.py +8 -0
- simcats_datasets/loading/_load_dataset.py +177 -0
- simcats_datasets/loading/load_ground_truth.py +486 -0
- simcats_datasets/loading/pytorch.py +426 -0
- simcats_datasets/support_functions/__init__.py +1 -0
- simcats_datasets/support_functions/_json_encoders.py +51 -0
- simcats_datasets/support_functions/clip_line_to_rectangle.py +191 -0
- simcats_datasets/support_functions/convert_lines.py +110 -0
- simcats_datasets/support_functions/data_preprocessing.py +351 -0
- simcats_datasets/support_functions/get_lead_transition_labels.py +102 -0
- simcats_datasets/support_functions/pytorch_format_output.py +170 -0
- simcats_datasets-2.4.0.dist-info/LICENSE +674 -0
- simcats_datasets-2.4.0.dist-info/METADATA +837 -0
- simcats_datasets-2.4.0.dist-info/RECORD +20 -0
- simcats_datasets-2.4.0.dist-info/WHEEL +5 -0
- simcats_datasets-2.4.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
"""Helper functions for converting lines into different representations
|
|
2
|
+
|
|
3
|
+
@author: f.fuchs
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from copy import deepcopy
|
|
7
|
+
from typing import List, Union
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def lines_voltage_to_pixel_space(lines: Union[List[np.ndarray], np.ndarray],
                                 voltage_range_x: np.ndarray,
                                 voltage_range_y: np.ndarray,
                                 image_width: int,
                                 image_height: int,
                                 round_to_int: bool = False, ) -> np.ndarray:
    """Convert lines from voltage space to image/pixel space.

    This method works on a copy of the supplied lines. Therefore, the original input won't be modified.

    Args:
        lines: Array or list of lines to convert, shape: (n, 4). \n
            Example: \n
            [[x_start, y_start, x_stop, y_stop], ...]
        voltage_range_x: Voltage range in x direction.
        voltage_range_y: Voltage range in y direction.
        image_width: Width of the image/pixel space.
        image_height: Height of the image/pixel space.
        round_to_int: Toggles if the lines are returned as floats (False) or are rounded and then returned as integers
            (True). Defaults to false.

    Returns:
        Array with rows containing the converted lines.
    """
    # Cast to float so integer inputs are not silently truncated by the in-place
    # assignments below (mirrors the float cast in lines_pixel_to_voltage_space).
    # np.array already copies, so no extra deepcopy is needed.
    pixel_space = np.array(lines).astype(np.float64)
    # hoist the (loop-invariant) voltage spans and offsets
    x_min = voltage_range_x.min()
    y_min = voltage_range_y.min()
    x_span = voltage_range_x.max() - x_min
    y_span = voltage_range_y.max() - y_min
    for line in pixel_space:
        # change x coordinates of the line (columns 0 and 2)
        line[0] = (image_width - 1) * (line[0] - x_min) / x_span
        line[2] = (image_width - 1) * (line[2] - x_min) / x_span
        # change y coordinates of the line (columns 1 and 3)
        line[1] = (image_height - 1) * (line[1] - y_min) / y_span
        line[3] = (image_height - 1) * (line[3] - y_min) / y_span
    if round_to_int:
        return pixel_space.round(decimals=0).astype(int)
    return pixel_space
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def lines_pixel_to_voltage_space(lines: Union[List[np.ndarray], np.ndarray],
                                 voltage_range_x: np.ndarray,
                                 voltage_range_y: np.ndarray,
                                 image_width: int,
                                 image_height: int, ) -> np.ndarray:
    """Convert lines from image/pixel space to voltage space.

    This method makes a deepcopy of the supplied lines. Therefore, the original input won't be modified.

    Args:
        lines: Array or list of lines to convert, shape: (n, 4). \n
            Example: \n
            [[x_start, y_start, x_stop, y_stop], ...]
        voltage_range_x: Voltage range in x direction.
        voltage_range_y: Voltage range in y direction.
        image_width: Width of the image/pixel space.
        image_height: Height of the image/pixel space.

    Returns:
        Array with rows containing the converted lines.
    """
    voltage_space = deepcopy(np.array(lines)).astype(np.float32)
    for segment in voltage_space:
        # x coordinates: columns 0 and 2
        for col in (0, 2):
            segment[col] = (segment[col] / (image_width - 1)) * (voltage_range_x[1] - voltage_range_x[0]) + voltage_range_x[0]
        # y coordinates: columns 1 and 3
        for col in (1, 3):
            segment[col] = (segment[col] / (image_height - 1)) * (voltage_range_y[1] - voltage_range_y[0]) + voltage_range_y[0]
    return voltage_space
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def lines_convert_two_coordinates_to_coordinate_plus_change(lines: Union[List[np.ndarray], np.ndarray]) -> np.ndarray:
    """Change the format from x,y,x,y to x,y,dx,dy.

    The base point is the endpoint with the smaller x coordinate; on equal x coordinates,
    the endpoint with the smaller y coordinate is used as the base.

    Args:
        lines: Array or list of lines to convert, shape: (n, 4). \n
            Example: \n
            [[x_start, y_start, x_stop, y_stop], ...]

    Returns:
        Array with rows of lines in x,y,dx,dy format.
    """
    converted = []
    for segment in lines:
        base = (segment[0], segment[1])
        tip = (segment[2], segment[3])
        # lexicographic comparison picks the left-most (then bottom-most) point as base
        if (tip[0], tip[1]) < (base[0], base[1]):
            base, tip = tip, base
        converted.append([base[0], base[1], tip[0] - base[0], tip[1] - base[1]])
    return np.array(converted)
|
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
"""Data preprocessors to be used with the **Pytorch Dataset class**.
|
|
2
|
+
|
|
3
|
+
Every preprocessor must accept either a single array or a list of arrays as input. Output type should always be the same
|
|
4
|
+
as the input type. Please try to use -=, +=, *=, and /=, as these are way faster than data = data + ... etc.. Avoid
|
|
5
|
+
using map(function, data), as this will return a copy and copying will slow down your code.
|
|
6
|
+
**Please look at example_preprocessor for a reference.**
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from typing import List, Union, Tuple
|
|
10
|
+
|
|
11
|
+
import numpy as np
|
|
12
|
+
import cv2
|
|
13
|
+
import skimage.restoration
|
|
14
|
+
import bm3d
|
|
15
|
+
from scipy.signal import resample, decimate
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def example_preprocessor(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Example (reference) for preprocessor implementations.

    Shows the expected structure: branch on list vs. single array, process each
    element, and return the same container type as the input.

    Args:
        data: Numpy array to be preprocessed (or a list of such).

    Returns:
        Preprocessed numpy array (or a list of such).
    """
    # a list input is handled element-wise (here: identity)
    if isinstance(data, list):
        return [item for item in data]
    return data
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def cast_to_float32(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Cast the data to float32. Especially useful to reduce memory usage for preloaded datasets.

    Args:
        data: Numpy array to be cast to float32 (or a list of such).

    Returns:
        Float32 numpy array (or a list of such).
    """
    if not isinstance(data, list):
        return data.astype(np.float32)
    # list input: cast every element individually
    return [item.astype(np.float32) for item in data]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def cast_to_float16(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Cast the data to float16. Especially useful to reduce memory usage for preloaded datasets.

    Args:
        data: Numpy array to be cast to float16 (or a list of such).

    Returns:
        Float16 numpy array (or a list of such).
    """
    if not isinstance(data, list):
        return data.astype(np.float16)
    # list input: cast every element individually
    return [item.astype(np.float16) for item in data]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def standardization(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Standardization of the data (mean=0, std=1).

    If a list of data is passed, each data is standardized individually (no global standardization).
    Arrays are modified in place (using -=, /=) for speed.

    Args:
        data: Numpy array to be standardized (or a list of such).

    Returns:
        Standardized numpy array (or a list of such).
    """
    # treat a single array like a one-element list so both cases share one loop
    targets = data if isinstance(data, list) else [data]
    for arr in targets:
        arr -= np.mean(arr)
        arr /= np.std(arr)
    return data
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def min_max_0_1(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Min max scaling of the data to [0, 1].

    If a list of data is passed, each data is scaled individually (no global scaling).
    Arrays are modified in place (using -=, /=) for speed.

    Args:
        data: Numpy array to be scaled (or a list of such).

    Returns:
        Rescaled numpy array (or a list of such).
    """
    # treat a single array like a one-element list so both cases share one loop
    targets = data if isinstance(data, list) else [data]
    for arr in targets:
        arr -= np.min(arr)
        # after shifting, the max equals the original value range
        arr /= np.max(arr)
    return data
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def min_max_minus_one_one(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Min max scaling of the data to [-1, 1].

    If a list of data is passed, each data is scaled individually (no global scaling).

    Args:
        data: Numpy array to be scaled (or a list of such).

    Returns:
        Rescaled numpy array (or a list of such).
    """
    # first scale to [0, 1], then map affinely onto [-1, 1]
    data = min_max_0_1(data)
    targets = data if isinstance(data, list) else [data]
    for arr in targets:
        arr -= 0.5
        arr *= 2
    return data
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def add_newaxis(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Adds a new leading axis to the data (basically the missing color channel).

    Args:
        data: Numpy array to which the axis will be added (or a list of such).

    Returns:
        Numpy array with additional axis (or a list of such).
    """
    if not isinstance(data, list):
        return data[np.newaxis, ...]
    return [item[np.newaxis, ...] for item in data]
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def only_two_classes(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Sets all mask labels that are larger than or equal 1 to 1 and all other pixels to zero.

    Arrays are binarized in place.

    Args:
        data: Numpy array to be processed (or a list of such).

    Returns:
        Numpy array with only two classes (or a list of such).
    """
    targets = data if isinstance(data, list) else [data]
    for arr in targets:
        # compute the mask once, then write both classes
        background = arr < 1
        arr[~background] = 1
        arr[background] = 0
    return data
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def shrink_to_shape(data: Union[np.ndarray, List[np.ndarray]], shape: Tuple[int, int]) -> Union[
    np.ndarray, List[np.ndarray]]:
    """Cut off required number of rows/columns of pixels at each edge of the image to get the desired shape.

    **Warning**: This preprocessor can't be used by supplying a string with the name to the class SimcatsDataset from
    the simcats_datasets.pytorch module, as this requires that preprocessors need no additional parameters but only the
    data. If a list of data is passed, it is expected, that all images in the list have the same shape!

    Bug fixes vs. previous version: a list whose images already have the target shape no longer
    raises AttributeError (the old code fell through to ``data.shape`` on the list), and an axis
    that already matches the target size is no longer cropped to an empty slice (the old negative
    stop index evaluated to 0 for a matching axis).

    Args:
        data: Numpy array to be preprocessed (or a list of such).
        shape: The shape to which the data will be reshaped.

    Returns:
        Shrinked numpy array (or a list of such).
    """
    def _center_crop(img: np.ndarray) -> np.ndarray:
        # symmetric crop: distribute the excess pixels evenly (extra pixel goes to the end)
        slices = []
        for axis in range(2):
            start = (img.shape[axis] - shape[axis]) // 2
            slices.append(slice(start, start + shape[axis]))
        return img[tuple(slices)]

    if isinstance(data, list):
        if data[0].shape != shape:
            data = [_center_crop(img) for img in data]
    elif data.shape != shape:
        data = _center_crop(data)
    return data
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def shrink_to_shape_96x96(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Cut off required number of rows/columns of pixels at each edge of the image to get shape 96x96.

    **Warning**: If a list of data is passed, it is expected, that all images in the list have the same shape!

    Args:
        data: Numpy array to be preprocessed (or a list of such).

    Returns:
        Shrinked numpy array (or a list of such).
    """
    # fixed-size convenience wrapper usable where parameterless preprocessors are required
    target = (96, 96)
    return shrink_to_shape(data=data, shape=target)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def resample_image(data: Union[np.ndarray, List[np.ndarray]], target_size: Tuple[int, int]) -> Union[
    np.ndarray, List[np.ndarray]]:
    """Resample an image to target size using scipy.signal.resample.

    Only axes that are larger than the target size are resampled (downsampling); smaller axes are
    left unchanged.

    **Warning**: This preprocessor can't be used by supplying a string with the name to the class SimcatsDataset from
    the simcats_datasets.pytorch module, as it requires that preprocessors need no additional parameters but only the
    data.

    Args:
        data: The image to resample (or a list of such).
        target_size: The target size to resample to.

    Returns:
        The resampled image or a list of such.
    """
    if isinstance(data, list):
        # bug fix: the recursive call previously dropped target_size, raising a TypeError
        data = [resample_image(temp_data, target_size) for temp_data in data]
    else:
        if data.shape[0] > target_size[0]:
            data = resample(data, target_size[0], axis=0)
        if data.shape[1] > target_size[1]:
            data = resample(data, target_size[1], axis=1)
    return data
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def decimate_image(data: Union[np.ndarray, List[np.ndarray]], target_size: Tuple[int, int]) -> Union[
|
|
236
|
+
np.ndarray, List[np.ndarray]]:
|
|
237
|
+
"""Decimate an image to target size using scipy.signal.decimate.
|
|
238
|
+
|
|
239
|
+
**Warning**: This preprocessor can't be used by supplying a string with the name to the class SimcatsDataset from
|
|
240
|
+
the simcats_datasets.pytorch module, as it requires that preprocessors need no additional parameters but only the
|
|
241
|
+
data.
|
|
242
|
+
|
|
243
|
+
Args:
|
|
244
|
+
data: The image to decimate.
|
|
245
|
+
target_size: The target size to decimate to.
|
|
246
|
+
|
|
247
|
+
Returns:
|
|
248
|
+
The decimated image or a list of such.
|
|
249
|
+
"""
|
|
250
|
+
if isinstance(data, list):
|
|
251
|
+
data = [decimate_image(temp_data) for temp_data in data]
|
|
252
|
+
else:
|
|
253
|
+
q = [data.shape[0] / target_size[0], data.shape[1] / target_size[1]]
|
|
254
|
+
while q[0] > 1 or q[1] > 1:
|
|
255
|
+
if q[0] > 1:
|
|
256
|
+
data = decimate(data.T, min(13, int(np.ceil(q[0]))), axis=1, ftype="iir").T
|
|
257
|
+
if q[1] > 1:
|
|
258
|
+
data = decimate(data.T, min(13, int(np.ceil(q[1]))), axis=0, ftype="iir").T
|
|
259
|
+
q = [data.shape[0] / target_size[0], data.shape[1] / target_size[1]]
|
|
260
|
+
return data
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def standardize_to_dataset(data: Union[np.ndarray, List[np.ndarray]], mean: float, std: float) -> Union[
    np.ndarray, List[np.ndarray]]:
    """Standardization of the data not per image but for a whole dataset.

    **Warning**: This preprocessor can't be used by supplying a string with the name to the class SimcatsDataset from
    the simcats_datasets.pytorch module, as it requires that preprocessors need no additional parameters but only the
    data.

    Args:
        data (Union[np.ndarray, List[np.ndarray]]): Numpy array to be standardized (or a list of such).
        mean (float): The mean to subtract.
        std (float): The standard deviation to divide by.

    Returns:
        Union[np.ndarray, List[np.ndarray]]: Standardized numpy array (or a list of such).
    """
    # treat a single array like a one-element list so both cases share one loop;
    # arrays are updated in place (-=, /=) for speed
    targets = data if isinstance(data, list) else [data]
    for arr in targets:
        arr -= mean
        arr /= std
    return data
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _bm3d_smoothing_single_img(img: np.ndarray) -> np.ndarray:
    """BM3D smoothing helper function, which performs the actual BM3D smoothing in the bm3d_smoothing preprocessor.

    Args:
        img: Numpy array to be smoothed.

    Returns:
        Smoothed image.
    """
    # Estimate the noise level via skimage, damped by a factor of 0.4.
    # NOTE(review): the 0.4 factor appears to be an empirically chosen damping of the
    # sigma estimate — origin not documented here; confirm before changing.
    sigma = 0.4 * skimage.restoration.estimate_sigma(img, average_sigmas=True)
    # Denoise with BM3D using only the hard-thresholding stage (no Wiener filtering step).
    img = bm3d.bm3d(img, sigma_psd=sigma, stage_arg=bm3d.BM3DStages.HARD_THRESHOLDING)
    return img
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def bm3d_smoothing(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Smoothing of the data using the BM3D algorithm.

    List inputs are updated in place (each entry is replaced by its smoothed version).

    Args:
        data: Numpy array to be smoothed (or a list of such)

    Returns:
        BM3D-smoothed numpy array (or a list of such)
    """
    if isinstance(data, list):
        # rebind each entry in the same list object (in-place list mutation)
        for idx, img in enumerate(data):
            data[idx] = _bm3d_smoothing_single_img(img)
    else:
        data = _bm3d_smoothing_single_img(data)
    return data
|
|
319
|
+
|
|
320
|
+
|
|
321
|
+
def _vertical_median_smoothing_single_img(img: np.ndarray) -> np.ndarray:
    """Vertical median smoothing helper function, which performs the actual smoothing in the vertical_median_smoothing preprocessor.

    Applies a 1D median filter (kernel size 3) to every column of the image independently,
    overwriting the columns in place.

    NOTE(review): cv2.medianBlur with ksize=3 requires an 8-bit or float32 input — the caller
    (vertical_median_smoothing) casts to float32 first; confirm before calling this directly.

    Args:
        img: Numpy array to be smoothed (modified in place, column by column).

    Returns:
        Smoothed image.
    """
    for i in range(img.shape[1]):
        # medianBlur returns a 2D column; flatten it back into the 1D column slot
        img[:, i] = cv2.medianBlur(img[:, i], 3).flatten()
    return img
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def vertical_median_smoothing(data: Union[np.ndarray, List[np.ndarray]]) -> Union[np.ndarray, List[np.ndarray]]:
    """Median-smoothing of the data, for each vertical column independently.

    Data is cast to float32 before smoothing. List inputs are updated in place (each entry
    is replaced by its smoothed float32 version).

    Args:
        data: Numpy array to be smoothed (or a list of such).

    Returns:
        Smoothed numpy array (or a list of such).
    """
    if isinstance(data, list):
        # rebind each entry in the same list object (in-place list mutation)
        for idx, img in enumerate(data):
            data[idx] = _vertical_median_smoothing_single_img(img.astype(np.float32))
    else:
        data = _vertical_median_smoothing_single_img(data.astype(np.float32))
    return data
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""Functionalities for extracting labeled transition lines from a SimCATS CSD (using the metadata).
|
|
2
|
+
|
|
3
|
+
@author: f.hader
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Dict, List, Tuple
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
from simcats.ideal_csd import IdealCSDInterface
|
|
10
|
+
from simcats.ideal_csd.geometric import calculate_all_bezier_anchors as calc_anchors
|
|
11
|
+
|
|
12
|
+
from simcats_datasets.support_functions.clip_line_to_rectangle import clip_point_line_to_rectangle, \
|
|
13
|
+
clip_slope_line_to_rectangle, create_rectangle_corners
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def get_lead_transition_labels(sweep_range_g1: np.ndarray,
                               sweep_range_g2: np.ndarray,
                               ideal_csd_config: IdealCSDInterface,
                               lead_transition_mask: np.ndarray) -> Tuple[np.ndarray, List[Dict]]:
    """Function for calculating the line coordinates and labels for all linear parts in a simulated CSD.

    **Warning**: This function expects that IdealCSDGeometric has been used for the simulation. Dot jumps or similar
    distortions are not taken into account in the calculation of the line coordinates. This means, that the returned
    lines are the ideal (undisturbed) lines.

    Args:
        sweep_range_g1: The sweep range for gate 1. Required to know where the boundaries are.
        sweep_range_g2: The sweep range for gate 2. Required to know where the boundaries are.
        ideal_csd_config: The IdealCSDInterface implementation that was used during the simulation. It is
            required to calculate the bezier anchors from the configured TCTs.
        lead_transition_mask: Lead transition mask (TCT mask), used to identify involved TCTs.

    Returns:
        np.ndarray, list[dict]: Array with the line coordinates and list containing dictionaries with corresponding
        labels. Every row of the array represents one line as [x_start, y_start, x_stop, y_stop].

    """
    # retrieve which TCTs are contained in the simulated csd (mask value 0 is background)
    tct_ids = np.unique(lead_transition_mask).astype(int).tolist()
    tct_ids.remove(0)

    # retrieve TCT rotation (used to rotate transition slopes into image space)
    rotation = ideal_csd_config.rotation

    # get CSD corner points of the swept voltage window (clipping rectangle)
    rect_corners = create_rectangle_corners(x_range=sweep_range_g1, y_range=sweep_range_g2)

    # lists to collect line coordinates and labels
    line_points = []
    labels = []

    # for every tct find the linear parts that are included in the csd (to be used as labels for line detection)
    for i in tct_ids:
        # retrieve tct parameters; NOTE(review): tct ids appear to be 1-based (mask value i maps
        # to tct_params[i - 1]) — confirm against IdealCSDGeometric's mask convention.
        tct_params = ideal_csd_config.tct_params[i - 1]
        # retrieve all bezier anchors. Linear parts are always bound by anchors of two subsequent triple points,
        # or by one anchor and infinite linear prolongation in the single dot regions.
        anchors = calc_anchors(tct_params=tct_params, max_peaks=i)

        # iterate all lead transitions / linear parts of the current tct and check if they are in the image
        # (a TCT with i peaks has i * 2 linear transition segments)
        for trans_id in range(i * 2):
            # the first lead transition only has one bezier anchor, as it is infinitively prolonged in the single dot
            # regime
            if trans_id == 0:
                anchor = anchors[i][trans_id, 0, :]
                # tct_params[2]: slope of the first transition (TCT parameter space)
                slope = tct_params[2]
                # rotate slope into image space; for negative slopes add pi so the angle stays in the
                # correct half-plane before taking the tangent again
                angle = np.arctan(slope) + rotation
                if slope < 0:
                    angle += np.pi
                slope = np.tan(angle)
                # the anchor marks the end of the segment (is_start=False): the line extends
                # backwards from the anchor and is clipped to the CSD rectangle
                clipped_start, clipped_end = clip_slope_line_to_rectangle(slope=slope, point=anchor,
                                                                          rect_corners=rect_corners, is_start=False)
                # only keep segments that actually intersect the visible CSD window
                if clipped_start is not None and clipped_end is not None:
                    line_points.append(np.array([clipped_start[0], clipped_start[1], clipped_end[0], clipped_end[1]]))
                    labels.append({"tct_id": i, "transition_id": trans_id})
            # the last lead transition only has one bezier anchor, as it is infinitively prolonged in the single dot
            # regime
            elif trans_id == i * 2 - 1:
                anchor = anchors[i][trans_id - 1, 2, :]
                # tct_params[3]: slope of the last transition (TCT parameter space)
                slope = tct_params[3]
                # rotate slope into image space (same half-plane correction as above)
                angle = np.arctan(slope) + rotation
                if slope < 0:
                    angle += np.pi
                slope = np.tan(angle)
                # here the anchor marks the start of the segment (is_start=True)
                clipped_start, clipped_end = clip_slope_line_to_rectangle(slope=slope, point=anchor,
                                                                          rect_corners=rect_corners, is_start=True)
                if clipped_start is not None and clipped_end is not None:
                    line_points.append(np.array([clipped_start[0], clipped_start[1], clipped_end[0], clipped_end[1]]))
                    labels.append({"tct_id": i, "transition_id": trans_id})
            # all other transitions are in the double dot regime and have two anchors defining the line
            else:
                # segment runs from the end anchor of the previous triple point to the
                # start anchor of the current one
                anchor_start = anchors[i][trans_id - 1, 2, :]
                anchor_stop = anchors[i][trans_id, 0, :]
                clipped_start, clipped_end = clip_point_line_to_rectangle(start=anchor_start, end=anchor_stop,
                                                                          rect_corners=rect_corners)
                if clipped_start is not None and clipped_end is not None:
                    line_points.append(np.array([clipped_start[0], clipped_start[1], clipped_end[0], clipped_end[1]]))
                    labels.append({"tct_id": i, "transition_id": trans_id})

    return np.array(line_points), labels
|