celldetective 1.4.1.post1__py3-none-any.whl → 1.5.0b0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. celldetective/__init__.py +25 -0
  2. celldetective/__main__.py +62 -43
  3. celldetective/_version.py +1 -1
  4. celldetective/extra_properties.py +477 -399
  5. celldetective/filters.py +192 -97
  6. celldetective/gui/InitWindow.py +541 -411
  7. celldetective/gui/__init__.py +0 -15
  8. celldetective/gui/about.py +44 -39
  9. celldetective/gui/analyze_block.py +120 -84
  10. celldetective/gui/base/__init__.py +0 -0
  11. celldetective/gui/base/channel_norm_generator.py +335 -0
  12. celldetective/gui/base/components.py +249 -0
  13. celldetective/gui/base/feature_choice.py +92 -0
  14. celldetective/gui/base/figure_canvas.py +52 -0
  15. celldetective/gui/base/list_widget.py +133 -0
  16. celldetective/gui/{styles.py → base/styles.py} +92 -36
  17. celldetective/gui/base/utils.py +33 -0
  18. celldetective/gui/base_annotator.py +900 -767
  19. celldetective/gui/classifier_widget.py +642 -554
  20. celldetective/gui/configure_new_exp.py +777 -671
  21. celldetective/gui/control_panel.py +635 -524
  22. celldetective/gui/dynamic_progress.py +449 -0
  23. celldetective/gui/event_annotator.py +2023 -1662
  24. celldetective/gui/generic_signal_plot.py +1292 -944
  25. celldetective/gui/gui_utils.py +899 -1289
  26. celldetective/gui/interactions_block.py +658 -0
  27. celldetective/gui/interactive_timeseries_viewer.py +447 -0
  28. celldetective/gui/json_readers.py +48 -15
  29. celldetective/gui/layouts/__init__.py +5 -0
  30. celldetective/gui/layouts/background_model_free_layout.py +537 -0
  31. celldetective/gui/layouts/channel_offset_layout.py +134 -0
  32. celldetective/gui/layouts/local_correction_layout.py +91 -0
  33. celldetective/gui/layouts/model_fit_layout.py +372 -0
  34. celldetective/gui/layouts/operation_layout.py +68 -0
  35. celldetective/gui/layouts/protocol_designer_layout.py +96 -0
  36. celldetective/gui/pair_event_annotator.py +3130 -2435
  37. celldetective/gui/plot_measurements.py +586 -267
  38. celldetective/gui/plot_signals_ui.py +724 -506
  39. celldetective/gui/preprocessing_block.py +395 -0
  40. celldetective/gui/process_block.py +1678 -1831
  41. celldetective/gui/seg_model_loader.py +580 -473
  42. celldetective/gui/settings/__init__.py +0 -7
  43. celldetective/gui/settings/_cellpose_model_params.py +181 -0
  44. celldetective/gui/settings/_event_detection_model_params.py +95 -0
  45. celldetective/gui/settings/_segmentation_model_params.py +159 -0
  46. celldetective/gui/settings/_settings_base.py +77 -65
  47. celldetective/gui/settings/_settings_event_model_training.py +752 -526
  48. celldetective/gui/settings/_settings_measurements.py +1133 -964
  49. celldetective/gui/settings/_settings_neighborhood.py +574 -488
  50. celldetective/gui/settings/_settings_segmentation_model_training.py +779 -564
  51. celldetective/gui/settings/_settings_signal_annotator.py +329 -305
  52. celldetective/gui/settings/_settings_tracking.py +1304 -1094
  53. celldetective/gui/settings/_stardist_model_params.py +98 -0
  54. celldetective/gui/survival_ui.py +422 -312
  55. celldetective/gui/tableUI.py +1665 -1700
  56. celldetective/gui/table_ops/_maths.py +295 -0
  57. celldetective/gui/table_ops/_merge_groups.py +140 -0
  58. celldetective/gui/table_ops/_merge_one_hot.py +95 -0
  59. celldetective/gui/table_ops/_query_table.py +43 -0
  60. celldetective/gui/table_ops/_rename_col.py +44 -0
  61. celldetective/gui/thresholds_gui.py +382 -179
  62. celldetective/gui/viewers/__init__.py +0 -0
  63. celldetective/gui/viewers/base_viewer.py +700 -0
  64. celldetective/gui/viewers/channel_offset_viewer.py +331 -0
  65. celldetective/gui/viewers/contour_viewer.py +394 -0
  66. celldetective/gui/viewers/size_viewer.py +153 -0
  67. celldetective/gui/viewers/spot_detection_viewer.py +341 -0
  68. celldetective/gui/viewers/threshold_viewer.py +309 -0
  69. celldetective/gui/workers.py +304 -126
  70. celldetective/log_manager.py +92 -0
  71. celldetective/measure.py +1895 -1478
  72. celldetective/napari/__init__.py +0 -0
  73. celldetective/napari/utils.py +1025 -0
  74. celldetective/neighborhood.py +1914 -1448
  75. celldetective/preprocessing.py +1620 -1220
  76. celldetective/processes/__init__.py +0 -0
  77. celldetective/processes/background_correction.py +271 -0
  78. celldetective/processes/compute_neighborhood.py +894 -0
  79. celldetective/processes/detect_events.py +246 -0
  80. celldetective/processes/measure_cells.py +565 -0
  81. celldetective/processes/segment_cells.py +760 -0
  82. celldetective/processes/track_cells.py +435 -0
  83. celldetective/processes/train_segmentation_model.py +694 -0
  84. celldetective/processes/train_signal_model.py +265 -0
  85. celldetective/processes/unified_process.py +292 -0
  86. celldetective/regionprops/_regionprops.py +358 -317
  87. celldetective/relative_measurements.py +987 -710
  88. celldetective/scripts/measure_cells.py +313 -212
  89. celldetective/scripts/measure_relative.py +90 -46
  90. celldetective/scripts/segment_cells.py +165 -104
  91. celldetective/scripts/segment_cells_thresholds.py +96 -68
  92. celldetective/scripts/track_cells.py +198 -149
  93. celldetective/scripts/train_segmentation_model.py +324 -201
  94. celldetective/scripts/train_signal_model.py +87 -45
  95. celldetective/segmentation.py +844 -749
  96. celldetective/signals.py +3514 -2861
  97. celldetective/tracking.py +1332 -1011
  98. celldetective/utils/__init__.py +0 -0
  99. celldetective/utils/cellpose_utils/__init__.py +133 -0
  100. celldetective/utils/color_mappings.py +42 -0
  101. celldetective/utils/data_cleaning.py +630 -0
  102. celldetective/utils/data_loaders.py +450 -0
  103. celldetective/utils/dataset_helpers.py +207 -0
  104. celldetective/utils/downloaders.py +197 -0
  105. celldetective/utils/event_detection/__init__.py +8 -0
  106. celldetective/utils/experiment.py +1782 -0
  107. celldetective/utils/image_augmenters.py +308 -0
  108. celldetective/utils/image_cleaning.py +74 -0
  109. celldetective/utils/image_loaders.py +926 -0
  110. celldetective/utils/image_transforms.py +335 -0
  111. celldetective/utils/io.py +62 -0
  112. celldetective/utils/mask_cleaning.py +348 -0
  113. celldetective/utils/mask_transforms.py +5 -0
  114. celldetective/utils/masks.py +184 -0
  115. celldetective/utils/maths.py +351 -0
  116. celldetective/utils/model_getters.py +325 -0
  117. celldetective/utils/model_loaders.py +296 -0
  118. celldetective/utils/normalization.py +380 -0
  119. celldetective/utils/parsing.py +465 -0
  120. celldetective/utils/plots/__init__.py +0 -0
  121. celldetective/utils/plots/regression.py +53 -0
  122. celldetective/utils/resources.py +34 -0
  123. celldetective/utils/stardist_utils/__init__.py +104 -0
  124. celldetective/utils/stats.py +90 -0
  125. celldetective/utils/types.py +21 -0
  126. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/METADATA +1 -1
  127. celldetective-1.5.0b0.dist-info/RECORD +187 -0
  128. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/WHEEL +1 -1
  129. tests/gui/test_new_project.py +129 -117
  130. tests/gui/test_project.py +127 -79
  131. tests/test_filters.py +39 -15
  132. tests/test_notebooks.py +8 -0
  133. tests/test_tracking.py +425 -144
  134. tests/test_utils.py +123 -77
  135. celldetective/gui/base_components.py +0 -23
  136. celldetective/gui/layouts.py +0 -1602
  137. celldetective/gui/processes/compute_neighborhood.py +0 -594
  138. celldetective/gui/processes/measure_cells.py +0 -360
  139. celldetective/gui/processes/segment_cells.py +0 -499
  140. celldetective/gui/processes/track_cells.py +0 -303
  141. celldetective/gui/processes/train_segmentation_model.py +0 -270
  142. celldetective/gui/processes/train_signal_model.py +0 -108
  143. celldetective/gui/table_ops/merge_groups.py +0 -118
  144. celldetective/gui/viewers.py +0 -1354
  145. celldetective/io.py +0 -3663
  146. celldetective/utils.py +0 -3108
  147. celldetective-1.4.1.post1.dist-info/RECORD +0 -123
  148. /celldetective/{gui/processes → processes}/downloader.py +0 -0
  149. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/entry_points.txt +0 -0
  150. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/licenses/LICENSE +0 -0
  151. {celldetective-1.4.1.post1.dist-info → celldetective-1.5.0b0.dist-info}/top_level.txt +0 -0
celldetective/utils/data_loaders.py
@@ -0,0 +1,450 @@
+ import os
+
+ import numpy as np
+
+
+ from tqdm import tqdm
+ from celldetective import get_logger
+ from celldetective.utils.image_loaders import locate_stack_and_labels
+ from celldetective.utils.experiment import (
+     get_config,
+     get_experiment_wells,
+     get_experiment_labels,
+     get_experiment_metadata,
+     extract_well_name_and_number,
+     extract_position_name,
+     interpret_wells_and_positions,
+     get_position_movie_path,
+     get_positions_in_well,
+ )
+ from celldetective.utils.parsing import (
+     config_section_to_dict,
+     _extract_labels_from_config,
+ )
+
+ logger = get_logger()
+
+
+ def get_position_table(pos, population, return_path=False):
+     """
+     Retrieves the data table for a specified population at a given position, optionally returning the table's file path.
+
+     This function locates and loads a CSV data table associated with a specific population (e.g., 'targets', 'cells')
+     from a specified position directory. The position directory should contain an 'output/tables' subdirectory where
+     the CSV file named 'trajectories_{population}.csv' is expected to be found. If the file exists, it is loaded into
+     a pandas DataFrame; otherwise, None is returned.
+
+     Parameters
+     ----------
+     pos : str
+         The path to the position directory from which to load the data table.
+     population : str
+         The name of the population for which the data table is to be retrieved. This name is used to construct the
+         file name of the CSV file to be loaded.
+     return_path : bool, optional
+         If True, returns a tuple containing the loaded data table (or None) and the path to the CSV file. If False,
+         only the loaded data table (or None) is returned (default is False).
+
+     Returns
+     -------
+     pandas.DataFrame or None, or (pandas.DataFrame or None, str)
+         If return_path is False, returns the loaded data table as a pandas DataFrame, or None if the table file does
+         not exist. If return_path is True, returns a tuple where the first element is the data table (or None) and the
+         second element is the path to the CSV file.
+
+     Examples
+     --------
+     >>> df_pos = get_position_table('/path/to/position', 'targets')
+     # This will load the 'trajectories_targets.csv' table from the specified position directory into a pandas DataFrame.
+
+     >>> df_pos, table_path = get_position_table('/path/to/position', 'targets', return_path=True)
+     # This will load the 'trajectories_targets.csv' table and also return the path to the CSV file.
+
+     """
+
+     import pandas as pd
+
+     if not pos.endswith(os.sep):
+         table = os.sep.join([pos, "output", "tables", f"trajectories_{population}.csv"])
+     else:
+         table = pos + os.sep.join(
+             ["output", "tables", f"trajectories_{population}.csv"]
+         )
+
+     if os.path.exists(table):
+         try:
+             df_pos = pd.read_csv(table, low_memory=False)
+         except Exception as e:
+             logger.error(e)
+             df_pos = None
+     else:
+         df_pos = None
+
+     if return_path:
+         return df_pos, table
+     else:
+         return df_pos
+
+
+ def get_position_pickle(pos, population, return_path=False):
+     """
+     Retrieves the pickled data table for a specified population at a given position, optionally returning the file path.
+
+     This function locates and loads a pickle file associated with a specific population (e.g., 'targets', 'cells')
+     from a specified position directory. The position directory should contain an 'output/tables' subdirectory where
+     the file named 'trajectories_{population}.pkl' is expected to be found. If the file exists, it is loaded with
+     np.load(..., allow_pickle=True); otherwise, None is returned.
+
+     Parameters
+     ----------
+     pos : str
+         The path to the position directory from which to load the pickled table.
+     population : str
+         The name of the population for which the table is to be retrieved. This name is used to construct the
+         file name of the pickle file to be loaded.
+     return_path : bool, optional
+         If True, returns a tuple containing the loaded table (or None) and the path to the pickle file. If False,
+         only the loaded table (or None) is returned (default is False).
+
+     Returns
+     -------
+     pandas.DataFrame or None, or (pandas.DataFrame or None, str)
+         If return_path is False, returns the unpickled table, or None if the pickle file does not exist. If
+         return_path is True, returns a tuple where the first element is the table (or None) and the second
+         element is the path to the pickle file.
+
+     Examples
+     --------
+     >>> df_pos = get_position_pickle('/path/to/position', 'targets')
+     # This will load the 'trajectories_targets.pkl' table from the specified position directory.
+
+     >>> df_pos, table_path = get_position_pickle('/path/to/position', 'targets', return_path=True)
+     # This will load the 'trajectories_targets.pkl' table and also return the path to the pickle file.
+
+     """
+
+     if not pos.endswith(os.sep):
+         table = os.sep.join([pos, "output", "tables", f"trajectories_{population}.pkl"])
+     else:
+         table = pos + os.sep.join(
+             ["output", "tables", f"trajectories_{population}.pkl"]
+         )
+
+     if os.path.exists(table):
+         df_pos = np.load(table, allow_pickle=True)
+     else:
+         df_pos = None
+
+     if return_path:
+         return df_pos, table
+     else:
+         return df_pos
+
+
+ def load_experiment_tables(
+     experiment,
+     population="targets",
+     well_option="*",
+     position_option="*",
+     return_pos_info=False,
+     load_pickle=False,
+ ):
+     """
+     Load tabular data for an experiment, optionally including position-level information.
+
+     This function retrieves and processes tables associated with positions in an experiment.
+     It supports filtering by wells and positions, and can load either CSV data or pickle files.
+
+     Parameters
+     ----------
+     experiment : str
+         Path to the experiment folder to load data for.
+     population : str, optional
+         The population to extract from the position tables (`'targets'` or `'effectors'`). Default is `'targets'`.
+     well_option : str or list, optional
+         Specifies which wells to include. Default is `'*'`, meaning all wells.
+     position_option : str or list, optional
+         Specifies which positions to include within selected wells. Default is `'*'`, meaning all positions.
+     return_pos_info : bool, optional
+         If `True`, also returns a DataFrame containing position-level metadata. Default is `False`.
+     load_pickle : bool, optional
+         If `True`, loads pre-processed pickle files for the positions instead of raw data. Default is `False`.
+
+     Returns
+     -------
+     df : pandas.DataFrame or None
+         A DataFrame containing aggregated data for the specified wells and positions, or `None` if no data is found.
+         The DataFrame includes metadata such as well and position identifiers, concentrations, antibodies, and other
+         experimental parameters.
+     df_pos_info : pandas.DataFrame, optional
+         A DataFrame with metadata for each position, including file paths and experimental details. Returned only
+         if `return_pos_info=True`.
+
+     Notes
+     -----
+     - The function assumes the experiment's configuration includes details about movie prefixes, concentrations,
+       cell types, antibodies, and pharmaceutical agents.
+     - Wells and positions can be filtered using `well_option` and `position_option`, respectively. If filtering
+       fails or is invalid, those specific wells/positions are skipped.
+     - Position-level metadata is assembled into `df_pos_info` and includes paths to data and movies.
+
+     Examples
+     --------
+     Load all data for an experiment:
+
+     >>> df = load_experiment_tables("path/to/experiment1")
+
+     Load data for specific wells and positions, including position metadata:
+
+     >>> df, df_pos_info = load_experiment_tables(
+     ...     "experiment_01", well_option=["A1", "B1"], position_option=[0, 1], return_pos_info=True
+     ... )
+
+     Use pickle files for faster loading:
+
+     >>> df = load_experiment_tables("experiment_01", load_pickle=True)
+
+     """
+
+     import pandas as pd
+
+     config = get_config(experiment)
+     wells = get_experiment_wells(experiment)
+
+     movie_prefix = config_section_to_dict(config, "MovieSettings")["movie_prefix"]
+
+     labels = get_experiment_labels(experiment)
+     metadata = get_experiment_metadata(experiment)  # None or dict of metadata
+     well_labels = _extract_labels_from_config(config, len(wells))
+
+     well_indices, position_indices = interpret_wells_and_positions(
+         experiment, well_option, position_option
+     )
+
+     df = []
+     df_pos_info = []
+     real_well_index = 0
+
+     for k, well_path in enumerate(tqdm(wells[well_indices])):
+
+         any_table = False  # assume no table
+
+         well_name, well_number = extract_well_name_and_number(well_path)
+         widx = well_indices[k]
+         well_alias = well_labels[widx]
+
+         positions = get_positions_in_well(well_path)
+         if position_indices is not None:
+             try:
+                 positions = positions[position_indices]
+             except Exception as e:
+                 logger.error(e)
+                 continue
+
+         real_pos_index = 0
+         for pidx, pos_path in enumerate(positions):
+
+             pos_name = extract_position_name(pos_path)
+
+             stack_path = get_position_movie_path(pos_path, prefix=movie_prefix)
+
+             if not load_pickle:
+                 df_pos, table = get_position_table(
+                     pos_path, population=population, return_path=True
+                 )
+             else:
+                 df_pos, table = get_position_pickle(
+                     pos_path, population=population, return_path=True
+                 )
+
+             if df_pos is not None:
+
+                 df_pos["position"] = pos_path
+                 df_pos["well"] = well_path
+                 df_pos["well_index"] = well_number
+                 df_pos["well_name"] = well_name
+                 df_pos["pos_name"] = pos_name
+
+                 for label_key, values in labels.items():
+                     try:
+                         df_pos[label_key] = values[widx]
+                     except Exception as e:
+                         logger.error(f"{e=}")
+
+                 if metadata is not None:
+                     for key in metadata:
+                         df_pos[key] = metadata[key]
+
+                 df.append(df_pos)
+                 any_table = True
+
+             pos_dict = {
+                 "pos_path": pos_path,
+                 "pos_index": real_pos_index,
+                 "pos_name": pos_name,
+                 "table_path": table,
+                 "stack_path": stack_path,
+                 "well_path": well_path,
+                 "well_index": real_well_index,
+                 "well_name": well_name,
+                 "well_number": well_number,
+                 "well_alias": well_alias,
+             }
+
+             df_pos_info.append(pos_dict)
+
+             real_pos_index += 1
+
+         if any_table:
+             real_well_index += 1
+
+     df_pos_info = pd.DataFrame(df_pos_info)
+     if len(df) > 0:
+         df = pd.concat(df)
+         df = df.reset_index(drop=True)
+     else:
+         df = None
+
+     if return_pos_info:
+         return df, df_pos_info
+     else:
+         return df
+
+
+ def load_tracking_data(position, prefix="Aligned", population="target"):
+     """
+     Load the tracking data, labels, and stack for a given position and population.
+
+     Parameters
+     ----------
+     position : str
+         The position or directory where the data is located.
+     prefix : str, optional
+         The prefix used in the filenames of the stack images (default is "Aligned").
+     population : str, optional
+         The population to load the data for. Options are "target" or "effector" (default is "target").
+
+     Returns
+     -------
+     trajectories : DataFrame
+         The tracking data loaded as a pandas DataFrame.
+     labels : ndarray
+         The segmentation labels loaded as a numpy ndarray.
+     stack : ndarray
+         The image stack loaded as a numpy ndarray.
+
+     Notes
+     -----
+     This function loads the tracking data, labels, and stack for a given position and population.
+     It reads the trajectories from the appropriate CSV file based on the specified population.
+     The stack and labels are located using the `locate_stack_and_labels` function.
+     The resulting tracking data is returned as a pandas DataFrame, and the labels and stack are returned as numpy ndarrays.
+
+     Examples
+     --------
+     >>> trajectories, labels, stack = load_tracking_data(position, population="target")
+     # Load the tracking data, labels, and stack for the specified position and target population.
+
+     """
+
+     import pandas as pd
+
+     position = position.replace("\\", "/")
+     if population.lower() in ("target", "targets"):
+         trajectories = pd.read_csv(
+             position + os.sep.join(["output", "tables", "trajectories_targets.csv"])
+         )
+     elif population.lower() in ("effector", "effectors"):
+         trajectories = pd.read_csv(
+             position + os.sep.join(["output", "tables", "trajectories_effectors.csv"])
+         )
+     else:
+         trajectories = pd.read_csv(
+             position
+             + os.sep.join(["output", "tables", f"trajectories_{population}.csv"])
+         )
+
+     stack, labels = locate_stack_and_labels(
+         position, prefix=prefix, population=population
+     )
+
+     return trajectories, labels, stack
+
+
+ def interpret_tracking_configuration(config):
+     """
+     Interpret and resolve the path for a tracking configuration file.
+
+     This function determines the appropriate configuration file path based on the input.
+     If the input is a string representing an existing path or a known configuration name,
+     it resolves to the correct file path. If the input is invalid or `None`, a default
+     configuration is returned.
+
+     Parameters
+     ----------
+     config : str or None
+         The input configuration, which can be:
+         - A string representing the full path to a configuration file.
+         - A short name of a configuration file without the `.json` extension.
+         - `None` to use a default configuration.
+
+     Returns
+     -------
+     str
+         The resolved path to the configuration file.
+
+     Notes
+     -----
+     - If `config` is a string and the specified path exists, it is returned as-is.
+     - If `config` is a name, the function searches in the `tracking_configs` directory
+       within the `celldetective` models folder.
+     - If the file or name is not found, or if `config` is `None`, the function falls
+       back to a default configuration using `cell_config()`.
+
+     Examples
+     --------
+     Resolve a full path:
+
+     >>> interpret_tracking_configuration("/path/to/config.json")
+     '/path/to/config.json'
+
+     Resolve a named configuration:
+
+     >>> interpret_tracking_configuration("default_tracking")
+     '/path/to/celldetective/models/tracking_configs/default_tracking.json'
+
+     Handle `None` to return the default configuration:
+
+     >>> interpret_tracking_configuration(None)
+     '/path/to/default/config.json'
+
+     """
+
+     if isinstance(config, str):
+         if os.path.exists(config):
+             return config
+         else:
+             modelpath = os.sep.join(
+                 [
+                     os.path.split(os.path.dirname(os.path.realpath(__file__)))[0],
+                     # "celldetective",
+                     "models",
+                     "tracking_configs",
+                     os.sep,
+                 ]
+             )
+             if os.path.exists(modelpath + config + ".json"):
+                 return modelpath + config + ".json"
+             else:
+                 from btrack.datasets import cell_config
+
+                 config = cell_config()
+     elif config is None:
+         from btrack.datasets import cell_config
+
+         config = cell_config()
+
+     return config
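
These loaders resolve everything from Celldetective's on-disk experiment layout (well folders containing position folders, each with an `output/tables` subdirectory). A minimal usage sketch, assuming this hunk is the new `celldetective/utils/data_loaders.py` (the only new 450-line module in the file list above) and a hypothetical experiment at `/data/exp1`:

    from celldetective.utils.data_loaders import (
        get_position_table,
        load_experiment_tables,
        interpret_tracking_configuration,
    )

    # Load one position's trajectory table (returns None if the CSV is absent).
    pos = "/data/exp1/W1/100/"  # hypothetical position directory
    df_pos = get_position_table(pos, population="targets")

    # Aggregate all wells/positions of the experiment into a single DataFrame,
    # with per-position metadata (paths, well aliases, ...) returned separately.
    df, df_pos_info = load_experiment_tables(
        "/data/exp1",
        population="targets",
        well_option="*",          # all wells
        position_option="*",      # all positions in each selected well
        return_pos_info=True,
    )

    # Resolve a tracking configuration: an existing path is returned as-is,
    # a bare name is looked up under models/tracking_configs, anything else
    # falls back to btrack's default cell_config().
    cfg = interpret_tracking_configuration("default_tracking")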
celldetective/utils/dataset_helpers.py
@@ -0,0 +1,207 @@
+ import numpy as np
+ from celldetective import get_logger
+
+ logger = get_logger()
+
+
+ def split_by_ratio(arr, *ratios):
+     """
+     Split an array into multiple chunks based on given ratios.
+
+     Parameters
+     ----------
+     arr : array-like
+         The input array to be split.
+     *ratios : float
+         Ratios specifying the proportions of each chunk. The sum of the ratios should be less than or equal to 1.
+
+     Returns
+     -------
+     list
+         A list of arrays containing the splits/chunks of the input array.
+
+     Notes
+     -----
+     This function randomly permutes the input array (`arr`) and then splits it into multiple chunks based on the
+     provided ratios. The ratios determine the relative sizes of the resulting chunks, and the accumulated ratios
+     determine the split indices.
+
+     The function returns one chunk per provided ratio. If the ratios sum to less than 1, the leftover elements
+     beyond the last split index are discarded.
+
+     Examples
+     --------
+     >>> arr = np.arange(10)
+     >>> splits = split_by_ratio(arr, 0.6, 0.2, 0.2)
+     >>> print(len(splits))
+     3
+     # Split the array into 3 chunks with ratios 0.6, 0.2, and 0.2.
+
+     >>> arr = np.arange(100)
+     >>> splits = split_by_ratio(arr, 0.5, 0.25)
+     >>> print([len(split) for split in splits])
+     [50, 25]
+     # Split the array into 2 chunks with ratios 0.5 and 0.25; the remaining 25 elements are discarded.
+
+     """
+
+     arr = np.random.permutation(arr)
+     ind = np.add.accumulate(np.array(ratios) * len(arr)).astype(int)
+     return [x.tolist() for x in np.split(arr, ind)][: len(ratios)]
+
+
+ def compute_weights(y):
+     """
+     Compute class weights based on the input labels.
+
+     Parameters
+     ----------
+     y : array-like
+         Array of labels.
+
+     Returns
+     -------
+     dict
+         A dictionary containing the computed class weights.
+
+     Notes
+     -----
+     This function calculates the class weights based on the input labels (`y`) using the "balanced" method.
+     The class weights are computed to address the class imbalance problem, where the weights are inversely
+     proportional to the class frequencies.
+
+     The function returns a dictionary (`class_weights`) where the keys represent the unique classes in `y`
+     and the values represent the computed weights for each class.
+
+     Examples
+     --------
+     >>> labels = np.array([0, 1, 0, 1, 1])
+     >>> weights = compute_weights(labels)
+     >>> print(weights)
+     {0: 1.25, 1: 0.8333333333333334}
+     # Compute class weights for the binary labels.
+
+     >>> labels = np.array([0, 1, 2, 0, 1, 2, 2])
+     >>> weights = compute_weights(labels)
+     >>> print(weights)
+     {0: 1.1666666666666667, 1: 1.1666666666666667, 2: 0.7777777777777778}
+     # Compute class weights for the multi-class labels.
+
+     """
+     from sklearn.utils import compute_class_weight
+
+     class_weights = compute_class_weight(
+         class_weight="balanced",
+         classes=np.unique(y),
+         y=y,
+     )
+     class_weights = dict(zip(np.unique(y), class_weights))
+
+     return class_weights
+
+
+ def train_test_split(
+     data_x, data_y1, data_class=None, validation_size=0.25, test_size=0, n_iterations=10
+ ):
+     """
+     Split the dataset into training, validation, and (optionally) test sets.
+
+     Parameters
+     ----------
+     data_x : array-like
+         Input features or independent variables.
+     data_y1 : array-like
+         Target variable.
+     data_class : array-like, optional
+         One-hot encoded class labels. When provided, the split is redrawn until every class appears
+         in both the training and validation sets. Default is None.
+     validation_size : float, optional
+         Proportion of the dataset to include in the validation set. Default is 0.25.
+     test_size : float, optional
+         Proportion of the dataset to include in the test set. Default is 0.
+     n_iterations : int, optional
+         Maximum number of reshuffles attempted to obtain a split in which all classes are represented
+         in each set. Default is 10.
+
+     Returns
+     -------
+     dict
+         A dictionary containing the split datasets.
+         Keys: "x_train", "x_val", "y1_train", "y1_val"; plus "y2_train" and "y2_val" when `data_class`
+         is provided. If test_size > 0, additional keys: "x_test", "y1_test" (and "y2_test" when
+         `data_class` is provided).
+
+     Raises
+     ------
+     Exception
+         If no shuffle within `n_iterations` attempts yields a split where every class appears in both
+         the training and validation sets.
+
+     Notes
+     -----
+     This function divides the dataset into training, validation, and test sets based on the specified
+     proportions. It shuffles the data and splits it according to the proportions defined by
+     `validation_size` and `test_size`; the training set receives the remainder.
+
+     The input features (`data_x`) and targets (`data_y1`, and `data_class` if provided) should be
+     arrays or array-like objects with compatible first dimensions.
+
+     """
+
+     if data_class is not None:
+         logger.info(
+             f"Unique classes: {np.sort(np.argmax(np.unique(data_class, axis=0), axis=1))}"
+         )
+
+     for i in range(n_iterations):
+
+         n_values = len(data_x)
+         randomize = np.arange(n_values)
+         np.random.shuffle(randomize)
+
+         train_percentage = 1 - validation_size - test_size
+
+         chunks = split_by_ratio(randomize, train_percentage, validation_size, test_size)
+
+         x_train = data_x[chunks[0]]
+         y1_train = data_y1[chunks[0]]
+         if data_class is not None:
+             y2_train = data_class[chunks[0]]
+
+         x_val = data_x[chunks[1]]
+         y1_val = data_y1[chunks[1]]
+         if data_class is not None:
+             y2_val = data_class[chunks[1]]
+
+         if data_class is not None:
+             logger.info(
+                 f"Classes in train set: {np.sort(np.argmax(np.unique(y2_train, axis=0), axis=1))}; "
+                 f"classes in validation set: {np.sort(np.argmax(np.unique(y2_val, axis=0), axis=1))}"
+             )
+             same_class_test = np.array_equal(
+                 np.sort(np.argmax(np.unique(y2_train, axis=0), axis=1)),
+                 np.sort(np.argmax(np.unique(y2_val, axis=0), axis=1)),
+             )
+             logger.info(f"Check that classes are found in all sets: {same_class_test}...")
+         else:
+             same_class_test = True
+
+         if same_class_test:
+
+             ds = {
+                 "x_train": x_train,
+                 "x_val": x_val,
+                 "y1_train": y1_train,
+                 "y1_val": y1_val,
+             }
+             if data_class is not None:
+                 ds.update({"y2_train": y2_train, "y2_val": y2_val})
+
+             if test_size > 0:
+                 x_test = data_x[chunks[2]]
+                 y1_test = data_y1[chunks[2]]
+                 ds.update({"x_test": x_test, "y1_test": y1_test})
+                 if data_class is not None:
+                     y2_test = data_class[chunks[2]]
+                     ds.update({"y2_test": y2_test})
+             return ds
+         else:
+             continue
+
+     raise Exception(
+         "Some classes are missing from the train or validation set... Abort."
+     )
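
A short sketch of these training-set helpers on synthetic data, assuming this hunk is the new `celldetective/utils/dataset_helpers.py` (the +207-line module in the file list above); the array shapes are illustrative only:

    import numpy as np
    from celldetective.utils.dataset_helpers import (
        compute_weights,
        train_test_split,
    )

    rng = np.random.default_rng(0)
    x = rng.normal(size=(100, 128, 3))            # e.g. 100 single-cell signals
    y1 = rng.normal(size=(100, 128))              # per-signal regression target
    classes = np.eye(3)[rng.integers(0, 3, 100)]  # one-hot class labels

    # 70/20/10 split, redrawn until train and validation contain all classes.
    ds = train_test_split(
        x, y1, data_class=classes, validation_size=0.2, test_size=0.1
    )
    print(ds["x_train"].shape, ds["x_val"].shape, ds["x_test"].shape)

    # Balanced class weights, e.g. to pass as class_weight to a Keras fit().
    weights = compute_weights(np.argmax(classes, axis=1))

Note that this `train_test_split` shadows `sklearn.model_selection.train_test_split` in name only, so it is best imported explicitly from the module rather than star-imported.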