midrc_melody-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. MIDRC_MELODY/__init__.py +0 -0
  2. MIDRC_MELODY/__main__.py +4 -0
  3. MIDRC_MELODY/common/__init__.py +0 -0
  4. MIDRC_MELODY/common/data_loading.py +199 -0
  5. MIDRC_MELODY/common/data_preprocessing.py +134 -0
  6. MIDRC_MELODY/common/edit_config.py +156 -0
  7. MIDRC_MELODY/common/eod_aaod_metrics.py +292 -0
  8. MIDRC_MELODY/common/generate_eod_aaod_spiders.py +69 -0
  9. MIDRC_MELODY/common/generate_qwk_spiders.py +56 -0
  10. MIDRC_MELODY/common/matplotlib_spider.py +425 -0
  11. MIDRC_MELODY/common/plot_tools.py +132 -0
  12. MIDRC_MELODY/common/plotly_spider.py +217 -0
  13. MIDRC_MELODY/common/qwk_metrics.py +244 -0
  14. MIDRC_MELODY/common/table_tools.py +230 -0
  15. MIDRC_MELODY/gui/__init__.py +0 -0
  16. MIDRC_MELODY/gui/config_editor.py +200 -0
  17. MIDRC_MELODY/gui/data_loading.py +157 -0
  18. MIDRC_MELODY/gui/main_controller.py +154 -0
  19. MIDRC_MELODY/gui/main_window.py +545 -0
  20. MIDRC_MELODY/gui/matplotlib_spider_widget.py +204 -0
  21. MIDRC_MELODY/gui/metrics_model.py +62 -0
  22. MIDRC_MELODY/gui/plotly_spider_widget.py +56 -0
  23. MIDRC_MELODY/gui/qchart_spider_widget.py +272 -0
  24. MIDRC_MELODY/gui/shared/__init__.py +0 -0
  25. MIDRC_MELODY/gui/shared/react/__init__.py +0 -0
  26. MIDRC_MELODY/gui/shared/react/copyabletableview.py +100 -0
  27. MIDRC_MELODY/gui/shared/react/grabbablewidget.py +406 -0
  28. MIDRC_MELODY/gui/tqdm_handler.py +210 -0
  29. MIDRC_MELODY/melody.py +102 -0
  30. MIDRC_MELODY/melody_gui.py +111 -0
  31. MIDRC_MELODY/resources/MIDRC.ico +0 -0
  32. midrc_melody-0.3.3.dist-info/METADATA +151 -0
  33. midrc_melody-0.3.3.dist-info/RECORD +37 -0
  34. midrc_melody-0.3.3.dist-info/WHEEL +5 -0
  35. midrc_melody-0.3.3.dist-info/entry_points.txt +4 -0
  36. midrc_melody-0.3.3.dist-info/licenses/LICENSE +201 -0
  37. midrc_melody-0.3.3.dist-info/top_level.txt +1 -0
MIDRC_MELODY/__main__.py
@@ -0,0 +1,4 @@
+ from MIDRC_MELODY.melody import main
+
+ if __name__ == "__main__":
+     main()
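With the wheel installed, this __main__ module makes the package runnable as python -m MIDRC_MELODY, which simply delegates to main() in MIDRC_MELODY/melody.py.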
MIDRC_MELODY/common/data_loading.py
@@ -0,0 +1,199 @@
+ # Copyright (c) 2025 Medical Imaging and Data Resource Center (MIDRC).
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ """ Data Loading and Preprocessing Functions """
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ import pickle
+ import sys
+ import time
+ from typing import Any, Dict, List, Optional, Tuple
+
+ import pandas as pd
+
+ from MIDRC_MELODY.common.data_preprocessing import bin_dataframe_column
+
+
+ def check_file_exists(file_path: str, key_name: str) -> None:
+     """
+     Check if a file exists and exit gracefully with an error message if it doesn't.
+
+     :arg file_path: Path to the file to check.
+     :arg key_name: Key name in the configuration file.
+     """
+     if not Path(file_path).exists():
+         print(f"Error: The file specified for '{key_name}' ('{file_path}') does not exist.")
+         print(f"Please update the '{key_name}' path in the config file to point to a valid file.")
+         print("Ensure the path is correct and accessible.")
+         sys.exit(1)
+
+
+ def create_matched_df_from_files(input_data: dict, numeric_cols_dict: dict) -> Tuple[pd.DataFrame, list, list]:
+     """
+     Create a matched DataFrame from the truth and test files
+
+     :arg input_data: Dictionary containing the input data
+     :arg numeric_cols_dict: Dictionary containing the numeric columns information
+
+     :return: A tuple containing the matched DataFrame, a list of categories, and a list of test columns
+     """
+     truth_file = input_data['truth file']
+     test_scores_file = input_data['test scores']
+
+     # Check if files exist
+     check_file_exists(truth_file, 'truth file')
+     check_file_exists(test_scores_file, 'test scores')
+
+     # Read the truth and test scores files
+     df_truth = pd.read_csv(truth_file)
+     df_test = pd.read_csv(test_scores_file)
+     uid_col = input_data.get('uid column', 'case_name')
+     truth_col = input_data.get('truth column', 'truth')
+
+     test_columns = df_test[df_test.columns.difference([uid_col])].columns
+     categories = df_truth[df_truth.columns.difference([uid_col, truth_col])].columns
+
+     # Bin numerical columns, specifically 'age'
+     for str_col, col_dict in numeric_cols_dict.items():
+         num_col = col_dict['raw column'] if 'raw column' in col_dict else str_col
+         bins = col_dict['bins'] if 'bins' in col_dict else None
+         labels = col_dict['labels'] if 'labels' in col_dict else None
+
+         if num_col in df_truth.columns:
+             df_truth = bin_dataframe_column(df_truth, num_col, str_col, bins=bins, labels=labels)
+             categories = categories.map(lambda x, col=str_col, num=num_col: col if x == num else x)
+
+     return match_cases(df_truth, df_test, uid_col), categories.tolist(), test_columns.tolist()
+
+
+ def match_cases(df1, df2, column) -> pd.DataFrame:
+     """
+     Match cases between two DataFrames
+
+     :arg df1: First DataFrame
+     :arg df2: Second DataFrame
+     :arg column: Column to match on
+
+     :return: A DataFrame containing the matched cases
+     """
+     merged_df = df1.merge(df2, on=column, how='inner')  # , suffixes=('_truth', '_ai'))
+     return merged_df
+
+
+ # Step 5: Determine reference groups
+ def determine_valid_n_reference_groups(df, categories, min_count=10) -> Tuple[dict, dict, pd.DataFrame]:
+     """
+     Determine the valid and reference groups for the given categories
+
+     :arg df: DataFrame
+     :arg categories: List of categories
+     :arg min_count: Minimum count for a group to be considered valid
+
+     :return: A tuple containing the reference groups, valid groups, and the filtered DataFrame
+     """
+     if isinstance(categories, pd.Index):
+         categories = categories.to_list()
+
+     reference_groups = {}
+     valid_groups = {}
+
+     for category in categories:
+         valid_groups[category] = {}
+         category_counts = df[category].value_counts()
+
+         for value in category_counts.index:
+             if category_counts[value] >= min_count and value != 'Not Reported':
+                 valid_groups[category][value] = category_counts[value]
+
+         if valid_groups[category]:
+             reference_groups[category] = max(valid_groups[category], key=valid_groups[category].get)
+
+     # Filter the DataFrame based on valid groups
+     filtered_df = df.copy()
+     for category in categories:
+         valid_values = list(valid_groups[category].keys())
+         filtered_df = filtered_df[filtered_df[category].isin(valid_values)]
+
+     return reference_groups, valid_groups, filtered_df
+
+
+ def save_pickled_data(output_config: dict, metric: str, data: Any):
+     """
+     Save pickled data to a file
+
+     :arg output_config: Output configuration dictionary
+     :arg metric: Metric name
+     :arg data: Data to save
+     """
+     metric_config = output_config.get(metric.lower(), {})
+     if metric_config.get('save', False):
+         filename = f"{metric_config['file prefix']}{time.strftime('%Y%m%d%H%M%S')}.pkl"
+         print(f'Saving {metric} data to filename:', filename)
+         # Create directory if it doesn't exist
+         Path(filename).parent.mkdir(parents=True, exist_ok=True)
+
+         with open(filename, 'wb') as f:
+             pickle.dump(data, f)
+
+
+ def check_required_columns(df: pd.DataFrame, columns: List[str]) -> None:
+     """
+     Raise an error if any required column is missing.
+
+     :arg df: DataFrame to check for required columns.
+     :arg columns: List of required columns.
+     """
+     missing = [col for col in columns if col not in df.columns]
+     if missing:
+         raise ValueError(f"Missing required columns: {missing}")
+
+
+ @dataclass(frozen=True)
+ class TestAndDemographicData:
+     """
+     Container for matched test-score and demographic data
+     """
+     matched_df: pd.DataFrame
+     truth_col: str
+     categories: List[str]
+     test_cols: List[str]
+     reference_groups: Dict[str, Any]
+     valid_groups: Dict[str, List[Any]]
+     n_iter: Optional[int]
+     base_seed: Optional[int]
+
+
+ def build_test_and_demographic_data(config: Dict[str, Any]) -> TestAndDemographicData:
+     """
+     Build the TestAndDemographicData object from the configuration dictionary.
+
+     :arg config: Configuration dictionary
+
+     :returns: TestAndDemographicData object
+     """
+     matched_df, categories, test_cols = create_matched_df_from_files(config['input data'], config['numeric_cols'])
+     min_count = config.get('min count per category', 10)
+     reference_groups, valid_groups, _ = determine_valid_n_reference_groups(matched_df, categories, min_count=min_count)
+     n_iter = config.get('bootstrap', {}).get('iterations', 1000)
+     base_seed = config.get('bootstrap', {}).get('seed', None)
+     truth_col = config['input data'].get('truth column', 'truth')
+
+     # Check required columns before further processing
+     required_columns = [truth_col] + test_cols + categories
+     check_required_columns(matched_df, required_columns)
+
+     return TestAndDemographicData(matched_df, truth_col, categories, test_cols, reference_groups, valid_groups, n_iter,
+                                   base_seed)
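For reference, a minimal sketch of the configuration dict that build_test_and_demographic_data consumes, inferred from the keys this module reads; the file paths, the age binning, and the bootstrap values below are hypothetical placeholders, not values shipped with the package:

from MIDRC_MELODY.common.data_loading import build_test_and_demographic_data

config = {
    'input data': {
        'truth file': 'truth.csv',    # hypothetical path
        'test scores': 'scores.csv',  # hypothetical path
        'uid column': 'case_name',
        'truth column': 'truth',
    },
    'numeric_cols': {
        'age_group': {                # hypothetical binned-column name
            'raw column': 'age',
            'bins': [0, 18, 40, 65, 120],
        },
    },
    'min count per category': 10,
    'bootstrap': {'iterations': 1000, 'seed': 42},
}

data = build_test_and_demographic_data(config)
print(data.reference_groups)  # largest valid group per demographic category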
MIDRC_MELODY/common/data_preprocessing.py
@@ -0,0 +1,134 @@
+ # Copyright (c) 2025 Medical Imaging and Data Resource Center (MIDRC).
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ """
+ This module contains functions for data preprocessing and combining datasets.
+ """
+
+ import numpy as np
+ import pandas as pd
+
+
+ def _generate_default_labels(bins: list[int] | list[float]):
+     """
+     Generates default labels for the bins.
+
+     Args:
+         bins (list): The bins used for the binning process
+
+     Returns:
+         list(str): A list of labels for the bins
+     """
+     labels = []
+     for i in range(len(bins) - 1):
+         if isinstance(bins[i], int) and isinstance(bins[i + 1], int):
+             if i < len(bins) - 2:
+                 labels.append(f"{bins[i]}-{bins[i + 1] - 1}")
+             else:
+                 labels.append(f">={bins[i]}")
+         else:
+             labels.append(f"{bins[i]}-{bins[i + 1]}")
+     return labels
+
+
+ def _adjust_outliers(df: pd.DataFrame, cut_column_name: str, column_name: str, bins: list[int] | list[float]):
+     """
+     Adjusts the outliers in the cut column.
+
+     Args:
+         df: input DataFrame
+         cut_column_name: column name to be adjusted (e.g. created by the binning process)
+         column_name: column name to be checked for outliers
+         bins: The bins used for the binning process
+
+     Returns:
+         pd.DataFrame: DataFrame with the outliers adjusted in the cut column
+     """
+     new_text = "Not Reported"
+     low_text = "Outlier_Low"
+     high_text = "Outlier_High"
+     print(f"WARNING: There are values outside the bins specified for the '{column_name}' column.")
+     df.loc[df[cut_column_name].isna() & (df[column_name] < bins[0]), cut_column_name] = low_text
+     df.loc[df[cut_column_name].isna() & (df[column_name] >= bins[-1]), cut_column_name] = high_text
+     df.loc[df[cut_column_name].isna(), cut_column_name] = new_text
+     if (df[cut_column_name] == low_text).sum() > 0:
+         print(f" {(df[cut_column_name] == low_text).sum()} values are below the min bin value.\n"
+               f" These will be placed in a new '{low_text}' category.")
+     if (df[cut_column_name] == high_text).sum() > 0:
+         print(f" {(df[cut_column_name] == high_text).sum()} values are above the max bin value.\n"
+               f" These will be placed in a new '{high_text}' category.")
+     if (df[cut_column_name] == new_text).sum() > 0:
+         print(f" {(df[cut_column_name] == new_text).sum()} values are nan.\n"
+               f" These will be placed in a new '{new_text}' category.")
+     return df
+
+
+ def bin_dataframe_column(df_to_bin: pd.DataFrame, column_name: str, cut_column_name: str = 'CUT',
+                          bins: list[int] | list[float] | None = None, labels: list[str] | None = None, *, right: bool = False):
+     """
+     Cuts a numeric column into bins and adds a column with the bin labels.
+
+     Args:
+         df_to_bin: pandas DataFrame containing the data
+         column_name: name of the column to be binned
+         cut_column_name: name of the column to be added with the bin labels
+         bins: list of bins to be used for the binning
+         labels: list of labels for the bins
+         right: whether to use right-inclusive intervals
+
+     Returns:
+         pd.DataFrame: pandas DataFrame with the binned column and the labels
+     """
+     if column_name not in df_to_bin.columns:
+         return df_to_bin
+
+     if bins is None:
+         bins = np.arange(0, 100, 10).tolist()  # Python ints so _generate_default_labels sees integer bins
+
+     if labels is None:
+         labels = _generate_default_labels(bins)
+
+     df_out = df_to_bin.assign(**{
+         cut_column_name: pd.cut(
+             df_to_bin[column_name],
+             bins=bins,
+             labels=labels,
+             right=right,
+         ).astype("string"),
+     })
+
+     if df_out[cut_column_name].isna().any():
+         df_out = _adjust_outliers(df_out, cut_column_name, column_name, bins)
+
+     return df_out
+
+
+ def combine_datasets_from_list(df_list: list[pd.DataFrame], dataset_column: str = '_dataset_'):
+     """
+     Combines a list of dataframes into a single dataframe with a new column for the dataset name.
+
+     Args:
+         df_list (list[pd.DataFrame]): A list of dataframes to be combined.
+         dataset_column (str, optional): The name of the column to be used for the dataset name. Defaults to '_dataset_'.
+
+     Returns:
+         pd.DataFrame: A combined dataframe with a new column for the dataset name.
+     """
+     labels = [f'Dataset {i}' for i in range(len(df_list))]  # Dataset labels
+     combined_df = pd.concat(
+         [df.assign(**{dataset_column: label}) for label, df in zip(labels, df_list)],
+         ignore_index=True,
+     )
+     return combined_df
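A minimal sketch exercising bin_dataframe_column on hypothetical data. With right=False (the default) the bins are left-inclusive, so 89 lands in the last labeled bin, while 95 falls outside [0, 90) and is routed to the 'Outlier_High' category by _adjust_outliers (which also prints a WARNING):

import pandas as pd

from MIDRC_MELODY.common.data_preprocessing import bin_dataframe_column

df = pd.DataFrame({'age': [5, 17, 42, 89, 95]})  # hypothetical sample
binned = bin_dataframe_column(df, 'age', cut_column_name='age_group',
                              bins=[0, 18, 65, 90])
print(binned['age_group'].tolist())
# ['0-17', '0-17', '18-64', '>=65', 'Outlier_High'] with the default labels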
MIDRC_MELODY/common/edit_config.py
@@ -0,0 +1,156 @@
+ # Copyright (c) 2025 Medical Imaging and Data Resource Center (MIDRC).
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ import curses
+ from curses.textpad import rectangle, Textbox
+
+
+ class PadWrapper:  # Scrollable viewport wrapper around a curses pad
+     def __init__(self, pad, win_h, win_w, top, left):
+         self.real_pad = pad
+         self.offset = 0
+         self.win_h = win_h
+         self.win_w = win_w
+         self.top = top
+         self.left = left
+
+     def refresh(self, *args):  # ignore Textbox-supplied args; repaint the viewport
+         self.real_pad.refresh(
+             self.offset, 0,
+             self.top, self.left,
+             self.top + self.win_h - 1,
+             self.left + self.win_w - 1
+         )
+
+     def __getattr__(self, name):
+         return getattr(self.real_pad, name)
+
+
+ def _load_file(path):
+     with open(path, 'r', encoding='utf-8') as f:
+         return f.read().split('\n')
+
+
+ def _save_file(path, lines):
+     with open(path, 'w', encoding='utf-8') as f:
+         f.write('\n'.join(lines))
+
+
+ def _handle_scroll(key, pad):
+     real_h, _ = pad.real_pad.getmaxyx()
+     win_h = pad.win_h
+     y, x = pad.getyx()
+     if key == curses.KEY_DOWN:
+         if y - pad.offset >= win_h - 1 and pad.offset < real_h - win_h:
+             pad.offset += 1
+     elif key == curses.KEY_UP:
+         if y - pad.offset <= 0 and pad.offset > 0:
+             pad.offset -= 1
+     elif key == curses.KEY_NPAGE:
+         pad.offset = min(pad.offset + win_h, real_h - win_h)
+         if y < pad.offset:
+             pad.move(pad.offset, x)
+     elif key == curses.KEY_PPAGE:
+         pad.offset = max(pad.offset - win_h, 0)
+         if y >= pad.offset + win_h:
+             pad.move(pad.offset + win_h - 1, x)
+
+     pad.refresh()
+     return key
+
+
+ class ConsoleTextEditor:  # Full-screen curses editor for a text file
+     def __init__(self, stdscr, original, path):
+         self.stdscr = stdscr
+         self.original = original
+         self.path = path
+         self._init_curses()
+         self._draw_border()
+         self.pad = self._make_pad()
+         self._fill_pad()
+
+     def _init_curses(self):
+         curses.cbreak()
+         curses.noecho()
+         self.stdscr.keypad(True)
+         curses.curs_set(1)
+
+     def _draw_border(self):
+         h, w = self.stdscr.getmaxyx()
+         rectangle(self.stdscr, 1, 1, h - 2, w - 2)
+         self.stdscr.addstr(
+             h - 1, 2,
+             "Ctrl-G=save Ctrl-C=cancel ↑/↓=scroll PgUp/PgDn=jump"
+         )
+         self.stdscr.refresh()
+
+     def _make_pad(self):
+         h, w = self.stdscr.getmaxyx()
+         win_h, win_w = h - 4, w - 4
+         real_h = max(len(self.original) + 1, win_h)
+         _real_pad = curses.newpad(real_h, win_w)
+         pad = PadWrapper(_real_pad, win_h, win_w, top=2, left=2)
+         pad.keypad(True)
+         pad.scrollok(True)
+         pad.idlok(True)
+         return pad
+
+     def _fill_pad(self):
+         for idx, line in enumerate(self.original):
+             try:
+                 self.pad.addstr(idx, 0, line)
+             except curses.error:
+                 pass
+         self.pad.move(0, 0)
+         self.pad.refresh()
+
+     def _collect_lines(self):
+         real_h, _ = self.pad.real_pad.getmaxyx()
+         lines = []
+         for i in range(real_h):
+             raw = self.pad.instr(i, 0, self.pad.win_w).decode('utf-8', 'ignore')
+             lines.append(raw.rstrip('\x00').rstrip())  # instr pads rows with blanks
+         return lines
+
+     def _validator(self, ch):
+         _handle_scroll(ch, self.pad)
+         try:
+             self.pad.refresh()
+         except curses.error:
+             pass
+         return ch
+
+     def run(self):
+         tb = Textbox(self.pad)
+         try:
+             tb.edit(self._validator)
+         except KeyboardInterrupt:
+             return
+         finally:
+             curses.flushinp()
+         lines = self._collect_lines()
+         _save_file(self.path, lines)
+
+
+ def _run_editor(stdscr, original, path):
+     editor = ConsoleTextEditor(stdscr, original, path)
+     editor.run()
+
+
+ def edit_config(path):
+     # Add this to the screen in case curses doesn't finish cleaning up on ctrl-c
+     print("Press Any Key to Continue...")
+     original = _load_file(path)
+     curses.wrapper(lambda stdscr: _run_editor(stdscr, original, path))
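Finally, a minimal sketch of launching the editor; 'config.yaml' is a hypothetical path. Per the on-screen hint, Ctrl-G writes the buffer back to the file and Ctrl-C exits without saving:

from MIDRC_MELODY.common.edit_config import edit_config

edit_config('config.yaml')  # takes over the terminal until Ctrl-G or Ctrl-C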