midrc_melody-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. MIDRC_MELODY/__init__.py +0 -0
  2. MIDRC_MELODY/__main__.py +4 -0
  3. MIDRC_MELODY/common/__init__.py +0 -0
  4. MIDRC_MELODY/common/data_loading.py +199 -0
  5. MIDRC_MELODY/common/data_preprocessing.py +134 -0
  6. MIDRC_MELODY/common/edit_config.py +156 -0
  7. MIDRC_MELODY/common/eod_aaod_metrics.py +292 -0
  8. MIDRC_MELODY/common/generate_eod_aaod_spiders.py +69 -0
  9. MIDRC_MELODY/common/generate_qwk_spiders.py +56 -0
  10. MIDRC_MELODY/common/matplotlib_spider.py +425 -0
  11. MIDRC_MELODY/common/plot_tools.py +132 -0
  12. MIDRC_MELODY/common/plotly_spider.py +217 -0
  13. MIDRC_MELODY/common/qwk_metrics.py +244 -0
  14. MIDRC_MELODY/common/table_tools.py +230 -0
  15. MIDRC_MELODY/gui/__init__.py +0 -0
  16. MIDRC_MELODY/gui/config_editor.py +200 -0
  17. MIDRC_MELODY/gui/data_loading.py +157 -0
  18. MIDRC_MELODY/gui/main_controller.py +154 -0
  19. MIDRC_MELODY/gui/main_window.py +545 -0
  20. MIDRC_MELODY/gui/matplotlib_spider_widget.py +204 -0
  21. MIDRC_MELODY/gui/metrics_model.py +62 -0
  22. MIDRC_MELODY/gui/plotly_spider_widget.py +56 -0
  23. MIDRC_MELODY/gui/qchart_spider_widget.py +272 -0
  24. MIDRC_MELODY/gui/shared/__init__.py +0 -0
  25. MIDRC_MELODY/gui/shared/react/__init__.py +0 -0
  26. MIDRC_MELODY/gui/shared/react/copyabletableview.py +100 -0
  27. MIDRC_MELODY/gui/shared/react/grabbablewidget.py +406 -0
  28. MIDRC_MELODY/gui/tqdm_handler.py +210 -0
  29. MIDRC_MELODY/melody.py +102 -0
  30. MIDRC_MELODY/melody_gui.py +111 -0
  31. MIDRC_MELODY/resources/MIDRC.ico +0 -0
  32. midrc_melody-0.3.3.dist-info/METADATA +151 -0
  33. midrc_melody-0.3.3.dist-info/RECORD +37 -0
  34. midrc_melody-0.3.3.dist-info/WHEEL +5 -0
  35. midrc_melody-0.3.3.dist-info/entry_points.txt +4 -0
  36. midrc_melody-0.3.3.dist-info/licenses/LICENSE +201 -0
  37. midrc_melody-0.3.3.dist-info/top_level.txt +1 -0
MIDRC_MELODY/__main__.py
@@ -0,0 +1,4 @@
+ from MIDRC_MELODY.melody import main
+
+ if __name__ == "__main__":
+     main()
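With the wheel installed, this __main__ module makes the package runnable as python -m MIDRC_MELODY, which simply delegates to main() in MIDRC_MELODY/melody.py.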
MIDRC_MELODY/common/data_loading.py
@@ -0,0 +1,199 @@
+ # Copyright (c) 2025 Medical Imaging and Data Resource Center (MIDRC).
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ """ Data Loading and Preprocessing Functions """
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ import pickle
+ import sys
+ import time
+ from typing import Any, Dict, List, Optional, Tuple
+
+ import pandas as pd
+
+ from MIDRC_MELODY.common.data_preprocessing import bin_dataframe_column
+
+
+ def check_file_exists(file_path: str, key_name: str) -> None:
+     """
+     Check if a file exists and exit gracefully with an error message if it doesn't.
+
+     :arg file_path: Path to the file to check.
+     :arg key_name: Key name in the configuration file.
+     """
+     if not Path(file_path).exists():
+         print(f"Error: The file specified for '{key_name}' ('{file_path}') does not exist.")
+         print(f"Please update the '{key_name}' path in the config file to point to a valid file.")
+         print("Ensure the path is correct and accessible.")
+         sys.exit(1)
+
+
+ def create_matched_df_from_files(input_data: dict, numeric_cols_dict: dict) -> Tuple[pd.DataFrame, list, list]:
+     """
+     Create a matched DataFrame from the truth and test files
+
+     :arg input_data: Dictionary containing the input data
+     :arg numeric_cols_dict: Dictionary containing the numeric columns information
+
+     :return: A tuple containing the matched DataFrame, a list of categories, and a list of test columns
+     """
+     truth_file = input_data['truth file']
+     test_scores_file = input_data['test scores']
+
+     # Check if files exist
+     check_file_exists(truth_file, 'truth file')
+     check_file_exists(test_scores_file, 'test scores')
+
+     # Read the truth and test scores files
+     df_truth = pd.read_csv(truth_file)
+     df_test = pd.read_csv(test_scores_file)
+     uid_col = input_data.get('uid column', 'case_name')
+     truth_col = input_data.get('truth column', 'truth')
+
+     test_columns = df_test[df_test.columns.difference([uid_col])].columns
+     categories = df_truth[df_truth.columns.difference([uid_col, truth_col])].columns
+
+     # Bin numerical columns, specifically 'age'
+     for str_col, col_dict in numeric_cols_dict.items():
+         num_col = col_dict['raw column'] if 'raw column' in col_dict else str_col
+         bins = col_dict['bins'] if 'bins' in col_dict else None
+         labels = col_dict['labels'] if 'labels' in col_dict else None
+
+         if num_col in df_truth.columns:
+             df_truth = bin_dataframe_column(df_truth, num_col, str_col, bins=bins, labels=labels)
+             categories = categories.map(lambda x, col=str_col, num=num_col: col if x == num else x)
+
+     return match_cases(df_truth, df_test, uid_col), categories.tolist(), test_columns.tolist()
+
+
+ def match_cases(df1, df2, column) -> pd.DataFrame:
+     """
+     Match cases between two DataFrames
+
+     :arg df1: First DataFrame
+     :arg df2: Second DataFrame
+     :arg column: Column to match on
+
+     :return: A DataFrame containing the matched cases
+     """
+     merged_df = df1.merge(df2, on=column, how='inner')  # , suffixes=('_truth', '_ai'))
+     return merged_df
+
+
+ # Step 5: Determine reference groups
+ def determine_valid_n_reference_groups(df, categories, min_count=10) -> Tuple[dict, dict, pd.DataFrame]:
+     """
+     Determine the valid and reference groups for the given categories
+
+     :arg df: DataFrame
+     :arg categories: List of categories
+     :arg min_count: Minimum count for a group to be considered valid
+
+     :return: A tuple containing the reference groups, valid groups, and the filtered DataFrame
+     """
+     if isinstance(categories, pd.Index):
+         categories = categories.to_list()
+
+     reference_groups = {}
+     valid_groups = {}
+
+     for category in categories:
+         valid_groups[category] = {}
+         category_counts = df[category].value_counts()
+
+         for value in category_counts.index:
+             if category_counts[value] >= min_count and value != 'Not Reported':
+                 valid_groups[category][value] = category_counts[value]
+
+         if valid_groups[category]:
+             reference_groups[category] = max(valid_groups[category], key=valid_groups[category].get)
+
+     # Filter the DataFrame based on valid groups
+     filtered_df = df.copy()
+     for category in categories:
+         valid_values = list(valid_groups[category].keys())
+         filtered_df = filtered_df[filtered_df[category].isin(valid_values)]
+
+     return reference_groups, valid_groups, filtered_df
+
+
+ def save_pickled_data(output_config: dict, metric: str, data: Any):
+     """
+     Save pickled data to a file
+
+     :arg output_config: Output configuration dictionary
+     :arg metric: Metric name
+     :arg data: Data to save
+     """
+     metric_config = output_config.get(metric.lower(), {})
+     if metric_config.get('save', False):
+         filename = f"{metric_config['file prefix']}{time.strftime('%Y%m%d%H%M%S')}.pkl"
+         print(f'Saving {metric} data to filename:', filename)
+         # Create directory if it doesn't exist
+         Path(filename).parent.mkdir(parents=True, exist_ok=True)
+
+         with open(filename, 'wb') as f:
+             pickle.dump(data, f)
+
+
+ def check_required_columns(df: pd.DataFrame, columns: List[str]) -> None:
+     """
+     Raise an error if any required column is missing.
+
+     :arg df: DataFrame to check for required columns.
+     :arg columns: List of required columns.
+     """
+     missing = [col for col in columns if col not in df.columns]
+     if missing:
+         raise ValueError(f"Missing required columns: {missing}")
+
+
+ @dataclass(frozen=True)
+ class TestAndDemographicData:
+     """
+     Container for matched test-score and demographic data
+     """
+     matched_df: pd.DataFrame
+     truth_col: str
+     categories: List[str]
+     test_cols: List[str]
+     reference_groups: Dict[str, Any]
+     valid_groups: Dict[str, List[Any]]
+     n_iter: Optional[int]
+     base_seed: Optional[int]
+
+
+ def build_test_and_demographic_data(config: Dict[str, Any]) -> TestAndDemographicData:
+     """
+     Build the TestAndDemographicData object from the configuration dictionary.
+
+     :arg config: Configuration dictionary
+
+     :returns: TestAndDemographicData object
+     """
+     matched_df, categories, test_cols = create_matched_df_from_files(config['input data'], config['numeric_cols'])
+     min_count = config.get('min count per category', 10)
+     reference_groups, valid_groups, _ = determine_valid_n_reference_groups(matched_df, categories, min_count=min_count)
+     n_iter = config.get('bootstrap', {}).get('iterations', 1000)
+     base_seed = config.get('bootstrap', {}).get('seed', None)
+     truth_col = config['input data'].get('truth column', 'truth')
+
+     # Check required columns before further processing
+     required_columns = [truth_col] + test_cols + categories
+     check_required_columns(matched_df, required_columns)
+
+     return TestAndDemographicData(matched_df, truth_col, categories, test_cols, reference_groups, valid_groups, n_iter,
+                                   base_seed)
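For reference, a minimal sketch of the configuration dict that build_test_and_demographic_data consumes, inferred from the keys this module reads; the file paths, the age binning, and the bootstrap values below are hypothetical placeholders, not values shipped with the package:

from MIDRC_MELODY.common.data_loading import build_test_and_demographic_data

config = {
    'input data': {
        'truth file': 'truth.csv',    # hypothetical path
        'test scores': 'scores.csv',  # hypothetical path
        'uid column': 'case_name',
        'truth column': 'truth',
    },
    'numeric_cols': {
        'age_group': {                # hypothetical binned-column name
            'raw column': 'age',
            'bins': [0, 18, 40, 65, 120],
        },
    },
    'min count per category': 10,
    'bootstrap': {'iterations': 1000, 'seed': 42},
}

data = build_test_and_demographic_data(config)
print(data.reference_groups)  # largest valid group per demographic category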
MIDRC_MELODY/common/data_preprocessing.py
@@ -0,0 +1,134 @@
+ # Copyright (c) 2025 Medical Imaging and Data Resource Center (MIDRC).
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ """
+ This module contains functions for data preprocessing and combining datasets.
+ """
+
+ import numpy as np
+ import pandas as pd
+
+
+ def _generate_default_labels(bins: list[int] | list[float]):
+     """
+     Generates default labels for the bins.
+
+     Args:
+         bins (list): The bins used for the binning process
+
+     Returns:
+         list(str): A list of labels for the bins
+     """
+     labels = []
+     for i in range(len(bins) - 1):
+         if isinstance(bins[i], int) and isinstance(bins[i + 1], int):
+             if i < len(bins) - 2:
+                 labels.append(f"{bins[i]}-{bins[i + 1] - 1}")
+             else:
+                 labels.append(f">={bins[i]}")
+         else:
+             labels.append(f"{bins[i]}-{bins[i + 1]}")
+     return labels
+
+
+ def _adjust_outliers(df: pd.DataFrame, cut_column_name: str, column_name: str, bins: list[int] | list[float]):
+     """
+     Adjusts the outliers in the cut column.
+
+     Args:
+         df: input DataFrame
+         cut_column_name: column name to be adjusted (e.g. created by the binning process)
+         column_name: column name to be checked for outliers
+         bins: The bins used for the binning process
+
+     Returns:
+         pd.DataFrame: DataFrame with the outliers adjusted in the cut column
+     """
+     new_text = "Not Reported"
+     low_text = "Outlier_Low"
+     high_text = "Outlier_High"
+     print(f"WARNING: There are values outside the bins specified for the '{column_name}' column.")
+     df.loc[df[cut_column_name].isna() & (df[column_name] < bins[0]), cut_column_name] = low_text
+     df.loc[df[cut_column_name].isna() & (df[column_name] >= bins[-1]), cut_column_name] = high_text
+     df.loc[df[cut_column_name].isna(), cut_column_name] = new_text
+     if (df[cut_column_name] == low_text).sum() > 0:
+         print(f" {(df[cut_column_name] == low_text).sum()} values are below the min bin value.\n"
+               f" These will be placed in a new '{low_text}' category.")
+     if (df[cut_column_name] == high_text).sum() > 0:
+         print(f" {(df[cut_column_name] == high_text).sum()} values are above the max bin value.\n"
+               f" These will be placed in a new '{high_text}' category.")
+     if (df[cut_column_name] == new_text).sum() > 0:
+         print(f" {(df[cut_column_name] == new_text).sum()} values are nan.\n"
+               f" These will be placed in a new '{new_text}' category.")
+     return df
+
+
+ def bin_dataframe_column(df_to_bin: pd.DataFrame, column_name: str, cut_column_name: str = 'CUT',
+                          bins: list[int] | list[float] | None = None, labels: list[str] | None = None, *, right: bool = False):
+     """
+     Cuts a numeric column into bins and adds a column with the bin labels.
+
+     Args:
+         df_to_bin: pandas DataFrame containing the data
+         column_name: name of the column to be binned
+         cut_column_name: name of the column to be added with the bin labels
+         bins: list of bins to be used for the binning
+         labels: list of labels for the bins
+         right: whether to use right-inclusive intervals
+
+     Returns:
+         pd.DataFrame: pandas DataFrame with the binned column and the labels
+     """
+     if column_name not in df_to_bin.columns:
+         return df_to_bin
+
+     if bins is None:
+         bins = np.arange(0, 100, 10).tolist()  # Python ints so _generate_default_labels sees integer bins
+
+     if labels is None:
+         labels = _generate_default_labels(bins)
+
+     df_out = df_to_bin.assign(**{
+         cut_column_name: pd.cut(
+             df_to_bin[column_name],
+             bins=bins,
+             labels=labels,
+             right=right,
+         ).astype("string"),
+     })
+
+     if df_out[cut_column_name].isna().any():
+         df_out = _adjust_outliers(df_out, cut_column_name, column_name, bins)
+
+     return df_out
+
+
+ def combine_datasets_from_list(df_list: list[pd.DataFrame], dataset_column: str = '_dataset_'):
+     """
+     Combines a list of dataframes into a single dataframe with a new column for the dataset name.
+
+     Args:
+         df_list (list[pd.DataFrame]): A list of dataframes to be combined.
+         dataset_column (str, optional): The name of the column to be used for the dataset name. Defaults to '_dataset_'.
+
+     Returns:
+         pd.DataFrame: A combined dataframe with a new column for the dataset name.
+     """
+     labels = [f'Dataset {i}' for i in range(len(df_list))]  # Dataset labels
+     combined_df = pd.concat(
+         [df.assign(**{dataset_column: label}) for label, df in zip(labels, df_list)],
+         ignore_index=True,
+     )
+     return combined_df
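A minimal sketch exercising bin_dataframe_column on hypothetical data. With right=False (the default) the bins are left-inclusive, so 89 lands in the last labeled bin, while 95 falls outside [0, 90) and is routed to the 'Outlier_High' category by _adjust_outliers (which also prints a WARNING):

import pandas as pd

from MIDRC_MELODY.common.data_preprocessing import bin_dataframe_column

df = pd.DataFrame({'age': [5, 17, 42, 89, 95]})  # hypothetical sample
binned = bin_dataframe_column(df, 'age', cut_column_name='age_group',
                              bins=[0, 18, 65, 90])
print(binned['age_group'].tolist())
# ['0-17', '0-17', '18-64', '>=65', 'Outlier_High'] with the default labels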
MIDRC_MELODY/common/edit_config.py
@@ -0,0 +1,156 @@
+ # Copyright (c) 2025 Medical Imaging and Data Resource Center (MIDRC).
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ import curses
+ from curses.textpad import rectangle, Textbox
+
+
+ class PadWrapper:  # Scrollable viewport wrapper around a curses pad
+     def __init__(self, pad, win_h, win_w, top, left):
+         self.real_pad = pad
+         self.offset = 0
+         self.win_h = win_h
+         self.win_w = win_w
+         self.top = top
+         self.left = left
+
+     def refresh(self, *args):  # ignore Textbox-supplied args; repaint the viewport
+         self.real_pad.refresh(
+             self.offset, 0,
+             self.top, self.left,
+             self.top + self.win_h - 1,
+             self.left + self.win_w - 1
+         )
+
+     def __getattr__(self, name):
+         return getattr(self.real_pad, name)
+
+
+ def _load_file(path):
+     with open(path, 'r', encoding='utf-8') as f:
+         return f.read().split('\n')
+
+
+ def _save_file(path, lines):
+     with open(path, 'w', encoding='utf-8') as f:
+         f.write('\n'.join(lines))
+
+
+ def _handle_scroll(key, pad):
+     real_h, _ = pad.real_pad.getmaxyx()
+     win_h = pad.win_h
+     y, x = pad.getyx()
+     if key == curses.KEY_DOWN:
+         if y - pad.offset >= win_h - 1 and pad.offset < real_h - win_h:
+             pad.offset += 1
+     elif key == curses.KEY_UP:
+         if y - pad.offset <= 0 and pad.offset > 0:
+             pad.offset -= 1
+     elif key == curses.KEY_NPAGE:
+         pad.offset = min(pad.offset + win_h, real_h - win_h)
+         if y < pad.offset:
+             pad.move(pad.offset, x)
+     elif key == curses.KEY_PPAGE:
+         pad.offset = max(pad.offset - win_h, 0)
+         if y >= pad.offset + win_h:
+             pad.move(pad.offset + win_h - 1, x)
+
+     pad.refresh()
+     return key
+
+
+ class ConsoleTextEditor:  # Full-screen curses editor for a text file
+     def __init__(self, stdscr, original, path):
+         self.stdscr = stdscr
+         self.original = original
+         self.path = path
+         self._init_curses()
+         self._draw_border()
+         self.pad = self._make_pad()
+         self._fill_pad()
+
+     def _init_curses(self):
+         curses.cbreak()
+         curses.noecho()
+         self.stdscr.keypad(True)
+         curses.curs_set(1)
+
+     def _draw_border(self):
+         h, w = self.stdscr.getmaxyx()
+         rectangle(self.stdscr, 1, 1, h - 2, w - 2)
+         self.stdscr.addstr(
+             h - 1, 2,
+             "Ctrl-G=save Ctrl-C=cancel ↑/↓=scroll PgUp/PgDn=jump"
+         )
+         self.stdscr.refresh()
+
+     def _make_pad(self):
+         h, w = self.stdscr.getmaxyx()
+         win_h, win_w = h - 4, w - 4
+         real_h = max(len(self.original) + 1, win_h)
+         _real_pad = curses.newpad(real_h, win_w)
+         pad = PadWrapper(_real_pad, win_h, win_w, top=2, left=2)
+         pad.keypad(True)
+         pad.scrollok(True)
+         pad.idlok(True)
+         return pad
+
+     def _fill_pad(self):
+         for idx, line in enumerate(self.original):
+             try:
+                 self.pad.addstr(idx, 0, line)
+             except curses.error:
+                 pass
+         self.pad.move(0, 0)
+         self.pad.refresh()
+
+     def _collect_lines(self):
+         real_h, _ = self.pad.real_pad.getmaxyx()
+         lines = []
+         for i in range(real_h):
+             raw = self.pad.instr(i, 0, self.pad.win_w).decode('utf-8', 'ignore')
+             lines.append(raw.rstrip('\x00').rstrip())  # instr pads rows with blanks
+         return lines
+
+     def _validator(self, ch):
+         _handle_scroll(ch, self.pad)
+         try:
+             self.pad.refresh()
+         except curses.error:
+             pass
+         return ch
+
+     def run(self):
+         tb = Textbox(self.pad)
+         try:
+             tb.edit(self._validator)
+         except KeyboardInterrupt:
+             return
+         finally:
+             curses.flushinp()
+         lines = self._collect_lines()
+         _save_file(self.path, lines)
+
+
+ def _run_editor(stdscr, original, path):
+     editor = ConsoleTextEditor(stdscr, original, path)
+     editor.run()
+
+
+ def edit_config(path):
+     # Add this to the screen in case curses doesn't finish cleaning up on ctrl-c
+     print("Press Any Key to Continue...")
+     original = _load_file(path)
+     curses.wrapper(lambda stdscr: _run_editor(stdscr, original, path))
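Finally, a minimal sketch of launching the editor; 'config.yaml' is a hypothetical path. Per the on-screen hint, Ctrl-G writes the buffer back to the file and Ctrl-C exits without saving:

from MIDRC_MELODY.common.edit_config import edit_config

edit_config('config.yaml')  # takes over the terminal until Ctrl-G or Ctrl-C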