PyPI - auditapp - Versions diffs - 0.0.7__py3-none-any.whl - Mend

auditapp 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73)

audit/__init__.py +0 -0
audit/app/.streamlit/config.toml +7 -0
audit/app/APP.py +74 -0
audit/app/__init__.py +0 -0
audit/app/launcher.py +38 -0
audit/app/util/__init__.py +0 -0
audit/app/util/commons/__init__.py +0 -0
audit/app/util/commons/checks.py +24 -0
audit/app/util/commons/data_preprocessing.py +98 -0
audit/app/util/commons/sidebars.py +330 -0
audit/app/util/commons/utils.py +11 -0
audit/app/util/constants/__init__.py +0 -0
audit/app/util/constants/descriptions.py +371 -0
audit/app/util/constants/features.py +168 -0
audit/app/util/constants/metrics.py +19 -0
audit/app/util/images/AUDIT.png +0 -0
audit/app/util/images/AUDIT_big.jpeg +0 -0
audit/app/util/images/AUDIT_medium.jpeg +0 -0
audit/app/util/images/AUDIT_transparent.png +0 -0
audit/app/util/images/artorg_logo.gif +0 -0
audit/app/util/images/audit_schema.png +0 -0
audit/app/util/images/uniovi_logo.png +0 -0
audit/app/util/pages/Home_Page.py +134 -0
audit/app/util/pages/Longitudinal_Measurements.py +98 -0
audit/app/util/pages/Model_Performance_Analysis.py +146 -0
audit/app/util/pages/Multi_Model_Performance_Comparison.py +93 -0
audit/app/util/pages/Multivariate_Feature_Analysis.py +96 -0
audit/app/util/pages/Pairwise_Model_Performance_Comparison.py +221 -0
audit/app/util/pages/Segmentation_Error_Matrix.py +198 -0
audit/app/util/pages/Subjects_Exploration.py +119 -0
audit/app/util/pages/Univariate_Feature_Analysis.py +149 -0
audit/app/util/pages/__init__.py +0 -0
audit/cli.py +46 -0
audit/configs/app.yml +42 -0
audit/configs/feature_extractor.yml +31 -0
audit/configs/itk_labels.txt +6 -0
audit/configs/metric_extractor.yml +34 -0
audit/feature_extractor.py +69 -0
audit/features/__init__.py +0 -0
audit/features/main.py +177 -0
audit/features/spatial.py +107 -0
audit/features/statistical.py +122 -0
audit/features/texture.py +111 -0
audit/features/tumor.py +257 -0
audit/metric_extractor.py +68 -0
audit/metrics/__init__.py +0 -0
audit/metrics/commons.py +64 -0
audit/metrics/error_matrix.py +43 -0
audit/metrics/main.py +230 -0
audit/metrics/segmentation_metrics.py +251 -0
audit/metrics/statistical_tests.py +224 -0
audit/utils/__init__.py +0 -0
audit/utils/commons/__init__.py +0 -0
audit/utils/commons/file_manager.py +516 -0
audit/utils/commons/strings.py +115 -0
audit/utils/external_tools/__init__.py +0 -0
audit/utils/external_tools/itk_snap.py +103 -0
audit/utils/sequences/__init__.py +0 -0
audit/utils/sequences/sequences.py +290 -0
audit/visualization/__init__.py +0 -0
audit/visualization/barplots.py +79 -0
audit/visualization/boxplot.py +227 -0
audit/visualization/confusion_matrices.py +207 -0
audit/visualization/constants.py +11 -0
audit/visualization/histograms.py +182 -0
audit/visualization/scatter_plots.py +238 -0
audit/visualization/sequences.py +9 -0
audit/visualization/time_series.py +123 -0
auditapp-0.0.7.dist-info/LICENSE.md +201 -0
auditapp-0.0.7.dist-info/METADATA +347 -0
auditapp-0.0.7.dist-info/RECORD +73 -0
auditapp-0.0.7.dist-info/WHEEL +4 -0
auditapp-0.0.7.dist-info/entry_points.txt +3 -0

audit/__init__.py ADDED Viewed

File without changes

audit/app/.streamlit/config.toml ADDED Viewed

@@ -0,0 +1,7 @@
+# Streamlit theme settings for the AUDIT web app.
+[theme]
+# The base theme line is commented out; the colours below are given explicitly.
+#base="light"
+primaryColor="#45ad87"
+backgroundColor="#FFFFFF"
+secondaryBackgroundColor="#f1ebe6"
+textColor="#262730"
+font="sans serif"

audit/app/APP.py ADDED Viewed

@@ -0,0 +1,74 @@
+import sys
+import warnings
+from pathlib import Path
+import streamlit as st
+from PIL import Image
+from audit.utils.commons.file_manager import load_config_file
+from audit.app.util.pages.Home_Page import home_page
+from audit.app.util.pages.Longitudinal_Measurements import longitudinal
+from audit.app.util.pages.Model_Performance_Analysis import performance
+from audit.app.util.pages.Multi_Model_Performance_Comparison import multi_model
+from audit.app.util.pages.Multivariate_Feature_Analysis import multivariate
+from audit.app.util.pages.Pairwise_Model_Performance_Comparison import pairwise_comparison
+from audit.app.util.pages.Segmentation_Error_Matrix import matrix
+from audit.app.util.pages.Subjects_Exploration import subjects
+from audit.app.util.pages.Univariate_Feature_Analysis import univariate
+warnings.simplefilter(action="ignore", category=FutureWarning)
+class AUDIT:
+    """Multi-page Streamlit application driven by a sidebar page selector.
+    Pages are registered with ``add_page`` and the selected one is rendered by ``run``.
+    """
+    def __init__(self, config):
+        """Store the configuration that is handed to every page function."""
+        self.apps = []
+        self.config = config
+    def add_page(self, title, func):
+        """Register a page.
+        Args:
+            title: Label shown in the sidebar page selector.
+            func: Callable invoked with the app configuration to render the page.
+        """
+        self.apps.append({"title": title, "function": func})
+    def run(self):
+        """Render the sidebar (logo and page selector) and then the selected page."""
+        # Emoji shortcodes need a colon on both sides; ":brain" was not a valid shortcode.
+        st.set_page_config(page_title="AUDIT", page_icon=":brain:", layout="wide")
+        # Resolve the logo relative to this file so it is found from any working directory.
+        base_dir = Path(__file__).resolve().parent
+        audit_logo_path = base_dir / "util/images/AUDIT_transparent.png"
+        if audit_logo_path.exists():
+            audit_logo = Image.open(audit_logo_path)
+            st.sidebar.image(audit_logo, use_column_width=True)
+        else:
+            st.sidebar.error(f"Logo not found: {audit_logo_path}")
+        st.sidebar.markdown("## Main Menu")
+        # The selectbox returns the registered page dict; only its title is displayed.
+        page = st.sidebar.selectbox("Select Page", self.apps, format_func=lambda page: page["title"])
+        st.sidebar.markdown("---")
+        page["function"](self.config)
+def main():
+    """Load the configuration, register every page and run the app.
+    The config path defaults to ``./configs/app.yml``; it is replaced by the value that
+    follows ``--config`` when that flag is the first script argument.
+    """
+    # Streamlit passes the script arguments through sys.argv.
+    cli_args = sys.argv
+    has_override = len(cli_args) > 2 and cli_args[1] == "--config"
+    config_path = cli_args[2] if has_override else "./configs/app.yml"
+    app = AUDIT(load_config_file(config_path))
+    # Pages appear in the sidebar selector in this order.
+    pages = (
+        ("Home Page", home_page),
+        ("Univariate Analysis", univariate),
+        ("Multivariate Analysis", multivariate),
+        ("Segmentation Error Matrix", matrix),
+        ("Model Performance Analysis", performance),
+        ("Pairwise Model Performance Comparison", pairwise_comparison),
+        ("Multi-model Performance Comparison", multi_model),
+        ("Longitudinal Measurements", longitudinal),
+        ("Subjects Exploration", subjects),
+    )
+    for page_title, render_page in pages:
+        app.add_page(page_title, render_page)
+    app.run()
+if __name__ == "__main__":
+    main()

audit/app/__init__.py ADDED Viewed

File without changes

audit/app/launcher.py ADDED Viewed

@@ -0,0 +1,38 @@
+import os
+from pathlib import Path
+import argparse
+def run_streamlit_app(config):
+    """Launch the Streamlit web app stored as ``APP.py`` next to this module.
+    Args:
+        config: Path to the app configuration file, forwarded to the app as ``--config``.
+    Raises:
+        FileNotFoundError: If ``APP.py`` does not exist next to this module.
+    """
+    import shlex
+    import subprocess
+    app_path = Path(__file__).resolve().parent / "APP.py"
+    if not app_path.exists():
+        raise FileNotFoundError(f"Streamlit app not found at: {app_path}")
+    # An argument list (no shell) keeps paths containing spaces or shell
+    # metacharacters intact and prevents command injection through ``config``.
+    command = ["streamlit", "run", str(app_path), "--", "--config", str(config)]
+    print(f"Running command: {shlex.join(command)}")
+    subprocess.run(command, check=False)
+def main():
+    """Parse the command line and start the web app with the chosen config file."""
+    cli = argparse.ArgumentParser(description="AUDIT web APP.")
+    cli.add_argument(
+        "--config",
+        default="./configs/app.yml",
+        type=str,
+        help="Path to the configuration file for web app.",
+    )
+    options = cli.parse_args()
+    run_streamlit_app(options.config)
+if __name__ == "__main__":
+    main()

audit/app/util/__init__.py ADDED Viewed

File without changes

audit/app/util/commons/__init__.py ADDED Viewed

File without changes

audit/app/util/commons/checks.py ADDED Viewed

@@ -0,0 +1,24 @@
+def health_checks(selected_sets, select_feature_names: list = None):
+    """Validate the user selections before a page is rendered.
+    Args:
+        selected_sets: Selected datasets; an empty collection fails the check.
+        select_feature_names: Selected feature names; any ``None`` entry fails the check.
+            Defaults to no features.
+    Returns:
+        tuple: ``(ok, message)``; ``message`` is empty when ``ok`` is True.
+    """
+    if len(selected_sets) == 0:
+        return False, "Please, select a dataset from the left sidebar."
+    # A ``None`` default replaces the former shared mutable ``[]`` default.
+    if select_feature_names is not None and any(name is None for name in select_feature_names):
+        return False, "Please, select an available category."
+    return True, ""
+def dataset_sanity_check(selected_sets):
+    """Return True when at least one dataset is selected, False otherwise."""
+    return len(selected_sets) > 0
+def models_sanity_check(baseline_model, benchmark_model):
+    """Return True when the two models differ, False when they compare equal."""
+    same_model = baseline_model == benchmark_model
+    return not same_model

audit/app/util/commons/data_preprocessing.py ADDED Viewed

@@ -0,0 +1,98 @@
+from typing import List
+from typing import Union
+import pandas as pd
+def select_datasets(data, sets=None):
+    """Keep the rows whose ``set`` column matches the requested dataset(s).
+    Args:
+        data: DataFrame with a ``set`` column.
+        sets: One dataset name or a list of names; any other value disables the filter.
+    Returns:
+        The filtered DataFrame, or ``data`` itself when no filter applies.
+    """
+    if isinstance(sets, str):
+        keep = data.set == sets
+    elif isinstance(sets, list):
+        keep = data.set.isin(sets)
+    else:
+        return data
+    return data[keep]
+def select_features(data, features=None):
+    """Keep only the requested columns.
+    Args:
+        data: Source DataFrame.
+        features: One column name or a list of names; any other value keeps every column.
+    Returns:
+        A DataFrame restricted to the requested columns, or ``data`` itself when no
+        selection applies.
+    """
+    # A single name is wrapped in a list so the result is always a DataFrame.
+    wanted = [features] if isinstance(features, str) else features
+    if not isinstance(wanted, list):
+        return data
+    return data[wanted]
+def select_regions(data, regions=None):
+    """Keep the rows whose ``region`` column matches the requested region(s).
+    Args:
+        data: DataFrame with a ``region`` column.
+        regions: One region name or a list of names; any other value disables the filter.
+    Returns:
+        The filtered DataFrame, or ``data`` itself when no filter applies.
+    """
+    if isinstance(regions, str):
+        keep = data.region == regions
+    elif isinstance(regions, list):
+        keep = data.region.isin(regions)
+    else:
+        return data
+    return data[keep]
+def select_models(data, models=None):
+    """Keep the rows whose ``model`` column matches the requested model(s).
+    Args:
+        data: DataFrame with a ``model`` column.
+        models: One model name or a list of names; any other value disables the filter.
+    Returns:
+        The filtered DataFrame, or ``data`` itself when no filter applies.
+    """
+    if isinstance(models, str):
+        keep = data.model == models
+    elif isinstance(models, list):
+        keep = data.model.isin(models)
+    else:
+        return data
+    return data[keep]
+def select_subjects(data, subjects=None):
+    """Keep the rows whose ``ID`` column matches the requested subject(s).
+    Args:
+        data: DataFrame with an ``ID`` column.
+        subjects: One subject ID (string) or a list of IDs; any other value disables the filter.
+    Returns:
+        The filtered DataFrame, or ``data`` itself when no filter applies.
+    """
+    if isinstance(subjects, str):
+        keep = data.ID == subjects
+    elif isinstance(subjects, list):
+        keep = data.ID.isin(subjects)
+    else:
+        return data
+    return data[keep]
+def filter_outliers(
+        data: pd.DataFrame,
+        filtering_method: str = None,
+        filtering_feature: str = None,
+        remove_low: float = None,
+        remove_up: float = None,
+        clip_low: float = None,
+        clip_up: float = None,
+        num_std_devs: int = None
+):
+    """Filter or clip ``data`` on one feature according to the chosen method.
+    Args:
+        data: Input DataFrame; it is never modified.
+        filtering_method: "Removing outliers", "Clipping outliers" or "Standard deviations".
+            Any other value (e.g. None or "No filter") returns ``data`` unchanged.
+        filtering_feature: Column the method applies to.
+        remove_low, remove_up: Inclusive bounds kept by "Removing outliers".
+        clip_low, clip_up: Bounds used by "Clipping outliers".
+        num_std_devs: Half-width, in standard deviations around the mean, of the
+            interval kept by "Standard deviations".
+    Returns:
+        The filtered (or clipped) DataFrame.
+    """
+    if filtering_method == "Removing outliers":
+        return data[data[filtering_feature].between(remove_low, remove_up)]
+    if filtering_method == "Clipping outliers":
+        # Clip on a copy so the caller's DataFrame (possibly a slice) is left untouched.
+        clipped = data.copy()
+        clipped[filtering_feature] = clipped[filtering_feature].clip(clip_low, clip_up)
+        return clipped
+    if filtering_method == "Standard deviations":
+        # Mean and std are only computed here, so methods that do not need them
+        # no longer fail when the feature is missing or not numeric.
+        values = data[filtering_feature]
+        center = values.mean()
+        spread = int(num_std_devs) * values.std()
+        return data[values.between(center - spread, center + spread)]
+    return data
+def processing_data(
+        data: pd.DataFrame,
+        sets: Union[List[str], str] = None,
+        models: Union[List[str], str] = None,
+        features: Union[List[str], str] = None,
+        regions: Union[List[str], str] = None,
+        subjects: Union[List[str], str] = None,
+        filtering_method: str = None,
+        filtering_feature: str = None,
+        remove_low: float = None,
+        remove_up: float = None,
+        clip_low: float = None,
+        clip_up: float = None,
+        num_std_devs: int = None
+):
+    """Apply every selection step and then the outlier filter.
+    The steps run in a fixed order: datasets, models, features (columns), regions,
+    subjects, and finally ``filter_outliers``. Each selector is skipped by its own
+    function when the corresponding argument is neither a string nor a list.
+    Returns:
+        The DataFrame left after all steps.
+    """
+    selection_steps = (
+        (select_datasets, sets),
+        (select_models, models),
+        (select_features, features),
+        (select_regions, regions),
+        (select_subjects, subjects),
+    )
+    for apply_selection, wanted in selection_steps:
+        data = apply_selection(data, wanted)
+    return filter_outliers(
+        data,
+        filtering_method=filtering_method,
+        filtering_feature=filtering_feature,
+        remove_low=remove_low,
+        remove_up=remove_up,
+        clip_low=clip_low,
+        clip_up=clip_up,
+        num_std_devs=num_std_devs,
+    )

audit/app/util/commons/sidebars.py ADDED Viewed

@@ -0,0 +1,330 @@
+import streamlit as st
+from audit.app.util.constants.features import Features
+from audit.app.util.constants.metrics import Metrics
+from audit.utils.commons.strings import pretty_string
+# Load constants
+# Metrics helper; supplies the metric mapping and the sort-order mapping used below.
+const_metrics = Metrics()
+# Mapping whose keys are offered in the metric selectors and whose values are
+# matched against the DataFrame columns.
+metrics_dict = const_metrics.get_metrics()
+# Mapping from the "Order by:" option labels to the value returned to the caller.
+orderby_dict = const_metrics.orderby
+# Features helper; supplies the feature categories and the per-category feature mappings.
+const_features = Features()
+def setup_sidebar_multi_datasets(data_paths):
+    """Sidebar multiselect over the dataset names, all of them selected by default.
+    Args:
+        data_paths: Mapping whose keys are the dataset names offered to the user.
+    Returns:
+        The dataset names currently selected.
+    """
+    expander = st.sidebar.expander("Datasets", expanded=True)
+    with expander:
+        selected_sets = st.multiselect(
+            "Select datasets to visualize:",
+            options=data_paths.keys(),
+            default=data_paths.keys(),
+        )
+    return selected_sets
+def setup_sidebar_single_dataset(data):
+    """Sidebar selectbox over the distinct values of ``data.set``; the first one is preselected.
+    Returns:
+        The selected dataset name.
+    """
+    with st.sidebar.expander("Datasets", expanded=True):
+        dataset_names = list(data.set.unique())
+        selected_set = st.selectbox("Select dataset to analyze:", options=dataset_names, index=0)
+    return selected_set
+def setup_sidebar_single_metric(data):
+    """Sidebar selectbox over the metrics whose column is present in ``data``.
+    Returns:
+        The selected metric label (a key of ``metrics_dict``).
+    """
+    present_columns = data.columns
+    available_metrics = [label for label, column in metrics_dict.items() if column in present_columns]
+    with st.sidebar.expander("Metrics", expanded=True):
+        selected_metric = st.selectbox("Select metric to analyze:", options=available_metrics, index=0)
+    return selected_metric
+def setup_sidebar_multi_metrics(data):
+    """Sidebar multiselect over the metrics whose column is present in ``data``.
+    The first available metric is preselected; nothing is preselected when no
+    metric column is present.
+    Returns:
+        The selected metric labels (keys of ``metrics_dict``).
+    """
+    available_metrics = [k for k, v in metrics_dict.items() if v in data.columns]
+    with st.sidebar.expander("Metrics", expanded=True):
+        selected_metrics = st.multiselect(
+            label="Select metrics to analyze:",
+            options=available_metrics,
+            # A slice instead of ``[0]`` so an empty list of metrics does not raise IndexError.
+            default=available_metrics[:1]
+        )
+    return selected_metrics
+def setup_sidebar_single_model(data):
+    """Sidebar selectbox over the distinct values of ``data.model``; the first one is preselected.
+    Returns:
+        The selected model name.
+    """
+    with st.sidebar.expander("Models", expanded=True):
+        model_names = list(data.model.unique())
+        selected_model = st.selectbox("Select model to analyze:", options=model_names, index=0)
+    return selected_model
+def setup_sidebar_multi_model(data):
+    """Sidebar multiselect over the distinct values of ``data.model``, all selected by default.
+    Returns:
+        The selected model names.
+    """
+    with st.sidebar.expander("Models", expanded=True):
+        selected_models = st.multiselect(
+            "Select models to analyze:",
+            options=list(data.model.unique()),
+            default=list(data.model.unique()),
+        )
+    return selected_models
+def setup_sidebar_pairwise_models(data, selected_set):
+    """Sidebar selectboxes for the baseline and benchmark models of one dataset.
+    Args:
+        data: DataFrame with ``set`` and ``model`` columns.
+        selected_set: Dataset whose models are offered.
+    Returns:
+        tuple: ``(baseline_model, benchmark_model)``. An error is shown in the sidebar
+        when both are the same model.
+    """
+    with st.sidebar.expander("Models", expanded=True):
+        models_available = data[data.set == selected_set].model.unique()
+        # Preselect the second model as benchmark only when there is one; a fixed
+        # index of 1 is out of range for a dataset with a single model.
+        benchmark_index = 1 if len(models_available) > 1 else 0
+        baseline_model = st.selectbox("Select the baseline model:", options=models_available, index=0)
+        benchmark_model = st.selectbox(
+            "Select the benchmark model:", options=models_available, index=benchmark_index
+        )
+        if baseline_model == benchmark_model:
+            st.error("Models selected must be different to make a performance comparison", icon="🚨")
+    return baseline_model, benchmark_model
+def setup_sidebar_features(data, name, c_index=0, f_index=0, key=None):
+    """Sidebar pair of selectboxes: a feature category, then a feature of that category.
+    Only features whose column is present in ``data`` are offered.
+    Args:
+        data: DataFrame whose columns decide which features are available.
+        name: Title of the sidebar expander.
+        c_index: Index of the preselected category.
+        f_index: Index of the preselected feature.
+        key: Suffix used to build the widget keys ``c_<key>`` and ``f_<key>``.
+    Returns:
+        The value mapped to the chosen feature label, or None when there is no match.
+    """
+    with st.sidebar.expander(name, expanded=True):
+        category = st.selectbox(
+            "Feature category:", options=const_features.categories, index=c_index, key=f"c_{key}"
+        )
+        feature_labels = [
+            label
+            for label, column in const_features.get_features(category).items()
+            if column in data.columns
+        ]
+        chosen_label = st.selectbox(
+            "Feature name:", options=feature_labels, index=f_index, key=f"f_{key}"
+        )
+        selected_feature = const_features.get_features(category).get(chosen_label, None)
+    return selected_feature
+def setup_sidebar_color(data, name, c_index=0, f_index=0, key=None):
+    """Sidebar controls to choose the variable used for colouring.
+    The category list is "Dataset" followed by the feature categories. Choosing
+    "Dataset" needs no second selectbox.
+    Args:
+        data: DataFrame whose columns decide which features are available.
+        name: Title of the sidebar expander.
+        c_index: Index of the preselected category.
+        f_index: Index of the preselected feature.
+        key: Suffix used to build the widget keys ``c_<key>`` and ``f_<key>``.
+    Returns:
+        ``'Dataset'``, or the value mapped to the chosen feature label (None when
+        there is no match).
+    """
+    with st.sidebar.expander(name, expanded=True):
+        color_category = st.selectbox(
+            "Feature category:",
+            options=["Dataset"] + const_features.categories,
+            index=c_index,
+            key=f"c_{key}",
+        )
+        if color_category == "Dataset":
+            return "Dataset"
+        feature_labels = [
+            label
+            for label, column in const_features.get_features(color_category).items()
+            if column in data.columns
+        ]
+        chosen_label = st.selectbox(
+            "Feature name:", options=feature_labels, index=f_index, key=f"f_{key}"
+        )
+        return const_features.get_features(color_category).get(chosen_label, None)
+def setup_highlight_subject(data):
+    """Sidebar controls to pick a subject to highlight.
+    A dataset is chosen first; the subject list then holds None followed by the
+    distinct ``ID`` values of that dataset, with None preselected.
+    Returns:
+        The chosen subject ID, or None.
+    """
+    with st.sidebar.expander(label="Highlight subject"):
+        chosen_set = st.selectbox("Dataset:", options=data.set.unique(), index=0)
+        subject_ids = list(data[data.set == chosen_set].ID.unique())
+        highlight_subject = st.selectbox(
+            "Enter subject ID to highlight", options=[None] + subject_ids, index=0
+        )
+    return highlight_subject
+def setup_histogram_options(plot_type):
+    """
+    Sidebar controls for histogram binning.
+    Args:
+        plot_type (str): Type of plot; the controls are only shown for "Histogram".
+    Returns:
+        tuple: ``(n_bins, bins_size)``. At most one of them is set, depending on the
+        option the user picked; both are None for any other plot type.
+    """
+    if plot_type != "Histogram":
+        return None, None
+    n_bins = bins_size = None
+    with st.sidebar.expander("Customization", expanded=True):
+        option = st.selectbox("Define number of bins or bins size", ("Number of bins", "Bins size"))
+        if option == "Bins size":
+            bins_size = st.number_input(
+                "Select bins size", min_value=1, max_value=None, value=1, step=1, placeholder="Type a number..."
+            )
+        elif option == "Number of bins":
+            n_bins = st.number_input(
+                "Select the number of bins",
+                min_value=1,
+                max_value=200,
+                value=100,
+                step=1,
+                placeholder="Type a number...",
+                help="The actual number of bins will be the closest value to your selection based on distribution.",
+            )
+    return n_bins, bins_size
+def setup_filtering_options(df, feature):
+    """
+    Sidebar controls to choose a filtering method and its parameters.
+    Args:
+        df (DataFrame): Data whose ``feature`` column provides the slider bounds.
+        feature: Column of ``df`` the filter applies to.
+    Returns:
+        tuple: ``(filtering_method, remove_low, remove_up, clip_low, clip_up, num_std_devs)``.
+        Parameters that do not belong to the chosen method are None.
+    """
+    def range_slider(title):
+        # Two-handle slider spanning the whole observed range of the feature.
+        return st.slider(
+            title,
+            min_value=df[feature].min(),
+            max_value=df[feature].max(),
+            value=(df[feature].min(), df[feature].max()),
+        )
+    remove_low = remove_up = None
+    clip_low = clip_up = None
+    num_std_devs = None
+    with st.sidebar.expander("Filtering", expanded=False):
+        filtering_method = st.radio(
+            label="Filter data based on",
+            options=["No filter", "Removing outliers", "Clipping outliers", "Standard deviations"],
+            captions=[
+                "",
+                "It remove values outside a specified range",
+                "It restricts the range of data by capping values below and above a threshold to the lower "
+                "and upper bound selected.",
+                "Filtering data based on standard deviations",
+            ],
+        )
+        if filtering_method == "Standard deviations":
+            num_std_devs = st.number_input(label="Number of standard deviations", min_value=1, step=1, value=3)
+        elif filtering_method == "Clipping outliers":
+            clip_low, clip_up = range_slider("Clip outliers within a range of values")
+        elif filtering_method == "Removing outliers":
+            remove_low, remove_up = range_slider("Remove outliers within a range of values")
+    return filtering_method, remove_low, remove_up, clip_low, clip_up, num_std_devs
+def setup_metrics_customization(baseline_model, benchmark_model, aggregated):
+    """Sidebar controls for the per-subject (non-aggregated) comparison view.
+    Args:
+        baseline_model: Name of the baseline model.
+        benchmark_model: Name of the benchmark model.
+        aggregated: When truthy no control is shown.
+    Returns:
+        tuple: ``(num_max_subjects, sort_column, sort_order)``. ``sort_column`` is the
+        value mapped to the "Sorted by:" choice and ``sort_order`` the value mapped to
+        the "Order by:" choice; with ``aggregated`` nothing is selected.
+    """
+    mapping_performance = {
+        "subject ID": "ID",
+        f"Performance ({baseline_model})": f"{baseline_model}",
+        f"Performance ({benchmark_model})": f"{benchmark_model}",
+    }
+    # Mapping used to resolve the "Sorted by:" choice. It must contain every option
+    # offered to the user: resolving against ``mapping_performance`` alone returned
+    # None whenever a common feature was chosen.
+    sort_columns = mapping_performance
+    num_max_subjects, selected_sorted, selected_order = None, None, None
+    if not aggregated:
+        with st.sidebar.expander("Customization", expanded=True):
+            num_max_subjects = st.number_input("Maximum subjects to visualize", min_value=1, value=5, step=1)
+            # NOTE(review): assumes get_multiple_features returns a label -> column
+            # mapping like get_features does; confirm against Features.
+            sort_columns = {
+                **const_features.get_multiple_features(["common"]),
+                **mapping_performance,
+            }
+            selected_sorted = st.selectbox("Sorted by:", options=sort_columns)
+            selected_order = st.radio("Order by:", options=orderby_dict.keys())
+    return num_max_subjects, sort_columns.get(selected_sorted), orderby_dict.get(selected_order)
+def setup_improvement_button():
+    """Selectbox for the type of comparison; options are displayed through ``pretty_string``.
+    Returns:
+        One of "relative", "absolute" or "ratio" ("relative" is preselected).
+    """
+    comparison_types = ["relative", "absolute", "ratio"]
+    return st.selectbox(
+        "Type of comparison", options=comparison_types, index=0, format_func=pretty_string
+    )
+def setup_aggregation_button():
+    """Checkbox, ticked by default, that switches subject aggregation on or off.
+    Returns:
+        The current state of the checkbox.
+    """
+    aggregated = st.checkbox(
+        "Aggregated.", value=True, help="It aggregates all the subjects, if enabled."
+    )
+    return aggregated
+def setup_clip_sidebar(data, feature):
+    """Sidebar checkbox and, when ticked, a range slider to clip a metric.
+    Args:
+        data: DataFrame whose ``feature`` column provides the slider bounds.
+        feature: Column the clipping bounds refer to.
+    Returns:
+        tuple: ``(clip_low, clip_up)``; both None while the checkbox is unticked.
+    """
+    with st.sidebar:
+        clip_enabled = st.checkbox(
+            "Clip the metric",
+            help="It restricts the range of the metrics by capping values below and "
+                 "above a threshold to the lower and upper bound selected, if "
+                 "enabled.",
+        )
+        if not clip_enabled:
+            return None, None
+        lower, upper = st.slider(
+            label="Clip metric",
+            min_value=data[feature].min(),
+            max_value=data[feature].max(),
+            value=(data[feature].min(), data[feature].max()),
+            label_visibility="collapsed",
+        )
+    return lower, upper
+def setup_statistical_test():
+    """Checkbox that lets the user request the statistical tests.
+    Returns:
+        The current state of the checkbox.
+    """
+    return st.checkbox(
+        "Perform statistical test",
+        help="It performs statistical tests to evaluate whether exist statistical "
+             "differences between the model performance, if enabled.",
+    )
+def setup_button_data_download(df):
+    """Show a button that downloads ``df`` as a UTF-8 encoded CSV file.
+    Args:
+        df: DataFrame serialised with ``to_csv``.
+    """
+    csv_bytes = df.to_csv().encode("utf-8")
+    st.download_button(
+        "Download data used in the statistical tests as CSV",
+        data=csv_bytes,
+        file_name="raw_data_statistical_test.csv",
+        mime="text/csv",
+    )
+def setup_sidebar_regions(data, aggregated):
+    """Sidebar multiselect over the distinct values of ``data.region``, all selected by default.
+    Args:
+        data: DataFrame with a ``region`` column.
+        aggregated: When truthy no control is shown.
+    Returns:
+        The selected regions, or None when ``aggregated`` is truthy.
+    """
+    if aggregated:
+        return None
+    with st.sidebar.expander("Regions", expanded=True):
+        region_names = list(data.region.unique())
+        selected_regions = st.multiselect(
+            "Select the regions to visualize:", options=region_names, default=region_names
+        )
+    return selected_regions
+def setup_sidebar_longitudinal_subject(data):
+    """Sidebar selectbox over the sorted distinct values of ``data.longitudinal_id``.
+    Returns:
+        The selected subject (the first one is preselected).
+    """
+    with st.sidebar.expander("Subjects", expanded=True):
+        subject_ids = sorted(data.longitudinal_id.unique())
+        subject_selected = st.selectbox("Select a subject to visualize:", options=subject_ids, index=0)
+    return subject_selected
+def setup_sidebar_single_subjects(data):
+    """Sidebar selectbox over the sorted distinct values of ``data.ID``.
+    Returns:
+        The selected subject (the first one is preselected).
+    """
+    with st.sidebar.expander("Subjects", expanded=True):
+        subject_ids = sorted(data.ID.unique())
+        subject_selected = st.selectbox("Select a subject to visualize:", options=subject_ids, index=0)
+    return subject_selected

audit/app/util/commons/utils.py ADDED Viewed

@@ -0,0 +1,11 @@
+import plotly.io as pio
+import streamlit as st
+def download_plot(fig, label="", filename="image"):
+    """Show a button that downloads a Plotly figure as an SVG file.
+    Args:
+        fig: Figure rendered to SVG with ``plotly.io.to_image``.
+        label: Text inserted in the button label ("Download <label> plot").
+        filename: Name of the downloaded file, without the ``.svg`` extension.
+    """
+    st.download_button(
+        label=f"Download {label} plot",
+        data=pio.to_image(fig, format="svg"),
+        file_name=f"{filename}.svg",
+        # Registered media type for SVG; "/image/svg" is not a valid MIME type.
+        mime="image/svg+xml"
+    )

audit/app/util/constants/__init__.py ADDED Viewed

File without changes