PyPI - cccpm - Versions diffs - 0.2.1__py3-none-any.whl - Mend

cccpm 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

cccpm/__init__.py +1 -0
cccpm/cpm_analysis.py +272 -0
cccpm/edge_selection.py +271 -0
cccpm/fold.py +46 -0
cccpm/logging.py +37 -0
cccpm/models.py +148 -0
cccpm/more_models.py +205 -0
cccpm/reporting/__init__.py +1 -0
cccpm/reporting/assets/CCCPM.png +0 -0
cccpm/reporting/html_report.py +363 -0
cccpm/reporting/plots/__init__.py +0 -0
cccpm/reporting/plots/chord_v2.py +821 -0
cccpm/reporting/plots/cpm_chord_plot.py +149 -0
cccpm/reporting/plots/plots.py +337 -0
cccpm/reporting/plots/utils.py +19 -0
cccpm/reporting/reporting_utils.py +124 -0
cccpm/results_manager.py +463 -0
cccpm/scoring.py +40 -0
cccpm/simulation/__init__.py +0 -0
cccpm/simulation/simulate_multivariate.py +252 -0
cccpm/simulation/simulate_sem.py +319 -0
cccpm/simulation/simulate_simple.py +37 -0
cccpm/utils.py +386 -0
cccpm-0.2.1.dist-info/METADATA +105 -0
cccpm-0.2.1.dist-info/RECORD +26 -0
cccpm-0.2.1.dist-info/WHEEL +4 -0

cccpm/reporting/plots/cpm_chord_plot.py ADDED Viewed

@@ -0,0 +1,149 @@
+import os
+import numpy as np
+from typing import Union, Tuple
+import matplotlib.pyplot as plt
+import pandas as pd
+import netplotbrain
+def vector_to_upper_triangular_matrix(vector):
+    """
+    Convert a vector containing strictly upper triangular elements back
+    to a 2D square matrix.
+    Parameters:
+    vector (np.ndarray): A vector containing the strictly upper triangular elements.
+    Returns:
+    np.ndarray: The reconstructed 2D square matrix.
+    """
+    # Calculate the size of the matrix from the vector length
+    size = int((np.sqrt(8 * vector.size + 1) - 1) / 2) + 1
+    if size * (size - 1) // 2 != vector.size:
+        raise ValueError("Vector size does not match the number of elements for a valid square matrix.")
+    matrix = np.zeros((size, size))
+    # Get the indices of the strictly upper triangular part
+    row_indices, col_indices = np.triu_indices(size, k=1)
+    # Place the elements into the matrix
+    matrix[row_indices, col_indices] = vector
+    matrix[col_indices, row_indices] = vector
+    return matrix
+def get_colors_from_colormap(n_colors, colormap_name='tab10'):
+    """
+    Get a set of distinct colors from a specified colormap.
+    Parameters:
+    n_colors (int): Number of distinct colors needed.
+    colormap_name (str): Name of the colormap to use (e.g., 'tab10').
+    Returns:
+    list: A list of color strings.
+    """
+    cmap = plt.get_cmap(colormap_name)
+    colors = [cmap(i / (n_colors - 1)) for i in range(n_colors)]
+    return colors
+def convert_matrix(adj: Union[list, np.ndarray]) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    Converts your adjacency (connectivity) matrix into a list of edges (i, j)
+        and their weights
+    :param adj: the matrix
+    """
+    if isinstance(adj, list):
+        adj = np.array(adj)
+    idxs = np.triu_indices(adj.shape[0], k=1)
+    weights = adj[idxs]
+    idxs = np.array(idxs).T
+    smol = 1e-6
+    idxs = idxs[(weights > smol) | (weights < -smol)]
+    weights = weights[(weights > smol) | (weights < -smol)]
+    return idxs, weights
+def extract_edges(matrix, keep_only_non_zero_edges: bool = False):
+    """
+    Given a square matrix (graph), this function returns:
+    1. A NumPy array with two columns containing the ids of the two nodes connected by an edge.
+    2. A NumPy array containing the weights of the edges.
+    Args:
+    matrix (2D numpy array): A square matrix representing a graph.
+    Returns:
+    edges (2D numpy array): Array of edges.
+    weights (1D numpy array): Array of weights corresponding to the edges.
+    """
+    if isinstance(matrix, np.ndarray) and matrix.shape[0] == matrix.shape[1]:
+        n = matrix.shape[0]
+        edges = []
+        weights = []
+        for i in range(1, n):
+            for j in range(i):
+                if keep_only_non_zero_edges:
+                    if matrix[i, j] != 0:  # Only include non-zero edges
+                        edges.append([i, j])
+                        weights.append(matrix[i, j])
+                else:
+                    edges.append([i, j])
+                    weights.append(matrix[i, j])
+        edges = np.array(edges, dtype=int)
+        weights = np.array(weights)
+        return edges, weights
+    else:
+        raise ValueError("Input must be a square matrix (2D NumPy array).")
+def plot_netplotbrain(results_folder, selected_metric, atlas_labels):
+    edges = np.load(os.path.join(results_folder, f"{selected_metric}.npy"))
+    if (selected_metric == "sig_stability_positive_edges") or (selected_metric == "sig_stability_negative_edges"):
+        threshold = 0.01
+        corr_transformed = np.where(np.abs(edges) > threshold, 0, edges)
+        corr_transformed = np.where(np.abs(edges) <= threshold, 1, corr_transformed)
+        edges = corr_transformed
+    elif (selected_metric == "stability_positive_edges") or (selected_metric == "stability_negative_edges"):
+        threshold = 1
+        corr_transformed = np.where(np.abs(edges) < threshold, 0, edges)
+        corr_transformed = np.where(np.abs(edges) >= threshold, 1, corr_transformed)
+        edges = corr_transformed
+    if 'positive' in selected_metric:
+        edge_color = "#b22222"
+    else:
+        edge_color = "#317199"
+    edges_plot, edge_weights = extract_edges(edges, keep_only_non_zero_edges=True)
+    if atlas_labels is not None and edges_plot.any():
+        aparc = atlas_labels
+        edges_netplot = pd.DataFrame({'i': edges_plot[:, 0], 'j': edges_plot[:, 1],
+                                      'weights': edge_weights})
+        fig, ax = netplotbrain.plot(template='MNI152NLin2009cAsym',
+                                    template_style='glass',
+                                    nodes=aparc,
+                                    edges=edges_netplot,
+                                    view=['LSR'],
+                                    highlight_edges=True,
+                                    highlight_nodes=None,
+                                    node_type='circles',
+                                    edge_color=edge_color,
+                                    node_color='#332f2c'
+                                    )
+    else:
+        fig = plt.figure()
+        edges_netplot = None
+    fig.savefig(os.path.join(results_folder, "plots", f"netplotbrain_{selected_metric}.png"))
+    return os.path.join(results_folder, "plots", f"netplotbrain_{selected_metric}.png"), edges_netplot
+if __name__ == "__main__":
+    results_directory = '/spm-data/vault-data3/mmll/projects/cpm_python/results/hcp_SSAGA_TB_Yrs_Smoked_spearman_partial_p=0.001/'
+    selected_metric = "sig_stability_negative_edges"
+    #plot_cpm_chord_plot(results_directory, selected_metric)
+    plot_netplotbrain(results_directory, selected_metric)

cccpm/reporting/plots/plots.py ADDED Viewed

@@ -0,0 +1,337 @@
+import os
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+import matplotlib.gridspec as gridspec
+from pandas.api.types import is_numeric_dtype
+# Shared plotting settings
+# Hex colors keyed by the values of the 'network' column that the plots below
+# use for faceting and hue.
+COLOR_MAP = {
+    "positive": "#FF5768",
+    "negative": "#6C88C4",
+    "both": "#d6d6d6"
+}
+# Names of the models appearing in the 'model' column.
+# NOTE(review): not referenced by any function visible in this module; confirm it is still needed.
+MODEL_ORDER = ["covariates", "connectome", "full", "residuals", "increment"]
+def apply_nature_style():
+    sns.set_theme(style="white")
+    mpl.rcParams.update({
+        "font.size": 7,
+        "axes.labelsize": 7,
+        "axes.titlesize": 7,
+        "xtick.labelsize": 6,
+        "ytick.labelsize": 6,
+        "lines.linewidth": 0.75,
+        "axes.linewidth": 0.5,
+        "legend.fontsize": 6
+    })
+def scatter_plot(df: pd.DataFrame, results_folder: str, y_name) -> str:
+    apply_nature_style()
+    df = df[df['model'].isin(['connectome', 'residuals', 'full'])]
+    def regplot_colored(data, **kwargs):
+        color = COLOR_MAP.get(data['network'].iloc[0], "#000000")
+        sns.regplot(
+            data=data,
+            x="y_true", y="y_pred",
+            scatter_kws={"alpha": 0.7, "s": 14, "edgecolor": "white", "color": color},
+            line_kws={"color": color, "linewidth": 0.75},
+            **kwargs
+        )
+    g = sns.FacetGrid(df, row="model", col="network", margin_titles=True, height=1.5, aspect=1)
+    g.map_dataframe(regplot_colored)
+    g.set_titles(col_template="{col_name}", row_template="{row_name}", size=7)
+    g.set_xlabels(y_name)
+    g.set_ylabels(f"predicted {y_name}")
+    sns.despine(trim=True)
+    g.fig.tight_layout(pad=0.5)
+    png_path = os.path.join(results_folder, "predictions.png")
+    pdf_path = os.path.join(results_folder, "predictions.pdf")
+    g.fig.savefig(png_path, dpi=600, bbox_inches="tight")
+    g.fig.savefig(pdf_path, bbox_inches="tight")
+    return png_path
+def scatter_plot_covariates_model(df: pd.DataFrame, results_folder: str, y_name) -> str:
+    """
+    Generate a single scatter plot with regression line for the 'covariates' model.
+    """
+    apply_nature_style()
+    df = df[df["model"] == "covariates"]
+    # Create a figure with GridSpec
+    fig = plt.figure(figsize=(6, 2))
+    gs = gridspec.GridSpec(1, 3, figure=fig)
+    # Create one subplot in the center cell
+    ax = fig.add_subplot(gs[0, 1])
+    sns.regplot(
+        data=df,
+        x="y_true",
+        y="y_pred",
+        scatter_kws={"alpha": 0.7, "s": 14, "edgecolor": "white", "color": "black"},
+        line_kws={"color": "black", "linewidth": 0.75},
+        ax=ax
+    )
+    sns.despine(trim=True)
+    ax.set_xlabel(y_name)
+    ax.set_ylabel(f"predicted {y_name}")
+    ax.set_title('covariates')
+    png_path = os.path.join(results_folder, "scatter_covariates.png")
+    pdf_path = os.path.join(results_folder, "scatter_covariates.pdf")
+    plt.tight_layout(pad=0.5)
+    # This makes the figure 10x10 inches
+    fig.savefig(png_path, dpi=600)
+    fig.savefig(pdf_path)
+    return png_path
+def histograms_network_strengths(df: pd.DataFrame, results_folder: str, y_name) -> str:
+    """
+    Create a 2x2 grid of histograms showing the distribution of network_strength
+    for two models ('connectome', 'residuals') and two networks ('positive', 'negative').
+    """
+    apply_nature_style()
+    # Filter relevant data
+    df = df[df["model"].isin(["connectome", "residuals"])]
+    df = df[df["network"].isin(["positive", "negative"])]
+    # Color mapping
+    color_map = {
+        "positive": "#FF5768",  # red
+        "negative": "#6C88C4"   # blue
+    }
+    def histplot_colored(data, color=None, **kwargs):
+        # Override color based on 'network' value
+        network = data["network"].iloc[0]
+        color = {"positive": "#FF5768", "negative": "#6C88C4"}[network]
+        sns.histplot(
+            data=data,
+            x="network_strength",
+            bins=30,
+            edgecolor="white",
+            linewidth=0.3,
+            color=color,  # This now safely overrides the one passed by FacetGrid
+            **kwargs
+        )
+    # Create 2x2 facet grid
+    g = sns.FacetGrid(
+        df,
+        row="model",
+        col="network",
+        margin_titles=True,
+        height=1.5,
+        aspect=1
+    )
+    g.map_dataframe(histplot_colored)
+    g.set_titles(col_template="{col_name}", row_template="{row_name}", size=7)
+    g.set_axis_labels("network strength", y_name)
+    sns.despine(trim=True)
+    g.fig.tight_layout(pad=0.5)
+    # Save
+    png_path = os.path.join(results_folder, "histograms_network_strengths.png")
+    pdf_path = os.path.join(results_folder, "histograms_network_strengths.pdf")
+    g.fig.savefig(png_path, dpi=600, bbox_inches="tight")
+    g.fig.savefig(pdf_path, bbox_inches="tight")
+    return png_path
+def scatter_plot_network_strengths(df: pd.DataFrame, results_folder: str, y_name) -> str:
+    """
+    Create a 2x2 scatter plot of y_true vs network_strength
+    for two models ('connectome', 'residuals') and two networks ('positive', 'negative').
+    """
+    apply_nature_style()
+    # Define color mapping
+    color_map = {
+        "positive": "#FF5768",  # red
+        "negative": "#6C88C4"   # blue
+    }
+    # Plotting function with custom color per network
+    def regplot_colored(data, **kwargs):
+        network = data["network"].iloc[0]
+        color = color_map.get(network, "black")
+        sns.regplot(
+            data=data,
+            x="network_strength",
+            y="y_true",
+            scatter_kws={"alpha": 0.7, "s": 14, "edgecolor": "white", "color": color},
+            line_kws={"color": color, "linewidth": 0.75},
+            **kwargs
+        )
+    # Create 2x2 facet grid: rows = model, cols = network
+    g = sns.FacetGrid(
+        df,
+        row="model",
+        col="network",
+        margin_titles=True,
+        height=1.5,
+        aspect=1
+    )
+    g.map_dataframe(regplot_colored)
+    g.set_titles(col_template="{col_name}", row_template="{row_name}", size=7)
+    g.set_axis_labels("network strength", y_name)
+    sns.despine(trim=True)
+    g.fig.tight_layout(pad=0.5)
+    # Save
+    png_path = os.path.join(results_folder, "scatter_network_strengths.png")
+    pdf_path = os.path.join(results_folder, "scatter_network_strengths.pdf")
+    g.fig.savefig(png_path, dpi=600, bbox_inches="tight")
+    g.fig.savefig(pdf_path, bbox_inches="tight")
+    return png_path
+def boxplot_model_performance(
+    df: pd.DataFrame,
+    metric: str,
+    results_folder: str,
+    models: list[str],
+    filename_suffix: str = ""
+) -> str:
+    """
+    Creates a horizontal boxplot comparing models across network types.
+    Parameters:
+        df: Input dataframe.
+        metric: Name of the column to be plotted on the x-axis.
+        results_folder: Output folder path.
+        models: List of model names to include (e.g. ['increment'] or others).
+        filename_suffix: Optional string to append to the output filename.
+    """
+    apply_nature_style()
+    df = df[df["model"].isin(models)]
+    # Adjust figure size based on model count
+    height = 0.75 if len(models) == 1 else 2
+    fig, ax = plt.subplots(figsize=(7, height))
+    sns.boxplot(
+        data=df,
+        x=metric,
+        y="model",
+        hue="network",
+        order=models,
+        hue_order=["both", "negative", "positive"],
+        palette=COLOR_MAP,
+        orient="h",
+        fliersize=2,
+        linewidth=0.5,
+        width=0.5,
+        ax=ax
+    )
+    if metric in ["pearson_score", "spearman_score", "explained_variance_score"]:
+        ax.axvline(x=0, color="black", linewidth=0.5)
+        ax.set_xlim(-0.5, 1)
+    sns.despine(trim=True)
+    ax.set_xlabel(metric.replace("_", " "))
+    ax.set_ylabel("")
+    # Move legend outside the plot
+    ax.legend(
+        title="",
+        loc="center left",
+        bbox_to_anchor=(1.01, 0.5),
+        frameon=False,
+        handletextpad=0.5
+    )
+    # Save plot
+    suffix = f"_{filename_suffix}" if filename_suffix else ""
+    png_path = os.path.join(results_folder, f"boxplot_{metric}{suffix}.png")
+    pdf_path = os.path.join(results_folder, f"boxplot_{metric}{suffix}.pdf")
+    svg_path = os.path.join(results_folder, f"boxplot_{metric}{suffix}.svg")
+    fig.tight_layout(pad=0.2)
+    fig.savefig(png_path, dpi=600, bbox_inches="tight")
+    fig.savefig(pdf_path, bbox_inches="tight")
+    fig.savefig(svg_path, bbox_inches="tight")
+    return png_path
+def pairplot_flexible(df: pd.DataFrame, output_path: str) -> str:
+    sns.set_theme(style="white")
+    variables = df.columns
+    n = len(variables)
+    fig, axes = plt.subplots(n, n, figsize=(2.5 * n, 2.5 * n))
+    for i, row_var in enumerate(variables):
+        for j, col_var in enumerate(variables):
+            ax = axes[i, j]
+            ax.set_xlabel("")
+            ax.set_ylabel("")
+            x = df[col_var]
+            y = df[row_var]
+            is_x_cont = is_numeric_dtype(x)
+            is_y_cont = is_numeric_dtype(y)
+            if i == j:
+                if is_x_cont:
+                    sns.histplot(x, bins=20, ax=ax, color="gray", edgecolor="white")
+                else:
+                    counts = x.value_counts().sort_index()
+                    sns.barplot(
+                        x=counts.index.astype(str),
+                        y=counts.values,
+                        hue=counts.index.astype(str),  # ← now we have a hue
+                        palette="pastel",
+                        legend=False,
+                        ax=ax
+                    )
+                    # rotate labels (you can also use tick_params as shown earlier)
+                    ax.set_xticks(range(len(counts)))
+                    ax.set_xticklabels(counts.index.astype(str), rotation=45, ha="right")
+                ax.set_title(row_var, fontsize=9)
+                sns.despine(ax=ax)
+                continue
+            if is_x_cont and is_y_cont:
+                sns.scatterplot(x=x, y=y, ax=ax, s=15, alpha=0.6, edgecolor="white", linewidth=0.3)
+            elif is_x_cont and not is_y_cont:
+                sns.histplot(data=df, x=col_var, hue=row_var, ax=ax, element="step", stat="count",
+                             common_norm=False, bins=20, palette="Set2")
+            elif not is_x_cont and is_y_cont:
+                sns.histplot(data=df, x=row_var, hue=col_var, ax=ax, element="step", stat="count",
+                             common_norm=False, bins=20, palette="Set2")
+            else:
+                ctab = pd.crosstab(y, x)
+                sns.heatmap(ctab, annot=True, fmt='d', cmap="Blues", cbar=False, ax=ax)
+                ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
+                ax.set_yticklabels(ax.get_yticklabels(), rotation=0, va='center')
+            ax.tick_params(axis='both', labelsize=6)
+            sns.despine(ax=ax)
+    #plt.tight_layout()
+    fig.savefig(output_path, dpi=600)
+    plt.close(fig)
+    return output_path

cccpm/reporting/plots/utils.py ADDED Viewed

@@ -0,0 +1,19 @@
+import matplotlib.pyplot as plt
+import numpy as np
+def get_colors_from_colormap(n_colors, colormap_name='tab10'):
+    """
+    Get a set of distinct colors from a specified colormap.
+    Parameters:
+    n_colors (int): Number of distinct colors needed.
+    colormap_name (str): Name of the colormap to use (e.g., 'tab10').
+    Returns:
+    list: A list of color strings.
+    """
+    cmap = plt.get_cmap(colormap_name)
+    colors = [cmap(i / (n_colors - 1)) for i in range(n_colors)]
+    return colors

cccpm/reporting/reporting_utils.py ADDED Viewed

@@ -0,0 +1,124 @@
+import os
+import pandas as pd
+def format_results_table(df, precision=2):
+    """
+    Format a MultiIndex DataFrame:
+    - Merge 'mean' and 'std' into 'summary'
+    - Format p-values: add * / ** and highlight in bold using CSS
+    - Return a styled Pandas DataFrame with APA style
+    """
+    formatted = {}
+    metrics = df.columns.get_level_values(0).unique()
+    for metric in metrics:
+        mean = df[(metric, "mean")]
+        std = df[(metric, "std")]
+        p = df[(metric, "p")]
+        # Format mean [std]
+        summary_col = mean.round(precision).astype(str) + " [" + std.round(precision).astype(str) + "]"
+        # Annotate p-values with asterisks (we'll apply bold via styling)
+        def p_string(val):
+            if pd.isna(val):
+                return ""
+            elif val < 0.001:
+                return "<0.001**"
+            elif val < 0.01:
+                return f"{val:.3f}**"
+            elif val < 0.05:
+                return f"{val:.3f}*"
+            else:
+                return f"{val:.3f}"
+        formatted[(metric, "mean [sd]")] = summary_col
+        formatted[(metric, "p")] = p.apply(p_string)
+    combined = pd.DataFrame(formatted, index=df.index)
+    combined.columns = pd.MultiIndex.from_tuples(combined.columns)
+    # Column sort: summary → p
+    combined = combined.loc[:, sorted(combined.columns, key=lambda x: (x[0], ["mean [sd]", "p"].index(x[1])))]
+    # Build Styler
+    styler = combined.style.set_properties(
+        **{
+            'font-size': '10px',
+            'padding': '2px 4px',
+            'text-align': 'center'
+        }
+    ).set_table_styles([
+        {'selector': 'th',
+         'props': [('font-size', '11px'),
+                   ('padding', '2px 4px'),
+                   ('text-align', 'center'),
+                   ('background-color', '#f9f9f9')]},
+        {'selector': '.row_heading',
+         'props': [('font-size', '10px'),
+                   ('padding', '2px 4px')]},
+        {'selector': '.index_name',
+         'props': [('font-size', '10px'),
+                   ('padding', '2px 4px')]}
+    ])
+    # Apply bold to significant p-values via CSS
+    def bold_sig(val):
+        if isinstance(val, str) and val.endswith("**") or val.endswith("*"):
+            return 'font-weight: bold'
+        return ''
+    # Apply only to p-value columns
+    for col in combined.columns:
+        if col[1] == "p":
+            styler = styler.map(bold_sig, subset=[col])
+    # Add thick horizontal lines between top-level index groups
+    def thick_divider_rows(df):
+        styles = pd.DataFrame("", index=df.index, columns=df.columns)
+        previous_group = None
+        for i, idx in enumerate(df.index):
+            current_group = idx[0]  # assumes 'model' is the first index level
+            if previous_group is not None and current_group != previous_group:
+                styles.iloc[i] = 'border-top: 1px solid black'
+            previous_group = current_group
+        return styles
+    styler = styler.apply(thick_divider_rows, axis=None)
+    return styler
+def extract_log_block(filepath):
+    with open(filepath, "r") as f:
+        lines = f.readlines()
+    # Find all indices of separator lines (e.g. "=======")
+    sep_indices = [i for i, line in enumerate(lines) if line.strip().startswith("=")]
+    if len(sep_indices) >= 2:
+        # Take everything between the first two separator lines
+        start = sep_indices[0] + 1
+        end = sep_indices[1]
+        content = lines[start:end]
+    else:
+        content = []  # or raise an error, depending on your expectations
+    return "".join(content).strip()
+# Function to read CSV file from the given folder path
+def load_data_from_folder(folder_path, filename):
+    csv_path = os.path.join(folder_path, filename)
+    if os.path.exists(csv_path):
+        return pd.read_csv(csv_path)
+    else:
+        raise RuntimeError(f"No CSV file found at path: {csv_path}")
+def load_results_from_folder(folder_path, filename):
+    csv_path = os.path.join(folder_path, filename)
+    if os.path.exists(csv_path):
+        return pd.read_csv(csv_path, header=[0, 1], index_col=[0, 1])
+    else:
+        raise RuntimeError(f"No CSV file found at path: {csv_path}")