PyPI - pythonflex - Versions diffs - 0.3.4__py3-none-any.whl → 0.4__py3-none-any.whl - Mend

pythonflex-0.3.4-py3-none-any.whl → pythonflex-0.4-py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (20)

pythonflex/__init__.py +28 -4
pythonflex/analysis.py +287 -579
pythonflex/examples/basic_usage.py +38 -30
pythonflex/examples/manuscript.py +37 -43
pythonflex/examples/runtime/runtime_benchmark.py +218 -0
pythonflex/examples/runtime/runtime_benchmark_10_runs_memmap.py +534 -0
pythonflex/examples/runtime/runtime_benchmark_corum_njobs.py +245 -0
pythonflex/examples/runtime/runtime_benchmark_gobp_njobs_chunks.py +319 -0
pythonflex/examples/runtime/runtime_benchmark_gobp_optimization.py +417 -0
pythonflex/examples/runtime/runtime_benchmark_repeated.py +347 -0
pythonflex/old_functions.py +422 -0
pythonflex/plotting.py +655 -242
pythonflex/preprocessing.py +54 -216
pythonflex/utils.py +36 -9
{pythonflex-0.3.4.dist-info → pythonflex-0.4.dist-info}/METADATA +8 -6
pythonflex-0.4.dist-info/RECORD +32 -0
{pythonflex-0.3.4.dist-info → pythonflex-0.4.dist-info}/WHEEL +1 -1
pythonflex-0.4.dist-info/licenses/LICENSE +7 -0
pythonflex-0.3.4.dist-info/RECORD +0 -24
{pythonflex-0.3.4.dist-info → pythonflex-0.4.dist-info}/entry_points.txt +0 -0

pythonflex/plotting.py CHANGED

@@ -9,6 +9,7 @@ import pandas as pd
 import matplotlib.pyplot as plt
 from matplotlib import patches
 from matplotlib.cm import get_cmap
+from matplotlib.lines import Line2D
 from matplotlib.ticker import NullFormatter, NullLocator
 # Completely disable LaTeX and clear all font cache/references
@@ -26,14 +27,17 @@ mpl.rcParams['font.cursive'] = ['Apple Chancery', 'Textile', 'Zapf Chancery', 'S
 mpl.rcParams['font.fantasy'] = ['Comic Sans MS', 'Chicago', 'Charcoal', 'Impact', 'Western', 'Humor Sans', 'fantasy']
 mpl.rcParams['font.monospace'] = ['DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Computer Modern Typewriter', 'Andale Mono', 'Nimbus Mono L', 'Courier New', 'Courier', 'Fixed', 'Terminal', 'monospace']
-# Remove any LaTeX-specific math font settings
-mpl.rcParams['mathtext.fontset'] = 'dejavusans'
-mpl.rcParams['mathtext.default'] = 'regular'
+# Remove any LaTeX-specific math font settings
+mpl.rcParams['mathtext.fontset'] = 'dejavusans'
+mpl.rcParams['mathtext.default'] = 'regular'
+mpl.rcParams['pdf.fonttype'] = 42
+mpl.rcParams['ps.fonttype'] = 42
+mpl.rcParams['svg.fonttype'] = 'none'
 # Force font manager to rebuild with system fonts only
 try:
     fm.fontManager.__init__()
-except:
+except Exception:
     pass
 # Local modules
@@ -275,17 +279,233 @@ def plot_all_runs_pra(pra_list, mean_df=None, line_width=2.0, hide_minor_ticks=T
         output_path = Path(config["output_folder"]) / f"aggregated_all_runs_precision_recall_curve.{output_type}"
         fig.savefig(output_path, bbox_inches="tight", format=output_type)
-    if plot_config.get("show_plot", True):
-        plt.show()
-    plt.close(fig)
-def plot_percomplex_scatter(n_top=10, sig_color='#B71A2A', nonsig_color='#DBDDDD', label_color='black', border_color='black', border_width=1.0, show_text_background=True):
-    config = dload("config")
-    plot_config = config["plotting"]
-    rdict = dload("pra_percomplex")
+    if plot_config.get("show_plot", True):
+        plt.show()
+    plt.close(fig)
+def _short_scatter_label(label, max_chars=10):
+    label = str(label)
+    return label[:max_chars] + "." if len(label) > max_chars else label
+def _bbox_overlap_area(box_a, box_b):
+    x_overlap = max(0.0, min(box_a[2], box_b[2]) - max(box_a[0], box_b[0]))
+    y_overlap = max(0.0, min(box_a[3], box_b[3]) - max(box_a[1], box_b[1]))
+    return x_overlap * y_overlap
+def _line_intersects_bbox(start_px, end_px, box):
+    samples = np.linspace(0.05, 0.95, 12)
+    xs = start_px[0] + (end_px[0] - start_px[0]) * samples
+    ys = start_px[1] + (end_px[1] - start_px[1]) * samples
+    return np.any(
+        (xs >= box[0]) & (xs <= box[2]) &
+        (ys >= box[1]) & (ys <= box[3])
+    )
+def _label_alignment(dx, dy):
+    if abs(dx) < 1e-9:
+        ha = "center"
+    else:
+        ha = "left" if dx > 0 else "right"
+    if abs(dy) < 1e-9:
+        va = "center"
+    else:
+        va = "bottom" if dy > 0 else "top"
+    return ha, va
+def _label_display_bbox(ax, anchor_xy, width_px, height_px, ha, va):
+    anchor_x, anchor_y = ax.transData.transform(anchor_xy)
+    if ha == "left":
+        x0, x1 = anchor_x, anchor_x + width_px
+    elif ha == "right":
+        x0, x1 = anchor_x - width_px, anchor_x
+    else:
+        x0, x1 = anchor_x - width_px / 2.0, anchor_x + width_px / 2.0
+    if va == "bottom":
+        y0, y1 = anchor_y, anchor_y + height_px
+    elif va == "top":
+        y0, y1 = anchor_y - height_px, anchor_y
+    else:
+        y0, y1 = anchor_y - height_px / 2.0, anchor_y + height_px / 2.0
+    return (x0, y0, x1, y1)
+def _measure_label_size_pixels(ax, label, fontsize, bbox_props, renderer):
+    probe = ax.text(
+        0.5, 0.5, label,
+        fontsize=fontsize,
+        ha="left",
+        va="bottom",
+        linespacing=1,
+        alpha=0.0,
+        bbox=bbox_props,
+    )
+    bbox = probe.get_window_extent(renderer=renderer).expanded(1.12, 1.35)
+    probe.remove()
+    return bbox.width, bbox.height
+def _place_scatter_labels_radially(ax, label_items, obstacle_points, fontsize=4, bbox_props=None):
+    """Choose label anchors by scanning candidate positions around each point."""
+    fig = ax.figure
+    fig.canvas.draw()
+    renderer = fig.canvas.get_renderer()
+    axis_box = ax.get_window_extent(renderer=renderer)
+    obstacle_points = np.asarray(obstacle_points, dtype=float)
+    if obstacle_points.size == 0:
+        obstacle_points = np.empty((0, 2), dtype=float)
+    else:
+        obstacle_points = obstacle_points[np.isfinite(obstacle_points).all(axis=1)]
+    obstacle_pixels = ax.transData.transform(obstacle_points) if len(obstacle_points) else np.empty((0, 2))
+    angles = np.deg2rad(np.arange(0, 360, 15))
+    distances = np.array([0.035, 0.055, 0.08, 0.11, 0.145, 0.18])
+    axis_margin_px = 2.0
+    valid_items = [
+        (x, y, label)
+        for x, y, label in label_items
+        if np.isfinite(x) and np.isfinite(y)
+    ]
+    valid_items = sorted(
+        valid_items,
+        key=lambda item: (
+            min(item[0], 1.0 - item[0], item[1], 1.0 - item[1]),
+            -item[1],
+            -item[0],
+        ),
+    )
+    placed = []
+    placed_boxes = []
+    for point_x, point_y, label in valid_items:
+        label_width, label_height = _measure_label_size_pixels(
+            ax, label, fontsize, bbox_props, renderer
+        )
+        point_px = ax.transData.transform((point_x, point_y))
+        best_candidate = None
+        for distance in distances:
+            for angle in angles:
+                dx = float(np.cos(angle) * distance)
+                dy = float(np.sin(angle) * distance)
+                text_x = point_x + dx
+                text_y = point_y + dy
+                ha, va = _label_alignment(dx, dy)
+                box = _label_display_bbox(
+                    ax, (text_x, text_y), label_width, label_height, ha, va
+                )
+                outside_axes = (
+                    box[0] < axis_box.x0 + axis_margin_px or
+                    box[1] < axis_box.y0 + axis_margin_px or
+                    box[2] > axis_box.x1 - axis_margin_px or
+                    box[3] > axis_box.y1 - axis_margin_px
+                )
+                if outside_axes:
+                    continue
+                overlaps = [_bbox_overlap_area(box, placed_box) for placed_box in placed_boxes]
+                overlap_hits = sum(area > 0 for area in overlaps)
+                overlap_area = sum(overlaps)
+                if len(obstacle_pixels):
+                    dot_hits = np.count_nonzero(
+                        (obstacle_pixels[:, 0] >= box[0]) &
+                        (obstacle_pixels[:, 0] <= box[2]) &
+                        (obstacle_pixels[:, 1] >= box[1]) &
+                        (obstacle_pixels[:, 1] <= box[3])
+                    )
+                    if box[0] <= point_px[0] <= box[2] and box[1] <= point_px[1] <= box[3]:
+                        dot_hits = max(0, dot_hits - 1)
+                else:
+                    dot_hits = 0
+                text_px = ax.transData.transform((text_x, text_y))
+                connector_hits = sum(
+                    _line_intersects_bbox(point_px, text_px, placed_box)
+                    for placed_box in placed_boxes
+                )
+                edge_gap = min(
+                    box[0] - axis_box.x0,
+                    box[1] - axis_box.y0,
+                    axis_box.x1 - box[2],
+                    axis_box.y1 - box[3],
+                )
+                score = (
+                    overlap_hits * 100000.0 +
+                    overlap_area * 0.5 +
+                    dot_hits * 1500.0 +
+                    connector_hits * 1200.0 +
+                    np.hypot(text_px[0] - point_px[0], text_px[1] - point_px[1]) * 0.05 +
+                    10.0 / max(edge_gap, 1.0)
+                )
+                if best_candidate is None or score < best_candidate["score"]:
+                    best_candidate = {
+                        "point_x": point_x,
+                        "point_y": point_y,
+                        "text_x": text_x,
+                        "text_y": text_y,
+                        "label": label,
+                        "ha": ha,
+                        "va": va,
+                        "box": box,
+                        "score": score,
+                    }
+        if best_candidate is None:
+            # Extremely rare fallback for very long labels or tight axes.
+            fallback_dx = -0.055 if point_x > 0.5 else 0.055
+            fallback_dy = -0.055 if point_y > 0.5 else 0.055
+            ha, va = _label_alignment(fallback_dx, fallback_dy)
+            text_x = max(0.02, min(0.98, point_x + fallback_dx))
+            text_y = max(0.02, min(0.98, point_y + fallback_dy))
+            best_candidate = {
+                "point_x": point_x,
+                "point_y": point_y,
+                "text_x": text_x,
+                "text_y": text_y,
+                "label": label,
+                "ha": ha,
+                "va": va,
+                "box": _label_display_bbox(
+                    ax, (text_x, text_y), label_width, label_height, ha, va
+                ),
+                "score": float("inf"),
+            }
+        placed_boxes.append(best_candidate["box"])
+        placed.append(best_candidate)
+    return placed
+def plot_percomplex_scatter(
+    n_top=10,
+    sig_color='black',
+    nonsig_color='none',
+    label_color='black',
+    border_color='black',
+    border_width=1.0,
+    nonsig_border_color="#7F7F7F",
+    nonsig_border_width=0.5,
+    show_text_background=True,
+):
+    config = dload("config")
+    plot_config = config["plotting"]
+    rdict = dload("pra_percomplex")
+    input_colors = dload("input", "colors")
+    input_colors = {_sanitize(k): v for k, v in input_colors.items()} if input_colors else {}
     if len(rdict) < 2:
-        print("Skipping plot: At least two datasets are required for per-complex scatter plot.")
+        log.warning(
+            "Skipping plot: at least two datasets are required for per-complex scatter plot."
+        )
         return
     column_pairs = list(combinations(rdict.keys(), 2))
@@ -299,93 +519,61 @@ def plot_percomplex_scatter(n_top=10, sig_color='#B71A2A', nonsig_color='#DBDDDD
             df = pd.concat([df, val[key]], axis=1)
     for pair in column_pairs:
-        extreme_indices_0 = df[pair[0]].sort_values(ascending=False).head(n_top).index
-        extreme_indices_1 = df[pair[1]].sort_values(ascending=False).head(n_top).index
-        significant_indices = extreme_indices_0.union(extreme_indices_1)
-        bg_df  = df.drop(index=significant_indices)
-        sig_df = df.loc[significant_indices]
-        # Create square figure
-        fig, ax = plt.subplots(figsize=(6, 6))
-        # Background cloud (filled dots with black borders, not rasterized)
-        bg_sizes = (bg_df['n_used_genes'] if 'n_used_genes' in bg_df else pd.Series(1, index=bg_df.index)) * 5
-        ax.scatter(
-            bg_df[pair[0]], bg_df[pair[1]],
-            facecolors=nonsig_color, edgecolors=border_color,
-            s=bg_sizes, linewidth=border_width, alpha=1.0,
-            zorder=0
-        )
-        # Significant points (filled dots with black borders)
-        sig_sizes = (sig_df['n_used_genes'] if 'n_used_genes' in sig_df else pd.Series(1, index=sig_df.index)) * 8
-        ax.scatter(
-            sig_df[pair[0]], sig_df[pair[1]],
-            facecolors=sig_color, edgecolors=border_color,
-            s=sig_sizes, linewidth=border_width, zorder=2
-        )
-        # Improved label positioning with adaptive spacing
-        coords = sorted(
-            [(sig_df.loc[idx, pair[0]], sig_df.loc[idx, pair[1]], idx) for idx in sig_df.index],
-            key=lambda c: (-c[1], -c[0])
-        )
-        # Calculate proper parameters for normalized coordinate system (0-1 range)
-        max_y = 1.0  # Normalized plots use 0-1 range
-        scale_factor = 1.0  # Standard scaling for normalized plots
-        min_distance = 0.08  # Increased spacing for 0-1 range to avoid overlap
-        adjusted_coords = adjust_text_positions_improved(
-            coords, sig_sizes,
-            min_distance=min_distance,
-            max_y=max_y,
-            scale_factor=scale_factor,
-            y_threshold=0.8  # Points above this will have labels below
-        )
-        for x, adj_y, idx, direction in adjusted_coords:
-            y = df.loc[idx, pair[1]]
-            # Calculate connector line extension, but constrain within plot bounds
-            line_extension_factor = 1.5  # Reduced from 2.5 to keep labels in bounds
-            extended_adj_y = y + (adj_y - y) * line_extension_factor
-            # Clip to ensure connector stays within 0-1 range
-            extended_adj_y = max(0.02, min(extended_adj_y, 0.98))
-            # Draw connector line
-            ax.plot([x, x], [y, extended_adj_y],
-                   color=label_color, linewidth=0.6, alpha=0.15, zorder=3)
-            # Position text at the end of extended line with small offset
-            text_y_offset = 0.01 if direction == "up" else -0.01
-            final_text_y = extended_adj_y + text_y_offset
-            # Final clip to ensure text stays within 0-1 range
-            final_text_y = max(0.02, min(final_text_y, 0.98))
-            # Prepare text bbox settings (can be turned on/off)
-            bbox_props = dict(facecolor="white", alpha=0.7, edgecolor="none", pad=1) if show_text_background else None
-            ax.text(
-                x, final_text_y,
-                df.loc[idx, 'Name'][:10] + '.' if len(df.loc[idx, 'Name']) > 10 else df.loc[idx, 'Name'],
-                fontsize=4,
-                ha='left',
-                va='bottom' if direction == "up" else 'top',
-                color=label_color,
-                linespacing=1,
-                zorder=4,
-                clip_on=True,  # Enable clipping to axes bounds
-                bbox=bbox_props
-            )
-        # Diagonal & axes cosmetics
-        ax.plot([0, 1], [0, 1], linestyle='-', color='lightgray', alpha=0.4, linewidth=0.5, zorder=1)
-        # Force square aspect ratio and exact 0-1 range
+        extreme_indices_0 = df[pair[0]].sort_values(ascending=False).head(n_top).index
+        extreme_indices_1 = df[pair[1]].sort_values(ascending=False).head(n_top).index
+        significant_indices = extreme_indices_0.union(extreme_indices_1)
+        significant_in_both = extreme_indices_0.intersection(extreme_indices_1)
+        significant_pair0_only = extreme_indices_0.difference(extreme_indices_1)
+        significant_pair1_only = extreme_indices_1.difference(extreme_indices_0)
+        bg_df  = df.drop(index=significant_indices)
+        sig_df = df.loc[significant_indices]
+        # Create square figure
+        fig, ax = plt.subplots(figsize=(6, 6))
+        # Background cloud: non-significant complexes are open circles.
+        bg_sizes = (bg_df['n_used_genes'] if 'n_used_genes' in bg_df else pd.Series(1, index=bg_df.index)) * 5
+        ax.scatter(
+            bg_df[pair[0]], bg_df[pair[1]],
+            facecolors="none", edgecolors=nonsig_border_color,
+            s=bg_sizes, linewidth=nonsig_border_width, alpha=0.8,
+            zorder=0
+        )
+        def scatter_significant(indices, color, zorder=2):
+            if len(indices) == 0:
+                return
+            point_df = df.loc[indices]
+            point_sizes = (
+                point_df['n_used_genes']
+                if 'n_used_genes' in point_df
+                else pd.Series(1, index=point_df.index)
+            ) * 8
+            ax.scatter(
+                point_df[pair[0]], point_df[pair[1]],
+                facecolors=color, edgecolors=color,
+                s=point_sizes, linewidth=border_width, zorder=zorder
+            )
+        # Dataset-specific significant complexes use the dataset input color.
+        scatter_significant(
+            significant_pair0_only,
+            input_colors.get(_sanitize(pair[0]), sig_color),
+            zorder=2,
+        )
+        scatter_significant(
+            significant_pair1_only,
+            input_colors.get(_sanitize(pair[1]), sig_color),
+            zorder=2,
+        )
+        # Complexes significant in both datasets stay black to avoid ambiguous color mixing.
+        scatter_significant(significant_in_both, "black", zorder=3)
+        # Diagonal & axes cosmetics
+        ax.plot([0, 1], [0, 1], linestyle='-', color='lightgray', alpha=0.4, linewidth=0.5, zorder=1)
+        # Force square aspect ratio and exact 0-1 range
         ax.set_xlim(0, 1)
         ax.set_ylim(0, 1)
         ax.set_aspect('equal', adjustable='box')
@@ -400,15 +588,55 @@ def plot_percomplex_scatter(n_top=10, sig_color='#B71A2A', nonsig_color='#DBDDDD
         #ax.set_title(f"{pair[0]} vs {pair[1]} - Comparison of complex performance")
         # Nature style: no grid, open top/right spines
-        ax.grid(False)
-        ax.spines['top'].set_visible(False)
-        ax.spines['right'].set_visible(False)
-        plt.tight_layout()
-        if plot_config["save_plot"]:
-            output_type = plot_config["output_type"]
-            output_path = Path(config["output_folder"]) / f"percomplex_scatter_{pair[0]}_vs_{pair[1]}.{output_type}"
+        ax.grid(False)
+        ax.spines['top'].set_visible(False)
+        ax.spines['right'].set_visible(False)
+        plt.tight_layout()
+        # Radial label positioning searches all directions around each significant point.
+        label_items = []
+        for idx in sig_df.index:
+            label_items.append((
+                float(sig_df.loc[idx, pair[0]]),
+                float(sig_df.loc[idx, pair[1]]),
+                _short_scatter_label(df.loc[idx, 'Name']),
+            ))
+        all_points = df[[pair[0], pair[1]]].dropna().to_numpy(dtype=float)
+        label_positions = _place_scatter_labels_radially(
+            ax,
+            label_items,
+            all_points,
+            fontsize=4,
+            bbox_props=None,
+        )
+        for label_pos in label_positions:
+            ax.plot(
+                [label_pos["point_x"], label_pos["text_x"]],
+                [label_pos["point_y"], label_pos["text_y"]],
+                color=label_color,
+                linewidth=0.6,
+                alpha=0.15,
+                zorder=3,
+            )
+            ax.text(
+                label_pos["text_x"],
+                label_pos["text_y"],
+                label_pos["label"],
+                fontsize=4,
+                ha=label_pos["ha"],
+                va=label_pos["va"],
+                color=label_color,
+                linespacing=1,
+                zorder=4,
+                clip_on=True,
+                bbox=None,
+            )
+        if plot_config["save_plot"]:
+            output_type = plot_config["output_type"]
+            output_path = Path(config["output_folder"]) / f"percomplex_scatter_{pair[0]}_vs_{pair[1]}.{output_type}"
             fig.savefig(output_path, bbox_inches="tight", format=output_type)
         if plot_config.get("show_plot", True):
@@ -894,16 +1122,28 @@ def position_cluster_labels(cluster, cluster_id, max_y, effective_max_y, label_c
                     clip_on=True, bbox=bbox_props
                 )
-def plot_percomplex_scatter_bysize(n_labels=10, n_top=10, sig_color='#B71A2A', nonsig_color='#DBDDDD',
-                                   label_color='black', border_color='black', border_width=1.0,
-                                   show_text_background=True):
-    config = dload("config")
-    plot_config = config["plotting"]
-    rdict = dload("pra_percomplex")
-    for key, per_complex in rdict.items():
-        sorted_pc = per_complex.sort_values(by="auc_score", ascending=False, na_position="last")
-        top_labels, rest = sorted_pc.head(n_labels), sorted_pc.iloc[n_labels:]
+def plot_percomplex_scatter_bysize(
+    n_labels=10,
+    n_top=10,
+    sig_color='black',
+    nonsig_color='none',
+    label_color='black',
+    border_color='black',
+    border_width=1.0,
+    nonsig_border_color="#7F7F7F",
+    nonsig_border_width=0.5,
+    show_text_background=True,
+):
+    config = dload("config")
+    plot_config = config["plotting"]
+    rdict = dload("pra_percomplex")
+    input_colors = dload("input", "colors")
+    input_colors = {_sanitize(k): v for k, v in input_colors.items()} if input_colors else {}
+    for key, per_complex in rdict.items():
+        dataset_color = input_colors.get(_sanitize(key), sig_color)
+        sorted_pc = per_complex.sort_values(by="auc_score", ascending=False, na_position="last")
+        top_labels, rest = sorted_pc.head(n_labels), sorted_pc.iloc[n_labels:]
         # Calculate data range for appropriate figure sizing
         max_genes = sorted_pc.n_used_genes.max()
@@ -914,22 +1154,22 @@ def plot_percomplex_scatter_bysize(n_labels=10, n_top=10, sig_color='#B71A2A', n
         fig_height = min(max(4, aspect_ratio), 8)  # Between 4-8 inches
         fig, ax = plt.subplots(figsize=(6, fig_height))
-        # Background (REST): filled dots with black borders, not rasterized
-        ax.scatter(
-            rest.auc_score, rest.n_used_genes,
-            facecolors=nonsig_color, edgecolors=border_color,
-            linewidth=border_width, s=rest.n_used_genes * 5,
-            alpha=1.0, label="Other Complexes",
-            zorder=0
-        )
-        # Top N: filled dots with black borders
-        ax.scatter(
-            top_labels.auc_score, top_labels.n_used_genes,
-            facecolors=sig_color, edgecolors=border_color,
-            linewidth=border_width, s=top_labels.n_used_genes * 8,
-            label=f"Top {n_labels} AUC Scores", alpha=1.0, zorder=2
-        )
+        # Background: non-significant complexes are open circles.
+        ax.scatter(
+            rest.auc_score, rest.n_used_genes,
+            facecolors="none", edgecolors=nonsig_border_color,
+            linewidth=nonsig_border_width, s=rest.n_used_genes * 5,
+            alpha=0.8, label="Other Complexes",
+            zorder=0
+        )
+        # Top N/significant complexes are filled black circles.
+        ax.scatter(
+            top_labels.auc_score, top_labels.n_used_genes,
+            facecolors=dataset_color, edgecolors=dataset_color,
+            linewidth=border_width, s=top_labels.n_used_genes * 8,
+            label=f"Top {n_labels} AUC Scores", alpha=1.0, zorder=2
+        )
         # Enhanced anti-overlap labeling system
         coords = [(row.auc_score, row.n_used_genes, idx) for idx, row in top_labels.iterrows()]
@@ -1010,6 +1250,7 @@ def plot_complex_contributions(
         tmp = np.tile(x, (mx, 1))
         x = cont_stepwise_mat.values / tmp
         x_df = pd.DataFrame(x, index=cont_stepwise_anno, columns=cont_stepwise_mat.columns)
         ind_for_mean = y >= (last_prec_value - min_precision_cutoff)
         if sum(ind_for_mean) == 0:
             log.info("No values above 'min.precision.cutoff'"); return False
@@ -1094,15 +1335,23 @@ def plot_significant_complexes():
         input_colors = {_sanitize(k): v for k, v in input_colors.items()}
     thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
+    if not isinstance(pra_percomplex, dict) or not pra_percomplex:
+        log.warning("No per-complex PRA data found. Run pra_percomplex() first.")
+        return pd.DataFrame(index=thresholds)
     datasets = list(pra_percomplex.keys())
     num_datasets = len(datasets)
+    if num_datasets == 0:
+        return pd.DataFrame(index=thresholds)
     df = pd.DataFrame(index=thresholds)
     for key, complex_data in pra_percomplex.items():
         if "corrected_auc_score" in complex_data.columns:
             score_col = "corrected_auc_score"
         else:
             score_col = "auc_score"
         df[key] = [complex_data.query(f'{score_col} >= {t}').shape[0] for t in thresholds]
     fig, ax = plt.subplots()
@@ -1221,15 +1470,20 @@ def plot_auc_scores():
     return pra_dict
-def plot_mpr_complexes_auc_scores(filter_key: str = "all"):
+def plot_mpr_complex_auc_scores(variant: str = "unfiltered", save=None, outname=None):
     """Plot AUC scores for the mPR complexes curve (Fig 1F-style).
     Requires `mpr_prepare()` to have been run for each dataset.
     Parameters
     ----------
-    filter_key : str
-        One of: "all", "no_mtRibo_ETCI", "no_small_highAUPRC".
+    variant : str
+        One of: "unfiltered", "without_mt_ribo_etci",
+        "without_small_high_auprc".
+    save : bool, optional
+        Whether to save the figure. If None, uses config["plotting"]["save_plot"].
+    outname : str, optional
+        Output filename. If None, auto-generated.
     Returns
     -------
@@ -1250,12 +1504,14 @@ def plot_mpr_complexes_auc_scores(filter_key: str = "all"):
         )
         return pd.Series(dtype=float)
+    variant_key = _normalize_mpr_variant(variant)
     # Build Series: dataset -> auc
     auc_by_dataset = {}
     for dataset, per_filter in mpr_auc_dict.items():
         if not isinstance(per_filter, dict):
             continue
-        val = per_filter.get(filter_key)
+        val = per_filter.get(variant_key)
         if val is None:
             continue
         try:
@@ -1265,7 +1521,8 @@ def plot_mpr_complexes_auc_scores(filter_key: str = "all"):
     if not auc_by_dataset:
         log.warning(
-            f"No mPR complexes AUC scores found for filter '{filter_key}'. Available filters: {list(FILTER_STYLES.keys())}"
+            f"No mPR complex AUC scores found for variant '{variant}'. "
+            f"Available variants: {list(PUBLIC_MPR_VARIANTS.keys())}"
         )
         return pd.Series(dtype=float)
@@ -1308,11 +1565,16 @@ def plot_mpr_complexes_auc_scores(filter_key: str = "all"):
     ax.spines["top"].set_visible(False)
     ax.spines["right"].set_visible(False)
-    if plot_config.get("save_plot", False):
+    should_save = plot_config.get("save_plot", False) if save is None else bool(save)
+    if should_save:
         output_type = plot_config.get("output_type", "pdf")
         output_folder = Path(config["output_folder"])
         output_folder.mkdir(parents=True, exist_ok=True)
-        output_path = output_folder / f"mpr_complexes_auc_{filter_key}.{output_type}"
+        if outname is None:
+            outname = f"mpr_complexes_auc_{variant_key}.{output_type}"
+        output_path = Path(outname)
+        if len(output_path.parts) == 1:
+            output_path = output_folder / outname
         plt.savefig(output_path, bbox_inches="tight", format=output_type)
     if plot_config.get("show_plot", True):
@@ -1321,6 +1583,13 @@ def plot_mpr_complexes_auc_scores(filter_key: str = "all"):
     plt.close(fig)
     return s
+def plot_mpr_complexes_auc_scores(filter_key: str = "all"):
+    """Backward-compatible wrapper for plot_mpr_complex_auc_scores()."""
+    return plot_mpr_complex_auc_scores(
+        variant=_legacy_filter_to_variant(filter_key, default="unfiltered")
+    )
 # -----------------------------------------------------------------------------
 # mPR plots (Fig. 1E and Fig. 1F)
 # -----------------------------------------------------------------------------
@@ -1475,26 +1744,6 @@ def plot_mpr_tp(name, ax=None, save=True, outname=None):
     return ax
-"""
-Multi-dataset mPR plotting functions.
-Usage:
-    from pythonflex.plotting import plot_mpr_tp_multi, plot_mpr_complexes_multi
-    # Plot multiple datasets
-    plot_mpr_tp_multi(["19Q2", "19Q4", "20Q1"])
-    plot_mpr_complexes_multi(["19Q2", "19Q4", "20Q1"])
-"""
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-from matplotlib.lines import Line2D
-from pathlib import Path
-from .utils import dload
-from .logging_config import log
 # Default color palette (colorblind-friendly)
 DEFAULT_COLORS = [
     "#4E79A7",  # blue
@@ -1509,40 +1758,101 @@ DEFAULT_COLORS = [
     "#BAB0AC",  # gray
 ]
-# Filter line styles
-FILTER_STYLES = {
+# Public mPR variant names map to the internal keys stored by mpr_prepare().
+PUBLIC_MPR_VARIANTS = {
+    "unfiltered": "all",
+    "without_mt_ribo_etci": "no_mtRibo_ETCI",
+    "without_small_high_auprc": "no_small_highAUPRC",
+}
+INTERNAL_MPR_VARIANTS = {v: k for k, v in PUBLIC_MPR_VARIANTS.items()}
+# mPR variant line styles keyed by internal storage names.
+MPR_VARIANT_STYLES = {
     "all": {"linestyle": "-", "label": "all data"},
     "no_mtRibo_ETCI": {"linestyle": "--", "label": "no mtRibo, ETC I"},
     "no_small_highAUPRC": {"linestyle": "dotted", "label": "no small, high AUPRC"},
 }
+# Compatibility alias for users who imported this internal constant.
+FILTER_STYLES = MPR_VARIANT_STYLES
+def _normalize_mpr_variant(variant):
+    """Return the internal mPR variant key for one public variant name."""
+    if variant in PUBLIC_MPR_VARIANTS:
+        return PUBLIC_MPR_VARIANTS[variant]
+    if variant in MPR_VARIANT_STYLES:
+        if variant == "all":
+            return PUBLIC_MPR_VARIANTS["unfiltered"]
+        return variant
+    raise ValueError(
+        "Unknown mPR variant "
+        f"{variant!r}. Use one of {list(PUBLIC_MPR_VARIANTS.keys())}."
+    )
-def _normalize_show_filters(show_filters):
-    """Normalize show_filters to an ordered tuple of filter keys.
-    Common footgun: passing a single string (e.g. "no_mtRibo_ETCI") is iterable,
-    which would otherwise be treated as a sequence of characters.
-    """
+def _normalize_mpr_variants(variants):
+    """Normalize public mPR variant names to internal storage keys."""
+    if variants is None:
+        raw_variants = ("all",)
+    elif isinstance(variants, str):
+        raw_variants = (variants,)
+    else:
+        try:
+            raw_variants = tuple(variants)
+        except TypeError:
+            raw_variants = (variants,)
+    out = []
+    for variant in raw_variants:
+        if variant == "all":
+            out.extend(PUBLIC_MPR_VARIANTS.values())
+        else:
+            out.append(_normalize_mpr_variant(variant))
+    # Preserve user order while removing duplicates.
+    return tuple(dict.fromkeys(out))
+def _legacy_filter_to_variant(filter_key, default=None):
+    """Map old filter-key names to public variant names."""
+    if filter_key is None:
+        return default if default is not None else "all"
+    mapping = {
+        "all": "unfiltered",
+        "no_mtRibo_ETCI": "without_mt_ribo_etci",
+        "no_small_highAUPRC": "without_small_high_auprc",
+    }
+    return mapping.get(filter_key, filter_key)
+def _legacy_filters_to_variants(show_filters):
+    """Map old show_filters values to public variant names."""
     if show_filters is None:
-        return tuple(FILTER_STYLES.keys())
+        return "all"
     if isinstance(show_filters, str):
-        return (show_filters,)
+        return _legacy_filter_to_variant(show_filters)
     try:
-        return tuple(show_filters)
+        return tuple(_legacy_filter_to_variant(item) for item in show_filters)
     except TypeError:
-        return (show_filters,)
+        return (_legacy_filter_to_variant(show_filters),)
-def plot_mpr_tp_multi(
+def _normalize_show_filters(show_filters):
+    """Backward-compatible normalizer for old internal filter keys."""
+    return _normalize_mpr_variants(_legacy_filters_to_variants(show_filters))
+def plot_mpr_true_positive_curve(
     dataset_names=None,
     colors=None,
     ax=None,
     save=True,
     outname=None,
     linewidth=1.8,
-    show_filters=("all", "no_mtRibo_ETCI", "no_small_highAUPRC"),
+    variants="unfiltered",
 ):
     """
-    Plot TP vs precision curves for multiple datasets.
+    Plot mPR true-positive vs precision curves for multiple datasets.
     Can auto-detect datasets or use provided dataset names.
     Each dataset gets one color, each filter type gets one line style.
@@ -1562,8 +1872,9 @@ def plot_mpr_tp_multi(
         Output filename. If None, auto-generated.
     linewidth : float
         Line width for all curves
-    show_filters : tuple of str
-        Which filters to show. Default is all three.
+    variants : str or iterable of str
+        Which mPR variants to show. Use "unfiltered",
+        "without_mt_ribo_etci", "without_small_high_auprc", or "all".
     Returns
     -------
@@ -1573,7 +1884,7 @@ def plot_mpr_tp_multi(
     plot_config = config["plotting"]
     input_colors = dload("input", "colors")
-    show_filters = _normalize_show_filters(show_filters)
+    variant_keys = _normalize_mpr_variants(variants)
     # Sanitize color keys
     if input_colors:
@@ -1641,13 +1952,13 @@ def plot_mpr_tp_multi(
         tp_curves = mpr["tp_curves"]
         color = colors[i % len(colors)]
-        for filter_key in show_filters:
-            if filter_key not in tp_curves:
+        for variant_key in variant_keys:
+            if variant_key not in tp_curves:
                 continue
-            data = tp_curves[filter_key]
+            data = tp_curves[variant_key]
             if not isinstance(data, dict) or "tp" not in data or "precision" not in data:
-                log.warning(f"Invalid tp_curves data structure for '{name}' filter '{filter_key}', skipping.")
+                log.warning(f"Invalid tp_curves data structure for '{name}' variant '{variant_key}', skipping.")
                 continue
             tp = np.asarray(data["tp"], dtype=float)
@@ -1661,7 +1972,7 @@ def plot_mpr_tp_multi(
             prec_plot = prec[mask]
             xmax = max(xmax, float(tp_plot.max()))
-            style = FILTER_STYLES.get(filter_key, {})
+            style = MPR_VARIANT_STYLES.get(variant_key, {})
             ax.plot(
                 tp_plot,
                 prec_plot,
@@ -1694,7 +2005,7 @@ def plot_mpr_tp_multi(
     ax.spines['right'].set_visible(False)
     # Create vertically stacked legends
-    _add_vertical_legend(ax, dataset_names, colors, show_filters, linewidth)
+    _add_vertical_legend(ax, dataset_names, colors, variant_keys, linewidth)
     # Save
     if save:
@@ -1713,7 +2024,8 @@ def plot_mpr_tp_multi(
     return ax
-def plot_mpr_complexes_multi(
+def plot_mpr_tp_multi(
     dataset_names=None,
     colors=None,
     ax=None,
@@ -1721,11 +2033,31 @@ def plot_mpr_complexes_multi(
     outname=None,
     linewidth=1.8,
     show_filters=("all", "no_mtRibo_ETCI", "no_small_highAUPRC"),
+):
+    """Backward-compatible wrapper for plot_mpr_true_positive_curve()."""
+    return plot_mpr_true_positive_curve(
+        dataset_names=dataset_names,
+        colors=colors,
+        ax=ax,
+        save=save,
+        outname=outname,
+        linewidth=linewidth,
+        variants=_legacy_filters_to_variants(show_filters),
+    )
+def plot_mpr_complex_coverage_curve(
+    dataset_names=None,
+    colors=None,
+    ax=None,
+    save=True,
+    outname=None,
+    linewidth=1.8,
+    variants="unfiltered",
     show_markers="auto",
     marker_size=20,
 ):
     """
-    Plot module-level PR (#complexes vs precision) for multiple datasets.
+    Plot mPR complex-coverage vs precision curves for multiple datasets.
     Can auto-detect datasets or use provided dataset names.
     Each dataset gets one color, each filter type gets one line style.
@@ -1745,8 +2077,9 @@ def plot_mpr_complexes_multi(
         Output filename. If None, auto-generated.
     linewidth : float
         Line width for all curves
-    show_filters : tuple of str
-        Which filters to show. Default is all three.
+    variants : str or iterable of str
+        Which mPR variants to show. Use "unfiltered",
+        "without_mt_ribo_etci", "without_small_high_auprc", or "all".
     show_markers : bool or "auto"
         If True, draw markers on curves to make short curves visible.
         If "auto" (default), markers are drawn only for curves with <= 10 points.
@@ -1761,7 +2094,7 @@ def plot_mpr_complexes_multi(
     plot_config = config["plotting"]
     input_colors = dload("input", "colors")
-    show_filters = _normalize_show_filters(show_filters)
+    variant_keys = _normalize_mpr_variants(variants)
     # Sanitize color keys
     if input_colors:
@@ -1812,32 +2145,61 @@ def plot_mpr_complexes_multi(
     else:
         fig = ax.figure
-    # Plot each dataset
+    # First pass: determine max coverage across all datasets/filters for adaptive x-axis
+    max_cov_global = 0
+    _mpr_cache = {}
     for i, name in enumerate(dataset_names):
         mpr = dload("mpr", name)
+        _mpr_cache[name] = mpr
+        if mpr is not None:
+            for variant_key in variant_keys:
+                arr = mpr["coverage_curves"].get(variant_key)
+                if arr is not None:
+                    max_cov_global = max(max_cov_global, float(np.asarray(arr).max()))
+    # Build adaptive x-axis limits and ticks
+    import math
+    if max_cov_global <= 200:
+        # Original fixed range — keeps CORUM plots identical to before
+        x_max_plot = 200
+        tick_positions = [1, 2, 20, 200]
+        tick_labels = ["0", "2", "20", "200"]
+    else:
+        # Round up to the next power of 10 so the max bar has breathing room
+        x_max_plot = 10 ** math.ceil(math.log10(max_cov_global + 1))
+        tick_positions = [1, 2]
+        v = 10
+        while v <= x_max_plot:
+            tick_positions.append(v)
+            v *= 10
+        tick_labels = ["0"] + [str(t) for t in tick_positions[1:]]
+    # Plot each dataset
+    for i, name in enumerate(dataset_names):
+        mpr = _mpr_cache[name]
         if mpr is None:
             log.warning(f"mPR data for '{name}' not found, skipping.")
             continue
         precision_cutoffs = np.asarray(mpr["precision_cutoffs"], dtype=float)
         coverage = mpr["coverage_curves"]
         color = colors[i % len(colors)]
-        for filter_key in show_filters:
-            if filter_key not in coverage:
+        for variant_key in variant_keys:
+            if variant_key not in coverage:
                 continue
-            cov = np.asarray(coverage[filter_key], dtype=float)
-            # Keep only positive coverage up to 200 complexes
-            mask = (cov > 0) & (cov <= 200)
+            cov = np.asarray(coverage[variant_key], dtype=float)
+            # Keep only positive coverage within the visible x range
+            mask = (cov > 0) & (cov <= x_max_plot)
             if not mask.any():
                 continue
             cov_plot = cov[mask]
             prec_plot = precision_cutoffs[mask]
-            style = FILTER_STYLES.get(filter_key, {})
+            style = MPR_VARIANT_STYLES.get(variant_key, {})
             # Decide marker visibility
             if show_markers == "auto":
@@ -1858,17 +2220,15 @@ def plot_mpr_complexes_multi(
                     marker=("o" if use_markers else None),
                     markersize=(3 if use_markers else None),
                 )
     # Configure axes
     ax.set_xscale("log")
-    ax.set_xlim(1, 200)
+    ax.set_xlim(1, x_max_plot)
     ax.set_xlabel("# complexes")
     ax.set_ylabel("Precision")
     ax.set_ylim(0.0, 1.05)
-    # Custom x-ticks
-    tick_positions = [1, 2, 20, 200]
-    tick_labels = ["0", "2", "20", "200"]
+    # Adaptive x-ticks
     ax.set_xticks(tick_positions)
     ax.set_xticklabels(tick_labels)
@@ -1877,7 +2237,7 @@ def plot_mpr_complexes_multi(
     ax.spines['right'].set_visible(False)
     # Create vertically stacked legends
-    _add_vertical_legend(ax, dataset_names, colors, show_filters, linewidth)
+    _add_vertical_legend(ax, dataset_names, colors, variant_keys, linewidth)
     # Save
     if save:
@@ -1896,11 +2256,71 @@ def plot_mpr_complexes_multi(
     return ax
-def _add_vertical_legend(ax, dataset_names, colors, show_filters, linewidth):
+def plot_mpr_complexes_multi(
+    dataset_names=None,
+    colors=None,
+    ax=None,
+    save=True,
+    outname=None,
+    linewidth=1.8,
+    show_filters=("all", "no_mtRibo_ETCI", "no_small_highAUPRC"),
+    show_markers="auto",
+    marker_size=20,
+):
+    """
+    Legacy entry point kept for backward compatibility.
+    Translates the old ``show_filters`` keys with
+    ``_legacy_filters_to_variants`` and forwards every other argument
+    unchanged to ``plot_mpr_complex_coverage_curve()``, returning its result.
+    """
+    # Old filter keys -> public variant names expected by the new function.
+    translated_variants = _legacy_filters_to_variants(show_filters)
+    return plot_mpr_complex_coverage_curve(
+        dataset_names=dataset_names,
+        colors=colors,
+        ax=ax,
+        save=save,
+        outname=outname,
+        linewidth=linewidth,
+        variants=translated_variants,
+        show_markers=show_markers,
+        marker_size=marker_size,
+    )
+def plot_mpr_summary(
+    dataset_names=None,
+    colors=None,
+    variants="unfiltered",
+    save=True,
+    linewidth=1.8,
+    show_markers="auto",
+    marker_size=20,
+    auc_variant=None,
+):
+    """
+    Generate the standard mPR summary plots and return complex AUC scores.
+    Calls plot_mpr_true_positive_curve() and
+    plot_mpr_complex_coverage_curve() with the same dataset/variant
+    selection, then delegates to plot_mpr_complex_auc_scores() and returns
+    its result.
+    Parameters
+    ----------
+    dataset_names, colors, save, linewidth, variants
+        Forwarded unchanged to both curve-plotting functions. ``save`` is
+        also forwarded to plot_mpr_complex_auc_scores().
+    show_markers, marker_size
+        Forwarded to plot_mpr_complex_coverage_curve() only.
+    auc_variant : str or None
+        Passed as ``variant`` to plot_mpr_complex_auc_scores(). If None, it
+        is derived from the first normalized entry of ``variants`` through
+        INTERNAL_MPR_VARIANTS, falling back to "unfiltered".
+    Returns
+    -------
+    The return value of plot_mpr_complex_auc_scores().
+    """
+    plot_mpr_true_positive_curve(
+        dataset_names=dataset_names,
+        colors=colors,
+        save=save,
+        linewidth=linewidth,
+        variants=variants,
+    )
+    plot_mpr_complex_coverage_curve(
+        dataset_names=dataset_names,
+        colors=colors,
+        save=save,
+        linewidth=linewidth,
+        variants=variants,
+        show_markers=show_markers,
+        marker_size=marker_size,
+    )
+    if auc_variant is None:
+        variant_keys = _normalize_mpr_variants(variants)
+        if variant_keys:
+            auc_variant = INTERNAL_MPR_VARIANTS.get(variant_keys[0], "unfiltered")
+        else:
+            # An empty selection has no first entry to index; use the same
+            # "unfiltered" default that unmapped keys already fall back to
+            # instead of raising IndexError after the figures were drawn.
+            auc_variant = "unfiltered"
+    return plot_mpr_complex_auc_scores(variant=auc_variant, save=save)
+def _add_vertical_legend(ax, dataset_names, colors, variant_keys, linewidth):
     """
-    Add vertically stacked legends: Dataset on top, Filter below.
+    Add vertically stacked legends: Dataset on top, mPR variant below.
     """
-    show_filters = _normalize_show_filters(show_filters)
+    variant_keys = _normalize_show_filters(variant_keys)
     # Legend 1: Datasets (colors) - solid lines
     dataset_handles = []
     for i, name in enumerate(dataset_names):
@@ -1908,19 +2328,19 @@ def _add_vertical_legend(ax, dataset_names, colors, show_filters, linewidth):
         handle = Line2D([0], [0], color=color, linewidth=linewidth, linestyle="-")
         dataset_handles.append(handle)
-    # Legend 2: Filters (line styles) - black lines
-    filter_handles = []
-    filter_labels = []
-    for filter_key in show_filters:
-        style = FILTER_STYLES.get(filter_key, {})
+    # Legend 2: mPR variants (line styles) - black lines
+    variant_handles = []
+    variant_labels = []
+    for variant_key in variant_keys:
+        style = MPR_VARIANT_STYLES.get(variant_key, {})
         handle = Line2D(
             [0], [0],
             color="black",
             linewidth=linewidth,
             linestyle=style.get("linestyle", "-")
         )
-        filter_handles.append(handle)
-        filter_labels.append(style.get("label", filter_key))
+        variant_handles.append(handle)
+        variant_labels.append(style.get("label", variant_key))
     # Position legends vertically with proper alignment
     # Dataset legend on upper right
@@ -1938,19 +2358,19 @@ def _add_vertical_legend(ax, dataset_names, colors, show_filters, linewidth):
     # Filter legend below the dataset legend, aligned properly without title
     legend2 = ax.legend(
-        filter_handles,
-        filter_labels,
+        variant_handles,
+        variant_labels,
         loc="upper left",
         frameon=False,
         fontsize=7,
         bbox_to_anchor=(1.05, 1.0 - len(dataset_names) * 0.06 - 0.1)
     )
-def _add_dual_legend(ax, dataset_names, colors, show_filters, linewidth):
+def _add_dual_legend(ax, dataset_names, colors, variant_keys, linewidth):
     """
-    Add two legends: one for datasets (colors), one for filters (line styles).
+    Add two legends: one for datasets (colors), one for mPR variants (line styles).
     """
-    show_filters = _normalize_show_filters(show_filters)
+    variant_keys = _normalize_show_filters(variant_keys)
     # Legend 1: Datasets (colors) - solid lines
     dataset_handles = []
     for i, name in enumerate(dataset_names):
@@ -1958,19 +2378,19 @@ def _add_dual_legend(ax, dataset_names, colors, show_filters, linewidth):
         handle = Line2D([0], [0], color=color, linewidth=linewidth, linestyle="-")
         dataset_handles.append(handle)
-    # Legend 2: Filters (line styles) - black lines
-    filter_handles = []
-    filter_labels = []
-    for filter_key in show_filters:
-        style = FILTER_STYLES.get(filter_key, {})
+    # Legend 2: mPR variants (line styles) - black lines
+    variant_handles = []
+    variant_labels = []
+    for variant_key in variant_keys:
+        style = MPR_VARIANT_STYLES.get(variant_key, {})
         handle = Line2D(
             [0], [0],
             color="black",
             linewidth=linewidth,
             linestyle=style.get("linestyle", "-")
         )
-        filter_handles.append(handle)
-        filter_labels.append(style.get("label", filter_key))
+        variant_handles.append(handle)
+        variant_labels.append(style.get("label", variant_key))
     # Position legends
     # Dataset legend on upper right
@@ -1987,19 +2407,12 @@ def _add_dual_legend(ax, dataset_names, colors, show_filters, linewidth):
     # Filter legend on lower left or right depending on plot type
     legend2 = ax.legend(
-        filter_handles,
-        filter_labels,
+        variant_handles,
+        variant_labels,
         loc="lower left",
         frameon=False,
-        title="Filter",
+        title="Variant",
         fontsize=7,
         title_fontsize=8,
     )
-# ============================================================================
-# Single dataset functions are now obsolete
-# ============================================================================
-# Note: The original single dataset functions plot_mpr_tp() and plot_mpr_complexes()
-# have been replaced by the multi functions that now auto-detect available datasets.
-# Use plot_mpr_tp_multi() and plot_mpr_complexes_multi() instead.

pythonflex 0.3.4__py3-none-any.whl → 0.4__py3-none-any.whl

pythonflex 0.3.4py3-none-any.whl → 0.4py3-none-any.whl