pythonflex 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pythonflex/__init__.py CHANGED
@@ -3,9 +3,9 @@ from .utils import dsave, dload
3
3
  from .preprocessing import get_example_data_path, load_datasets, get_common_genes, filter_matrix_by_genes, load_gold_standard, filter_duplicate_terms
4
4
  from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv, update_matploblib_config, mpr_prepare
5
5
  from .plotting import (
6
- adjust_text_positions, plot_precision_recall_curve, plot_percomplex_scatter,
6
+ adjust_text_positions, plot_precision_recall_curve, plot_aggregated_pra, plot_iqr_pra, plot_all_runs_pra, plot_percomplex_scatter,
7
7
  plot_percomplex_scatter_bysize, plot_complex_contributions, plot_significant_complexes, plot_auc_scores,
8
- plot_mpr_tp, plot_mpr_complexes, plot_mpr_tp_multi, plot_mpr_complexes_multi
8
+ plot_mpr_tp, plot_mpr_complexes, plot_mpr_tp_multi, plot_mpr_complexes_multi, plot_mpr_complexes_auc_scores
9
9
  )
10
10
 
11
11
  __all__ = [ "log", "get_example_data_path", "fast_corr",
@@ -13,8 +13,8 @@ __all__ = [ "log", "get_example_data_path", "fast_corr",
13
13
  "filter_matrix_by_genes", "load_gold_standard", "filter_duplicate_terms", "pra", "pra_percomplex",
14
14
  "perform_corr", "is_symmetric", "binary", "has_mirror_of_first_pair", "convert_full_to_half_matrix",
15
15
  "drop_mirror_pairs", "quick_sort", "complex_contributions", "adjust_text_positions", "plot_precision_recall_curve",
16
- "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
17
- "plot_significant_complexes", "plot_auc_scores", "save_results_to_csv", "update_matploblib_config",
16
+ "plot_aggregated_pra", "plot_iqr_pra", "plot_all_runs_pra", "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
17
+ "plot_significant_complexes", "plot_auc_scores", "plot_mpr_complexes_auc_scores", "save_results_to_csv", "update_matploblib_config",
18
18
  "mpr_prepare", "plot_mpr_tp", "plot_mpr_complexes",
19
19
  "plot_mpr_tp_multi", "plot_mpr_complexes_multi"
20
20
  ]
pythonflex/analysis.py CHANGED
@@ -844,7 +844,7 @@ def quick_sort(df, ascending=False):
844
844
  log.done("Pair-wise matrix sorting.")
845
845
  return sorted_df
846
846
 
847
- def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_percomplex"]):
847
+ def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_percomplex", "mpr_complexes_auc"]):
848
848
 
849
849
  config = dload("config") # Load config to get output folder
850
850
  output_folder = Path(config.get("output_folder", "output"))
@@ -856,6 +856,18 @@ def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_pe
856
856
  if data is None:
857
857
  log.warning(f"No data found for category '{category}'. Skipping save.")
858
858
  continue
859
+
860
+ if category == "mpr_complexes_auc" and isinstance(data, dict):
861
+ # Dict[dataset_name -> Dict[filter_key -> auc]]
862
+ try:
863
+ df = pd.DataFrame.from_dict(data, orient="index")
864
+ df.index.name = "Dataset"
865
+ csv_path = output_folder / f"{category}.csv"
866
+ df.to_csv(csv_path, index=True)
867
+ log.info(f"Saved '{category}' to {csv_path}")
868
+ except Exception as e:
869
+ log.warning(f"Failed to convert and save '{category}': {e}")
870
+ continue
859
871
 
860
872
  if category == "pr_auc" and isinstance(data, dict):
861
873
  # Special handling: Convert dict to DataFrame (assuming keys are indices, values are data)
@@ -1312,6 +1324,64 @@ def _mpr_module_coverage(contrib_df, terms, tp_th=1, percent_th=0.1):
1312
1324
  return coverage
1313
1325
 
1314
1326
 
1327
+ def _mpr_complexes_auc(
1328
+ coverage: np.ndarray,
1329
+ precision_cutoffs: np.ndarray,
1330
+ max_complexes: float = 200.0,
1331
+ ) -> float:
1332
+ """Compute AUC for the Fig. 1F-style mPR curve (#complexes vs precision).
1333
+
1334
+ The plot uses:
1335
+ x = #covered complexes (capped at `max_complexes`, shown on a log axis)
1336
+ y = precision cutoff
1337
+
1338
+ We compute a normalized AUC by integrating precision over the *normalized*
1339
+ coverage axis:
1340
+ AUC = \int y \, d(x/max_complexes)
1341
+
1342
+ This yields a score in [0, 1] (or NaN if insufficient data).
1343
+ """
1344
+ cov = np.asarray(coverage, dtype=float)
1345
+ prec = np.asarray(precision_cutoffs, dtype=float)
1346
+
1347
+ if cov.size == 0 or prec.size == 0:
1348
+ return 0.0
1349
+
1350
+ # Match plot_mpr_complexes_multi(): only count cov>0 (log-x cannot show 0)
1351
+ mask = (
1352
+ np.isfinite(cov)
1353
+ & np.isfinite(prec)
1354
+ & (cov > 0)
1355
+ & (cov <= max_complexes)
1356
+ & (prec >= 0)
1357
+ & (prec <= 1.0)
1358
+ )
1359
+ if not np.any(mask):
1360
+ return 0.0
1361
+
1362
+ x_cov = cov[mask]
1363
+ y = prec[mask]
1364
+
1365
+ # x-axis is log-scaled in the plot; normalize so cov=1 -> 0, cov=max_complexes -> 1
1366
+ # (This matches the plot's tick hack where 1 is labeled as "0".)
1367
+ x = np.log10(x_cov) / np.log10(float(max_complexes))
1368
+
1369
+ # Sort by x and collapse duplicate x values by taking max y (upper envelope)
1370
+ order = np.argsort(x)
1371
+ x = x[order]
1372
+ y = y[order]
1373
+
1374
+ x_unique = np.unique(x)
1375
+ if x_unique.size != x.size:
1376
+ y = np.array([float(np.nanmax(y[x == xv])) for xv in x_unique], dtype=float)
1377
+ x = x_unique
1378
+
1379
+ if x.size < 2:
1380
+ return 0.0
1381
+
1382
+ return float(np.trapz(y, x))
1383
+
1384
+
1315
1385
 
1316
1386
 
1317
1387
 
@@ -1379,6 +1449,7 @@ def mpr_prepare(
1379
1449
 
1380
1450
  tp_curves = {}
1381
1451
  coverage_curves = {}
1452
+ complexes_auc = {}
1382
1453
  precision_cutoffs = None
1383
1454
 
1384
1455
  for label, removed in filter_sets.items():
@@ -1393,6 +1464,7 @@ def mpr_prepare(
1393
1464
  "precision": np.array([], dtype=float),
1394
1465
  }
1395
1466
  coverage_curves[label] = np.zeros(0, dtype=float)
1467
+ complexes_auc[label] = float("nan")
1396
1468
  continue
1397
1469
 
1398
1470
  tp_cum = true.cumsum()
@@ -1417,11 +1489,17 @@ def mpr_prepare(
1417
1489
  percent_th=percent_th,
1418
1490
  )
1419
1491
  coverage_curves[label] = cov
1492
+ complexes_auc[label] = _mpr_complexes_auc(
1493
+ cov,
1494
+ precision_cutoffs,
1495
+ max_complexes=200.0,
1496
+ )
1420
1497
 
1421
1498
  mpr_data = {
1422
1499
  "precision_cutoffs": precision_cutoffs,
1423
1500
  "tp_curves": tp_curves,
1424
1501
  "coverage_curves": coverage_curves,
1502
+ "complexes_auc": complexes_auc,
1425
1503
  "filters": {
1426
1504
  "no_mtRibo_ETCI": sorted(mtRibo_ids),
1427
1505
  "no_small_highAUPRC": sorted(small_hi_ids),
@@ -1435,6 +1513,9 @@ def mpr_prepare(
1435
1513
 
1436
1514
  dsave(mpr_data, "mpr", name)
1437
1515
 
1516
+ # Convenience: store AUCs as their own category for easy export / plotting.
1517
+ dsave(complexes_auc, "mpr_complexes_auc", name)
1518
+
1438
1519
 
1439
1520
 
1440
1521
  ### OLD FUNCTIONS
@@ -8,32 +8,34 @@ import pythonflex as flex
8
8
  inputs = {
9
9
  "Melanoma (63 Screens)": {
10
10
  "path": flex.get_example_data_path("melanoma_cell_lines_500_genes.csv"),
11
- "sort": "high"
11
+ "sort": "high",
12
+ "color": "#FF0000"
12
13
  },
13
14
  "Liver (24 Screens)": {
14
15
  "path": flex.get_example_data_path("liver_cell_lines_500_genes.csv"),
15
- "sort": "high"
16
+ "sort": "high",
17
+ "color": "#FFDD00"
16
18
  },
17
19
  "Neuroblastoma (37 Screens)": {
18
20
  "path": flex.get_example_data_path("neuroblastoma_cell_lines_500_genes.csv"),
19
- "sort": "high"
21
+ "sort": "high",
22
+ "color": "#FFDDDD"
20
23
  },
21
24
  }
22
25
 
23
26
 
24
27
 
25
- #%%
26
28
  default_config = {
27
29
  "min_genes_in_complex": 0,
28
30
  "min_genes_per_complex_analysis": 3,
29
- "output_folder": "output",
31
+ "output_folder": "CORUM",
30
32
  "gold_standard": "CORUM",
31
- "color_map": "RdYlBu",
32
- "jaccard": True,
33
+ "color_map": "BuGn",
34
+ "jaccard": False,
33
35
  "use_common_genes": False, # Set to False for individual dataset-gold standard intersections
34
36
  "plotting": {
35
37
  "save_plot": True,
36
- "output_type": "pdf",
38
+ "output_type": "png",
37
39
  },
38
40
  "preprocessing": {
39
41
  "fill_na": True,
@@ -41,7 +43,8 @@ default_config = {
41
43
  },
42
44
  "corr_function": "numpy",
43
45
  "logging": {
44
- "visible_levels": ["DONE","STARTED"] # "PROGRESS", "STARTED", ,"INFO","WARNING"
46
+ "visible_levels": ["DONE"]
47
+ # "PROGRESS", "STARTED", ,"INFO","WARNING"
45
48
  }
46
49
  }
47
50
 
@@ -52,26 +55,33 @@ flex.initialize(default_config)
52
55
  data, _ = flex.load_datasets(inputs)
53
56
  terms, genes_in_terms = flex.load_gold_standard()
54
57
 
55
-
56
- #%%
57
58
  # Run analysis
58
59
  for name, dataset in data.items():
59
60
  pra = flex.pra(name, dataset, is_corr=False)
60
61
  fpc = flex.pra_percomplex(name, dataset, is_corr=False)
61
62
  cc = flex.complex_contributions(name)
62
-
63
+ flex.mpr_prepare(name)
64
+
65
+
63
66
 
64
67
 
65
68
  #%%
66
69
  # Generate plots
67
- flex.plot_auc_scores()
68
- flex.plot_precision_recall_curve()
69
- flex.plot_percomplex_scatter(n_top=20)
70
- flex.plot_percomplex_scatter_bysize()
71
- flex.plot_significant_complexes()
72
- flex.plot_complex_contributions()
73
-
70
+ # flex.plot_precision_recall_curve()
71
+ # flex.plot_auc_scores()
72
+ # flex.plot_significant_complexes()
73
+ # flex.plot_percomplex_scatter(n_top=20)
74
+ # flex.plot_percomplex_scatter_bysize()
75
+ # flex.plot_complex_contributions()
76
+ #%%
77
+ #flex.plot_mpr_tp_multi(show_filters="all")
78
+ flex.plot_mpr_complexes_multi(show_filters="all")
74
79
 
75
80
  #%%
76
81
  # Save results to CSV
77
82
  flex.save_results_to_csv()
83
+
84
+
85
+ # %%
86
+ flex.plot_mpr_complexes_auc_scores("all")
87
+ # %%
@@ -0,0 +1,111 @@
1
"""
Basic usage example of the pythonFLEX package.
Demonstrates initialization, data loading, analysis, and plotting.
"""
#%%
import pythonflex as flex
import pandas as pd

# --- Input matrices (DepMap 25Q2, local absolute paths) ---------------------
gene_effect = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/gene_effect.csv', index_col=0)

skin = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/skin_cell_lines.csv', index_col=0)

soft = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/soft_tissue_cell_lines.csv', index_col=0)

# Whitened matrix is stored transposed; .T restores genes-as-rows orientation
# (assumed from the other inputs — TODO confirm against the file layout).
cholesky = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/25Q2_chronos_whitened_Cholesky.csv', index_col=0).T

# Alternative input set (tissue subsets), kept for reference:
# inputs = {
#     "All Screens": {"path": gene_effect, "sort": "high", "color": "#000000"},
#     "Skin":        {"path": skin,        "sort": "high", "color": "#FF0000"},
#     "Soft Tissue": {"path": soft,        "sort": "high", "color": "#FFFF00"},
# }

# Datasets to benchmark against each other.
inputs = {
    "DM All Screens": {
        "path": gene_effect,
        "sort": "high",
        "color": "#000000"
    },
    "DM Cholesky Whitening": {
        "path": cholesky,
        "sort": "high",
        "color": "#FF0000"
    },

}


# Analysis configuration.
default_config = {
    "min_genes_in_complex": 2,
    "min_genes_per_complex_analysis": 3,
    "output_folder": "CORUM_DMvsCholesky",
    "gold_standard": "CORUM",
    "color_map": "BuGn",
    "jaccard": False,
    "use_common_genes": False, # Set to False for individual dataset-gold standard intersections
    "plotting": {
        "save_plot": True,
        "output_type": "pdf",
    },
    "preprocessing": {
        "fill_na": True,
        "normalize": False,
    },
    "corr_function": "numpy",
    "logging": {
        "visible_levels": ["DONE"]
        # other levels: "PROGRESS", "STARTED", "INFO", "WARNING"
    }
}

# Initialize logger, config, and output folder
flex.initialize(default_config)

# Load datasets and gold standard terms
data, _ = flex.load_datasets(inputs)
terms, genes_in_terms = flex.load_gold_standard()

# Run the per-dataset analysis pipeline
for dataset_name, matrix in data.items():
    pra = flex.pra(dataset_name, matrix, is_corr=False)
    fpc = flex.pra_percomplex(dataset_name, matrix, is_corr=False)
    cc = flex.complex_contributions(dataset_name)
    flex.mpr_prepare(dataset_name)


#%%
# Generate plots
flex.plot_precision_recall_curve()
flex.plot_auc_scores()
flex.plot_significant_complexes()
flex.plot_percomplex_scatter(n_top=20)
flex.plot_percomplex_scatter_bysize()
flex.plot_complex_contributions()

#%%
flex.plot_mpr_tp_multi(show_filters="all")
flex.plot_mpr_complexes_multi(show_filters="all")

# Save results to CSV
flex.save_results_to_csv()

# %%
pythonflex/plotting.py CHANGED
@@ -114,6 +114,171 @@ def plot_precision_recall_curve(line_width=2.0, hide_minor_ticks=True):
114
114
  plt.show()
115
115
  plt.close(fig)
116
116
 
117
def plot_aggregated_pra(agg_df, line_width=2.0, hide_minor_ticks=True):
    """
    Plot an aggregated Precision-Recall curve: mean line plus min-max band.

    Parameters
    ----------
    agg_df : pd.DataFrame
        Indexed by 'tp'; must provide 'mean', 'min' and 'max' precision columns.
    line_width : float
        Width of the mean-precision line.
    hide_minor_ticks : bool
        Suppress minor ticks/labels on the logarithmic x-axis.
    """
    config = dload("config")
    plot_config = config["plotting"]

    # Wider figure so the external legend fits without squashing the axes.
    fig, ax = plt.subplots(figsize=(6, 4))
    plt.subplots_adjust(right=0.7)

    ax.set_xscale("log")

    if hide_minor_ticks:
        # Clear minor ticks on the log axis for a cleaner look.
        ax.xaxis.set_minor_locator(NullLocator())
        ax.xaxis.set_minor_formatter(NullFormatter())

    # Drop very low TP counts, consistent with plot_precision_recall_curve.
    curve = agg_df[agg_df.index > 10]

    # Shaded min-max envelope behind the mean curve.
    ax.fill_between(
        curve.index, curve['min'], curve['max'],
        color='gray', alpha=0.3, label='Range (Min-Max)',
    )
    ax.plot(
        curve.index, curve['mean'],
        c="black", label="Mean Precision", linewidth=line_width, alpha=0.9,
    )

    ax.set(title="",
           xlabel="Number of True Positives (TP)",
           ylabel="Precision")
    ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), frameon=False)
    ax.set_ylim(0, 1)

    # Nature style: no grid, open top/right spines.
    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    if plot_config["save_plot"]:
        output_type = plot_config["output_type"]
        output_path = Path(config["output_folder"]) / f"aggregated_precision_recall_curve.{output_type}"
        fig.savefig(output_path, bbox_inches="tight", format=output_type)

    if plot_config.get("show_plot", True):
        plt.show()
    plt.close(fig)
171
+
172
def plot_iqr_pra(agg_df, line_width=2.0, hide_minor_ticks=True):
    """
    Plot an aggregated Precision-Recall curve: mean line plus IQR (25-75%) band.

    Parameters
    ----------
    agg_df : pd.DataFrame
        Indexed by 'tp'; must provide 'mean', '25%' and '75%' precision columns.
    line_width : float
        Width of the mean-precision line.
    hide_minor_ticks : bool
        Suppress minor ticks/labels on the logarithmic x-axis.
    """
    config = dload("config")
    plot_config = config["plotting"]

    # Wider figure so the external legend fits without squashing the axes.
    fig, ax = plt.subplots(figsize=(6, 4))
    plt.subplots_adjust(right=0.7)

    ax.set_xscale("log")

    if hide_minor_ticks:
        # Clear minor ticks on the log axis for a cleaner look.
        ax.xaxis.set_minor_locator(NullLocator())
        ax.xaxis.set_minor_formatter(NullFormatter())

    # Drop very low TP counts, consistent with plot_precision_recall_curve.
    curve = agg_df[agg_df.index > 10]

    # Shaded interquartile band behind the mean curve.
    ax.fill_between(
        curve.index, curve['25%'], curve['75%'],
        color='gray', alpha=0.3, label='IQR (25-75%)',
    )
    ax.plot(
        curve.index, curve['mean'],
        c="black", label="Mean Precision", linewidth=line_width, alpha=0.9,
    )

    ax.set(title="Precision-Recall (IQR)",
           xlabel="Number of True Positives (TP)",
           ylabel="Precision")
    ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), frameon=False)
    ax.set_ylim(0, 1)

    # Nature style: no grid, open top/right spines.
    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    if plot_config["save_plot"]:
        output_type = plot_config["output_type"]
        output_path = Path(config["output_folder"]) / f"aggregated_iqr_precision_recall_curve.{output_type}"
        fig.savefig(output_path, bbox_inches="tight", format=output_type)

    if plot_config.get("show_plot", True):
        plt.show()
    plt.close(fig)
226
+
227
def plot_all_runs_pra(pra_list, mean_df=None, line_width=2.0, hide_minor_ticks=True):
    """
    Plot all individual Precision-Recall curves faintly, with an optional mean line.

    Parameters
    ----------
    pra_list : list
        List of DataFrames (each with 'tp' and 'precision' columns) OR list
        of Series whose index is the TP count and whose values are precision.
    mean_df : pd.DataFrame, optional
        DataFrame with a 'mean' column indexed by tp.
    line_width : float
        Width of the mean line (individual runs are drawn thin and faint).
    hide_minor_ticks : bool
        Suppress minor ticks/labels on the logarithmic x-axis.
    """
    config = dload("config")
    plot_config = config["plotting"]

    fig, ax = plt.subplots(figsize=(6, 4))
    plt.subplots_adjust(right=0.7)

    ax.set_xscale("log")

    if hide_minor_ticks:
        ax.xaxis.set_minor_locator(NullLocator())
        ax.xaxis.set_minor_formatter(NullFormatter())

    # Plot individual runs as faint gray lines.
    for i, run in enumerate(pra_list):
        # Bug fix: a Series has no .columns attribute, so probe for it first
        # instead of unconditionally evaluating `'tp' in run.columns`
        # (which raised AttributeError for the documented Series input).
        has_columns = hasattr(run, "columns")

        # Filter out very low TP counts, consistent with the other PR plots.
        if has_columns and 'tp' in run.columns:
            run_filtered = run[run['tp'] > 10]
            x = run_filtered['tp']
        else:
            run_filtered = run[run.index > 10]
            x = run_filtered.index

        if has_columns and 'precision' in run_filtered.columns:
            y = run_filtered['precision']
        else:
            y = run_filtered.values

        # Only label the first line to avoid cluttering the legend.
        lbl = "Individual Runs" if i == 0 else None
        ax.plot(x, y, c="gray", linewidth=0.5, alpha=0.3, label=lbl)

    # Overlay the mean curve if provided.
    if mean_df is not None:
        mean_df = mean_df[mean_df.index > 10]
        ax.plot(mean_df.index, mean_df['mean'], c="black", label="Mean Precision", linewidth=line_width, alpha=0.9)

    ax.set(title="Precision-Recall (All Runs)",
           xlabel="Number of True Positives (TP)",
           ylabel="Precision")
    ax.legend(loc="upper left", bbox_to_anchor=(1.05, 1), frameon=False)
    ax.set_ylim(0, 1)

    # Nature style: no grid, open top/right spines.
    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    if plot_config["save_plot"]:
        output_type = plot_config["output_type"]
        output_path = Path(config["output_folder"]) / f"aggregated_all_runs_precision_recall_curve.{output_type}"
        fig.savefig(output_path, bbox_inches="tight", format=output_type)

    if plot_config.get("show_plot", True):
        plt.show()
    plt.close(fig)
281
+
117
282
  def plot_percomplex_scatter(n_top=10, sig_color='#B71A2A', nonsig_color='#DBDDDD', label_color='black', border_color='black', border_width=1.0, show_text_background=True):
118
283
  config = dload("config")
119
284
  plot_config = config["plotting"]
@@ -1056,13 +1221,110 @@ def plot_auc_scores():
1056
1221
  return pra_dict
1057
1222
 
1058
1223
 
1224
def plot_mpr_complexes_auc_scores(filter_key: str = "all"):
    """Bar plot of per-dataset AUC scores for the mPR complexes curve (Fig 1F-style).

    Requires `mpr_prepare()` to have been run for each dataset (it stores
    the 'mpr_complexes_auc' category).

    Parameters
    ----------
    filter_key : str
        One of: "all", "no_mtRibo_ETCI", "no_small_highAUPRC".

    Returns
    -------
    pd.Series
        AUC values indexed by dataset name (sorted descending).
    """
    config = dload("config")
    plot_config = config["plotting"]
    mpr_auc_dict = dload("mpr_complexes_auc")
    input_colors = dload("input", "colors")

    # Per-dataset color overrides are keyed by sanitized dataset names.
    if input_colors:
        input_colors = {_sanitize(key): val for key, val in input_colors.items()}

    if not isinstance(mpr_auc_dict, dict) or not mpr_auc_dict:
        log.warning(
            "No mPR complexes AUC data found. Run mpr_prepare() first (it stores 'mpr_complexes_auc')."
        )
        return pd.Series(dtype=float)

    # Collect one AUC per dataset for the requested filter, skipping
    # malformed entries and values that cannot be cast to float.
    scores = {}
    for ds_name, per_filter in mpr_auc_dict.items():
        if not isinstance(per_filter, dict):
            continue
        raw = per_filter.get(filter_key)
        if raw is None:
            continue
        try:
            scores[ds_name] = float(raw)
        except (TypeError, ValueError):
            continue

    if not scores:
        log.warning(
            f"No mPR complexes AUC scores found for filter '{filter_key}'. Available filters: {list(FILTER_STYLES.keys())}"
        )
        return pd.Series(dtype=float)

    series = pd.Series(scores).sort_values(ascending=False)
    dataset_names = list(series.index)
    auc_values = list(series.values)

    fig, ax = plt.subplots()

    # Color logic mirrors the other bar plots: configured colormap with a
    # "tab10" fallback; user-supplied colors override per dataset.
    cmap_name = config.get("color_map", "tab10")
    try:
        cmap = get_cmap(cmap_name)
    except ValueError:
        cmap = get_cmap("tab10")

    n = len(dataset_names)
    if n <= 10 and cmap_name == "tab10":
        fallback_colors = [cmap(i) for i in range(n)]
    else:
        fallback_colors = [cmap(float(i) / max(n - 1, 1)) for i in range(n)]

    bar_colors = []
    for i, ds_name in enumerate(dataset_names):
        override = input_colors.get(ds_name) if input_colors else None
        bar_colors.append(override if override is not None else fallback_colors[i])

    ax.bar(dataset_names, auc_values, color=bar_colors, edgecolor="black")

    # Headroom above the tallest finite bar (0 if none are finite).
    finite_max = max([v for v in auc_values if np.isfinite(v)], default=0.0)
    ax.set_ylim(0, finite_max + 0.01)
    ax.set_ylabel("mPR complexes AUC")
    plt.xticks(rotation=45, ha="right")

    # Styling consistent with the other plots.
    ax.grid(visible=False, which="both", axis="both")
    ax.set_axisbelow(False)
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)

    if plot_config.get("save_plot", False):
        output_type = plot_config.get("output_type", "pdf")
        output_folder = Path(config["output_folder"])
        output_folder.mkdir(parents=True, exist_ok=True)
        output_path = output_folder / f"mpr_complexes_auc_{filter_key}.{output_type}"
        plt.savefig(output_path, bbox_inches="tight", format=output_type)

    if plot_config.get("show_plot", True):
        plt.show()

    plt.close(fig)
    return series
1060
1323
 
1061
1324
  # -----------------------------------------------------------------------------
1062
1325
  # mPR plots (Fig. 1E and Fig. 1F)
1063
1326
  # -----------------------------------------------------------------------------
1064
1327
 
1065
-
1066
1328
  def plot_mpr_complexes(name, ax=None, save=True, outname=None):
1067
1329
  """
1068
1330
  Fig. 1F-style module-level PR:
@@ -1213,7 +1475,6 @@ def plot_mpr_tp(name, ax=None, save=True, outname=None):
1213
1475
 
1214
1476
  return ax
1215
1477
 
1216
-
1217
1478
  """
1218
1479
  Multi-dataset mPR plotting functions.
1219
1480
 
@@ -1234,7 +1495,6 @@ from pathlib import Path
1234
1495
  from .utils import dload
1235
1496
  from .logging_config import log
1236
1497
 
1237
-
1238
1498
  # Default color palette (colorblind-friendly)
1239
1499
  DEFAULT_COLORS = [
1240
1500
  "#4E79A7", # blue
@@ -1257,6 +1517,21 @@ FILTER_STYLES = {
1257
1517
  }
1258
1518
 
1259
1519
 
1520
+ def _normalize_show_filters(show_filters):
1521
+ """Normalize show_filters to an ordered tuple of filter keys.
1522
+
1523
+ Common footgun: passing a single string (e.g. "no_mtRibo_ETCI") is iterable,
1524
+ which would otherwise be treated as a sequence of characters.
1525
+ """
1526
+ if show_filters is None:
1527
+ return tuple(FILTER_STYLES.keys())
1528
+ if isinstance(show_filters, str):
1529
+ return (show_filters,)
1530
+ try:
1531
+ return tuple(show_filters)
1532
+ except TypeError:
1533
+ return (show_filters,)
1534
+
1260
1535
  def plot_mpr_tp_multi(
1261
1536
  dataset_names=None,
1262
1537
  colors=None,
@@ -1297,6 +1572,8 @@ def plot_mpr_tp_multi(
1297
1572
  config = dload("config")
1298
1573
  plot_config = config["plotting"]
1299
1574
  input_colors = dload("input", "colors")
1575
+
1576
+ show_filters = _normalize_show_filters(show_filters)
1300
1577
 
1301
1578
  # Sanitize color keys
1302
1579
  if input_colors:
@@ -1421,14 +1698,21 @@ def plot_mpr_tp_multi(
1421
1698
 
1422
1699
  # Save
1423
1700
  if save:
1701
+ output_type = plot_config.get("output_type", "pdf")
1424
1702
  if outname is None:
1425
- outname = "mpr_tp_multi.pdf"
1703
+ outname = f"mpr_tp_multi.{output_type}"
1704
+
1705
+ # Check if outname is just a filename or a full path
1706
+ outpath = Path(outname)
1707
+ if len(outpath.parts) == 1:
1708
+ # Just a filename, prepend configured output folder
1709
+ outpath = Path(config["output_folder"]) / outname
1710
+
1426
1711
  fig.tight_layout()
1427
- fig.savefig(outname, bbox_inches="tight")
1712
+ fig.savefig(outpath, bbox_inches="tight", format=output_type)
1428
1713
 
1429
1714
  return ax
1430
1715
 
1431
-
1432
1716
  def plot_mpr_complexes_multi(
1433
1717
  dataset_names=None,
1434
1718
  colors=None,
@@ -1437,6 +1721,8 @@ def plot_mpr_complexes_multi(
1437
1721
  outname=None,
1438
1722
  linewidth=1.8,
1439
1723
  show_filters=("all", "no_mtRibo_ETCI", "no_small_highAUPRC"),
1724
+ show_markers="auto",
1725
+ marker_size=20,
1440
1726
  ):
1441
1727
  """
1442
1728
  Plot module-level PR (#complexes vs precision) for multiple datasets.
@@ -1461,6 +1747,11 @@ def plot_mpr_complexes_multi(
1461
1747
  Line width for all curves
1462
1748
  show_filters : tuple of str
1463
1749
  Which filters to show. Default is all three.
1750
+ show_markers : bool or "auto"
1751
+ If True, draw markers on curves to make short curves visible.
1752
+ If "auto" (default), markers are drawn only for curves with <= 10 points.
1753
+ marker_size : int
1754
+ Scatter marker size (points^2) when markers are shown.
1464
1755
 
1465
1756
  Returns
1466
1757
  -------
@@ -1469,6 +1760,8 @@ def plot_mpr_complexes_multi(
1469
1760
  config = dload("config")
1470
1761
  plot_config = config["plotting"]
1471
1762
  input_colors = dload("input", "colors")
1763
+
1764
+ show_filters = _normalize_show_filters(show_filters)
1472
1765
 
1473
1766
  # Sanitize color keys
1474
1767
  if input_colors:
@@ -1545,13 +1838,26 @@ def plot_mpr_complexes_multi(
1545
1838
  prec_plot = precision_cutoffs[mask]
1546
1839
 
1547
1840
  style = FILTER_STYLES.get(filter_key, {})
1548
- ax.plot(
1549
- cov_plot,
1550
- prec_plot,
1551
- color=color,
1552
- linestyle=style.get("linestyle", "-"),
1553
- linewidth=linewidth,
1554
- )
1841
+
1842
+ # Decide marker visibility
1843
+ if show_markers == "auto":
1844
+ use_markers = (cov_plot.size <= 10)
1845
+ else:
1846
+ use_markers = bool(show_markers)
1847
+
1848
+ if cov_plot.size == 1:
1849
+ # A single point is effectively invisible as a line; draw a marker.
1850
+ ax.scatter(cov_plot, prec_plot, color=color, s=marker_size, zorder=3)
1851
+ else:
1852
+ ax.plot(
1853
+ cov_plot,
1854
+ prec_plot,
1855
+ color=color,
1856
+ linestyle=style.get("linestyle", "-"),
1857
+ linewidth=linewidth,
1858
+ marker=("o" if use_markers else None),
1859
+ markersize=(3 if use_markers else None),
1860
+ )
1555
1861
 
1556
1862
  # Configure axes
1557
1863
  ax.set_xscale("log")
@@ -1575,18 +1881,26 @@ def plot_mpr_complexes_multi(
1575
1881
 
1576
1882
  # Save
1577
1883
  if save:
1884
+ output_type = plot_config.get("output_type", "pdf")
1578
1885
  if outname is None:
1579
- outname = "mpr_complexes_multi.pdf"
1886
+ outname = f"mpr_complexes_multi.{output_type}"
1887
+
1888
+ # Check if outname is just a filename or a full path
1889
+ outpath = Path(outname)
1890
+ if len(outpath.parts) == 1:
1891
+ # Just a filename, prepend configured output folder
1892
+ outpath = Path(config["output_folder"]) / outname
1893
+
1580
1894
  fig.tight_layout()
1581
- fig.savefig(outname, bbox_inches="tight")
1895
+ fig.savefig(outpath, bbox_inches="tight", format=output_type)
1582
1896
 
1583
1897
  return ax
1584
1898
 
1585
-
1586
1899
  def _add_vertical_legend(ax, dataset_names, colors, show_filters, linewidth):
1587
1900
  """
1588
1901
  Add vertically stacked legends: Dataset on top, Filter below.
1589
1902
  """
1903
+ show_filters = _normalize_show_filters(show_filters)
1590
1904
  # Legend 1: Datasets (colors) - solid lines
1591
1905
  dataset_handles = []
1592
1906
  for i, name in enumerate(dataset_names):
@@ -1632,11 +1946,11 @@ def _add_vertical_legend(ax, dataset_names, colors, show_filters, linewidth):
1632
1946
  bbox_to_anchor=(1.05, 1.0 - len(dataset_names) * 0.06 - 0.1)
1633
1947
  )
1634
1948
 
1635
-
1636
1949
  def _add_dual_legend(ax, dataset_names, colors, show_filters, linewidth):
1637
1950
  """
1638
1951
  Add two legends: one for datasets (colors), one for filters (line styles).
1639
1952
  """
1953
+ show_filters = _normalize_show_filters(show_filters)
1640
1954
  # Legend 1: Datasets (colors) - solid lines
1641
1955
  dataset_handles = []
1642
1956
  for i, name in enumerate(dataset_names):
@@ -1682,7 +1996,6 @@ def _add_dual_legend(ax, dataset_names, colors, show_filters, linewidth):
1682
1996
  title_fontsize=8,
1683
1997
  )
1684
1998
 
1685
-
1686
1999
  # ============================================================================
1687
2000
  # Single dataset functions are now obsolete
1688
2001
  # ============================================================================
@@ -13,28 +13,36 @@ from pathlib import Path
13
13
 
14
14
 
15
15
def return_package_dir():
    """Return the directory of the installed ``pythonflex`` package as a string.

    Handles three cases:
      * editable (``pip install -e``) installs, located via the PEP 610
        ``direct_url.json`` metadata (assumes a ``src/`` project layout);
      * regular installs, located via ``importlib.resources.files``;
      * anything else (metadata missing or unreadable), falling back to
        the directory containing this module.
    """
    try:
        # Look up the installed distribution metadata.
        dist = distribution('pythonflex')

        # PEP 610: direct_url.json exists only for direct (e.g. editable) installs.
        try:
            direct_url_text = dist.read_text('direct_url.json')
        except FileNotFoundError:
            direct_url_text = None

        if direct_url_text:
            direct_url = json.loads(direct_url_text)
            if direct_url.get('dir_info', {}).get('editable'):
                # Editable install: derive the project root from the file:// URL.
                project_url = direct_url['url']
                # Bug fix: the previous removeprefix('file:///') also stripped
                # the leading '/' of POSIX absolute paths (file:///home/x ->
                # 'home/x', a relative path). Strip only 'file://', then drop
                # the spurious slash before a Windows drive letter.
                project_root = project_url.removeprefix('file://')
                if os.name == 'nt':
                    # file:///C:/... -> '/C:/...' -> 'C:/...'
                    project_root = project_root.lstrip('/')
                project_root = project_root.replace('/', os.sep)
                # Assuming src layout: project_root/src/pythonflex
                package_dir = os.path.join(project_root, 'src', 'pythonflex')
            else:
                # Direct but non-editable install.
                package_dir = str(files('pythonflex'))
        else:
            # No direct_url, assume non-editable
            package_dir = str(files('pythonflex'))

    except Exception:  # PackageNotFoundError or other issues
        # Fallback to local directory relative to this file
        # precise location: src/pythonflex/preprocessing.py -> package dir is parent
        package_dir = str(Path(__file__).parent)

    return package_dir
40
48
 
@@ -190,7 +198,6 @@ def load_gold_standard():
190
198
  "PATHWAY": "gold_standard/PATHWAY.parquet"
191
199
  }
192
200
 
193
-
194
201
  if gold_standard_source in gold_standard_files:
195
202
  # Load predefined gold standard from package resources
196
203
  filename = gold_standard_files[gold_standard_source]
@@ -1,8 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pythonflex
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
5
5
  Author-email: Yasir Demirtaş <tyasird@hotmail.com>
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Operating System :: OS Independent
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
6
12
  Requires-Python: >=3.9
7
13
  Requires-Dist: adjusttext
8
14
  Requires-Dist: art
@@ -1,8 +1,8 @@
1
- pythonflex/__init__.py,sha256=UKu_QgAZsWgERWedUA7drG4kIQ8zKJLSLc8OYHHNJSM,1570
2
- pythonflex/analysis.py,sha256=n8gIidtRk9_DxoO6Z4g1MSH0rYsPfQAKdzPtEguZqQY,75067
1
+ pythonflex/__init__.py,sha256=MoDbdVhclK_PF_u9vzN4ntWX6hTRAKfvkTiDisIci5o,1748
2
+ pythonflex/analysis.py,sha256=gKJ4cYA_TWYe521nAXizMqChd36A90TWfDf595fw_0M,77760
3
3
  pythonflex/logging_config.py,sha256=iqRKK18zvtfV_-bYHWrXtSZywiUtYxoHkw0ZnVORQBQ,2015
4
- pythonflex/plotting.py,sha256=7S8IibsyEaK26YKv6FXShMix_15vCUQnZIxD7VyJwmQ,64036
5
- pythonflex/preprocessing.py,sha256=5cV8zNbrgCslidrMpMjGr-7HzTZgVligWVEsUQu3Stw,10999
4
+ pythonflex/plotting.py,sha256=AOzgyhJX5bPMoGs2ih2zbA30Dm-OoWpk8MNBC-9OQ94,75981
5
+ pythonflex/preprocessing.py,sha256=jIeyB2SPPac-svtjB-zGe3vIyOSVB-SxYIFyNFFiCsY,11440
6
6
  pythonflex/utils.py,sha256=7toGnKbA_TKBtHz1HLk7ckWM0bjuFw_Byhp6ZUJaNs4,3694
7
7
  pythonflex/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  pythonflex/data/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -16,13 +16,9 @@ pythonflex/data/gold_standard/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
16
16
  pythonflex/data/gold_standard/corum.csv,sha256=2rZeyr2Ghm7f-gFxCZnhPtxI2jxRoiZMUEH2EJwAgsI,208889
17
17
  pythonflex/data/gold_standard/gobp.csv,sha256=TO9yfx9mO8WkXvWfSB-pFId9T8xYfqdZpshAXC0Fyj8,1739167
18
18
  pythonflex/data/gold_standard/pathway.csv,sha256=J3HKVLUZ_Oxucmn_14ieYp3Wr2lcKtp0nIl4_8_K2Yc,489424
19
- pythonflex/examples/basic_usage.py,sha256=LniAq5Al5meNfcqlniYIRpOYRTce0BvGhZpw4P6_djc,1994
20
- pythonflex/examples/comparison.py,sha256=Gaakp4xk8EWd_Sdmm9I9QHxk5DyQwpLUfHlQKn1l7WU,2084
21
- pythonflex/examples/dataset_filtering.py,sha256=7PCKCZWYLZUn3XAStGTCaVGbY9F0gqjT0ote_G6WPho,1238
22
- pythonflex/examples/diag.py,sha256=9sKfMTn8_em6IJOAX2hE1DRJs7-qrRuWyXWfQUwSK5c,3815
23
- pythonflex/examples/test.py,sha256=B8-JE5AU7be5loSr6Qv2rOviXXe1NRCYpaEGfGjaow0,2388
24
- pythonflex/examples/test2.py,sha256=nbjd3A9R2R-Cf4P9jdgclysoZbQVC2Cmzt4Npbsxw6w,184
25
- pythonflex-0.3.1.dist-info/METADATA,sha256=l6hqrsmT0tRkaDgg4g5KwpRC-tG-Yj7e8sSWuE6uD54,3928
26
- pythonflex-0.3.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
27
- pythonflex-0.3.1.dist-info/entry_points.txt,sha256=37liK1baI_CRVDivpjsn8JDClL9_YeTTuSMAZ3Ty7oE,47
28
- pythonflex-0.3.1.dist-info/RECORD,,
19
+ pythonflex/examples/basic_usage.py,sha256=dizQXYPJWjW7-2d2G29a8qYCBRIsKhrvxOxyXtudK30,2265
20
+ pythonflex/examples/manuscript.py,sha256=V28vIBFmrxGsE_YhvouRFiLKWC9CorbOx9Ed3B2L8bQ,2810
21
+ pythonflex-0.3.3.dist-info/METADATA,sha256=l5CnF5hX_qgnhMEHnTQbK9ZrBJrIRKzbYeCVCC7Wv1M,4226
22
+ pythonflex-0.3.3.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
23
+ pythonflex-0.3.3.dist-info/entry_points.txt,sha256=37liK1baI_CRVDivpjsn8JDClL9_YeTTuSMAZ3Ty7oE,47
24
+ pythonflex-0.3.3.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.28.0
2
+ Generator: hatchling 1.29.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,78 +0,0 @@
1
- """
2
- Basic usage example of the pythonFLEX package.
3
- Demonstrates initialization, data loading, analysis, and plotting.
4
- """
5
- #%%
6
- import pythonflex as flex
7
- import pandas as pd
8
-
9
- depmap = pd.read_csv('../../../../_datasets/depmap/25Q2/gene_effect.csv', index_col=0)
10
- white = pd.read_csv('../../../../_datasets/depmap/25Q2/25Q2_chronos_whitened_PCA.csv', index_col=0).T
11
-
12
- inputs = {
13
- "25Q2": {
14
- "path": depmap,
15
- "sort": "high",
16
- "color": "#fff000" # Black
17
- },
18
-
19
- "25Q2 white": {
20
- "path": white,
21
- "sort": "high",
22
- "color": "#ff0000" # Orange
23
- },
24
- }
25
-
26
- default_config = {
27
- "min_genes_in_complex": 0,
28
- "min_genes_per_complex_analysis": 3,
29
- "output_folder": "CORUM_25Q2_comparison2",
30
- "gold_standard": "CORUM",
31
- "color_map": "BuGn",
32
- "jaccard": False,
33
- "use_common_genes": False, # Set to False for individual dataset-gold standard intersections
34
- "plotting": {
35
- "save_plot": True,
36
- "output_type": "png",
37
- },
38
- "preprocessing": {
39
- "fill_na": True,
40
- "normalize": False,
41
- },
42
- "corr_function": "numpy",
43
- "logging": {
44
- "visible_levels": ["DONE"] # "PROGRESS", "STARTED", ,"INFO","WARNING"
45
- }
46
- }
47
-
48
- # Initialize logger, config, and output folder
49
- flex.initialize(default_config)
50
-
51
- # Load datasets and gold standard terms
52
- data, _ = flex.load_datasets(inputs)
53
- terms, genes_in_terms = flex.load_gold_standard()
54
-
55
- # Run analysis
56
- for name, dataset in data.items():
57
- pra = flex.pra(name, dataset, is_corr=False)
58
- fpc = flex.pra_percomplex(name, dataset, is_corr=False)
59
- flex.mpr_prepare(name) # Add this line
60
- cc = flex.complex_contributions(name)
61
-
62
-
63
-
64
-
65
- #%%
66
- # Generate plots
67
- flex.plot_precision_recall_curve()
68
- flex.plot_auc_scores()
69
- flex.plot_significant_complexes()
70
- flex.plot_percomplex_scatter(n_top=20)
71
- flex.plot_percomplex_scatter_bysize()
72
- flex.plot_complex_contributions()
73
- flex.plot_mpr_tp_multi()
74
- flex.plot_mpr_complexes_multi()
75
- # Save results to CSV
76
- # flex.save_results_to_csv()
77
-
78
- # %%
@@ -1,42 +0,0 @@
1
-
2
- # %%
3
- import pandas as pd
4
-
5
- df = pd.read_csv("../../../../datasets/depmap/24Q4/CRISPRGeneEffect.csv",index_col=0)
6
- model = pd.read_csv("../../../../datasets/depmap/24Q4/Model.csv",index_col=0)
7
-
8
- df.columns = df.columns.str.split(" \\(").str[0]
9
- df = df.T
10
-
11
- #%%
12
-
13
- # %%
14
- # get ModelID of selected disease for example OncotreePrimaryDisease==Melanoma
15
- melanoma = model[model.OncotreePrimaryDisease=="Melanoma"].index.unique().values
16
- liver = model[model.OncotreeLineage=="Liver"].index.unique().values
17
- neuroblastoma = model[model.OncotreePrimaryDisease=="Neuroblastoma"].index.unique().values
18
-
19
- # %%
20
- # mel.index is model ids, filter that ids in the columns of df
21
- mel_df = df.loc[:,df.columns.isin(melanoma)]
22
- liver_df = df.loc[:,df.columns.isin(liver)]
23
- neuro_df = df.loc[:,df.columns.isin(neuroblastoma)]
24
-
25
-
26
- # %%
27
- mel_df.to_csv("melanoma.csv")
28
- liver_df.to_csv("liver.csv")
29
- neuro_df.to_csv("neuroblastoma.csv")
30
- df.to_csv("depmap_geneeffect_all_cellines.csv")
31
-
32
-
33
- # %%
34
- import pandas as pd
35
- df = pd.read_csv('../../../../_datasets/depmap/19Q2/Achilles_gene_effect.csv', index_col=0)
36
- df.columns = df.columns.str.split(" \\(").str[0]
37
- df = df.T
38
-
39
- # %%
40
- df.to_csv("../../../../_datasets/depmap/19Q2/gene_effect.csv")
41
-
42
- # %%
@@ -1,106 +0,0 @@
1
- #%%
2
- # Run this in Jupyter to test the two approaches
3
-
4
- import numpy as np
5
- import pandas as pd
6
- from pythonflex.utils import dload
7
-
8
- dataset_name = "[CORUM] 19Q2"
9
-
10
- pra = dload("pra", dataset_name)
11
- mpr = dload("mpr", dataset_name)
12
-
13
- filter_ids = set(mpr["filters"]["no_mtRibo_ETCI"])
14
- print(f"Filter IDs: {filter_ids}")
15
-
16
- cid_col = "complex_id" if "complex_id" in pra.columns else "complex_ids"
17
-
18
- # Sort by score descending
19
- pra_sorted = pra.sort_values("score", ascending=False).reset_index(drop=True)
20
-
21
- def has_filter_id(cids, filter_ids):
22
- """Check if any complex ID is in filter_ids"""
23
- if isinstance(cids, (np.ndarray, list)):
24
- ids = [int(x) for x in cids if pd.notnull(x)]
25
- else:
26
- return False
27
- return any(c in filter_ids for c in ids)
28
-
29
- # Mark which pairs should be filtered
30
- pra_sorted["should_filter"] = pra_sorted[cid_col].apply(lambda x: has_filter_id(x, filter_ids))
31
-
32
- print(f"\nTotal pairs: {len(pra_sorted)}")
33
- print(f"Pairs to filter: {pra_sorted['should_filter'].sum()}")
34
- print(f"TPs to filter: {(pra_sorted['should_filter'] & (pra_sorted['prediction']==1)).sum()}")
35
-
36
- # APPROACH 1: Mark as negative (what your Python does)
37
- # Keep all rows, but filtered TPs become FPs
38
- print("\n" + "=" * 70)
39
- print("APPROACH 1: Mark filtered TPs as negatives (keep rows)")
40
- print("=" * 70)
41
-
42
- df1 = pra_sorted.copy()
43
- df1["true_filtered"] = df1["prediction"].copy()
44
- df1.loc[df1["should_filter"] & (df1["prediction"]==1), "true_filtered"] = 0
45
-
46
- tp_cum_1 = df1["true_filtered"].cumsum()
47
- prec_1 = tp_cum_1 / (np.arange(len(df1)) + 1)
48
-
49
- # Show precision at key TP counts
50
- print("\nPrecision at key TP counts:")
51
- for target_tp in [10, 50, 100, 500, 1000]:
52
- if target_tp <= tp_cum_1.max():
53
- idx = np.where(tp_cum_1 >= target_tp)[0][0]
54
- print(f" TP={target_tp}: precision={prec_1.iloc[idx]:.3f} (at rank {idx+1})")
55
-
56
- # APPROACH 2: Remove rows entirely (what R does with replace=FALSE)
57
- print("\n" + "=" * 70)
58
- print("APPROACH 2: Remove filtered rows entirely")
59
- print("=" * 70)
60
-
61
- df2 = pra_sorted[~pra_sorted["should_filter"]].copy().reset_index(drop=True)
62
-
63
- tp_cum_2 = df2["prediction"].cumsum()
64
- prec_2 = tp_cum_2 / (np.arange(len(df2)) + 1)
65
-
66
- print(f"\nRows remaining after removal: {len(df2)}")
67
- print(f"TPs remaining: {df2['prediction'].sum()}")
68
-
69
- print("\nPrecision at key TP counts:")
70
- for target_tp in [10, 50, 100, 500, 1000]:
71
- if target_tp <= tp_cum_2.max():
72
- idx = np.where(tp_cum_2 >= target_tp)[0][0]
73
- print(f" TP={target_tp}: precision={prec_2.iloc[idx]:.3f} (at rank {idx+1})")
74
-
75
- # APPROACH 3: Only remove filtered POSITIVE pairs, keep negatives
76
- print("\n" + "=" * 70)
77
- print("APPROACH 3: Remove only filtered TPs (keep filtered negatives)")
78
- print("=" * 70)
79
-
80
- # This removes TP rows that contain filter IDs, but keeps negative rows
81
- remove_mask = pra_sorted["should_filter"] & (pra_sorted["prediction"] == 1)
82
- df3 = pra_sorted[~remove_mask].copy().reset_index(drop=True)
83
-
84
- tp_cum_3 = df3["prediction"].cumsum()
85
- prec_3 = tp_cum_3 / (np.arange(len(df3)) + 1)
86
-
87
- print(f"\nRows remaining: {len(df3)}")
88
- print(f"TPs remaining: {df3['prediction'].sum()}")
89
-
90
- print("\nPrecision at key TP counts:")
91
- for target_tp in [10, 50, 100, 500, 1000]:
92
- if target_tp <= tp_cum_3.max():
93
- idx = np.where(tp_cum_3 >= target_tp)[0][0]
94
- print(f" TP={target_tp}: precision={prec_3.iloc[idx]:.3f} (at rank {idx+1})")
95
-
96
- print("\n" + "=" * 70)
97
- print("COMPARISON")
98
- print("=" * 70)
99
- print("""
100
- Approach 1 (mark as negative): Filtered TPs become FPs, lowering precision
101
- Approach 2 (remove all filtered): Both TPs and negatives removed
102
- Approach 3 (remove only TPs): Only filtered TPs removed, negatives kept
103
-
104
- The R code uses Approach 3 (remove positive pairs that contain the filter ID).
105
- """)
106
- # %%
@@ -1,104 +0,0 @@
1
- #%%
2
- import pythonflex as flex
3
- import os
4
-
5
- # # Define specific cell line types you're interested in
6
- DATA_DIR = "C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/"
7
-
8
- # Specific cell lines of interest with "_cell_lines" suffix removed
9
- cell_line_files = [
10
- "soft_tissue_cell_lines.csv",
11
- "skin_cell_lines.csv",
12
- # "lung_cell_lines.csv",
13
- # "head_and_neck_cell_lines.csv",
14
- # "esophagus_stomach_cell_lines.csv",
15
- ]
16
-
17
- inputs = {}
18
-
19
- # Create inputs dict with shortened names (removing "_cell_lines" suffix)
20
- for filename in cell_line_files:
21
- # Remove .csv extension and _cell_lines suffix
22
- key = filename.replace("_cell_lines.csv", "")
23
- full_path = os.path.join(DATA_DIR, filename)
24
-
25
- inputs[key] = {
26
- "path": full_path,
27
- "sort": "high"
28
- }
29
-
30
- inputs['depmap'] = {
31
- "path": "C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/gene_effect.csv",
32
- "sort": "high"
33
- }
34
-
35
- # Print the resulting inputs dictionary
36
- print("Configured inputs:")
37
- for key, value in inputs.items():
38
- print(f" {key}: {value['path']}")
39
-
40
-
41
-
42
- default_config = {
43
- "min_genes_in_complex": 2,
44
- "min_genes_per_complex_analysis": 2,
45
- "output_folder": "25q2_min_genes_2",
46
- "gold_standard": "CORUM",
47
- "color_map": "RdYlBu",
48
- "jaccard": True,
49
- "plotting": {
50
- "save_plot": True,
51
- "output_type": "pdf",
52
- },
53
- "preprocessing": {
54
- "fill_na": True,
55
- "normalize": False,
56
- },
57
- "corr_function": "numpy",
58
- "logging": {
59
- "visible_levels": ["DONE","STARTED"] # "PROGRESS", "STARTED", ,"INFO","WARNING"
60
- }
61
- }
62
-
63
- # Initialize logger, config, and output folder
64
- flex.initialize(default_config)
65
-
66
- # Load datasets and gold standard terms
67
- data, _ = flex.load_datasets(inputs)
68
- terms, genes_in_terms = flex.load_gold_standard()
69
-
70
-
71
- #%%
72
- # Run analysis
73
- for name, dataset in data.items():
74
- pra = flex.pra(name, dataset, is_corr=False)
75
- fpc = flex.pra_percomplex(name, dataset, is_corr=False)
76
- cc = flex.complex_contributions(name)
77
-
78
-
79
-
80
- #%%
81
- # Generate plots
82
- flex.plot_auc_scores()
83
- flex.plot_precision_recall_curve()
84
- flex.plot_percomplex_scatter()
85
- flex.plot_percomplex_scatter_bysize()
86
- flex.plot_significant_complexes()
87
- flex.plot_complex_contributions()
88
-
89
-
90
- #%%
91
- # Save results to CSV
92
- flex.save_results_to_csv()
93
-
94
-
95
-
96
-
97
-
98
-
99
-
100
-
101
-
102
- #%%
103
-
104
-
@@ -1,11 +0,0 @@
1
- #%%
2
- import anndata as ad
3
-
4
- adata = ad.read_h5ad(
5
- "C:/Users/yd/Desktop/22mcell/GWCD4i.pseudobulk_merged.h5ad",
6
- backed="r" # read-only, disk-backed
7
- )
8
-
9
- #%%
10
- adata
11
- # %%