PyPI - pythonflex - Versions diffs - 0.3.1__tar.gz → 0.3.3__tar.gz - Mend

pythonflex 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

{pythonflex-0.3.1 → pythonflex-0.3.3}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,14 @@
 Metadata-Version: 2.4
 Name: pythonflex
-Version: 0.3.1
+Version: 0.3.3
 Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
 Author-email: Yasir Demirtaş <tyasird@hotmail.com>
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
 Requires-Python: >=3.9
 Requires-Dist: adjusttext
 Requires-Dist: art

{pythonflex-0.3.1 → pythonflex-0.3.3}/pyproject.toml RENAMED Viewed

@@ -1,13 +1,20 @@
 [project]
 name = "pythonflex"
-version = "0.3.1"
+version = "0.3.3"
 description = "pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data."
 readme = "README.md"
 authors = [
     { name = "Yasir Demirtaş", email = "tyasird@hotmail.com" }
 ]
 requires-python = ">=3.9"
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+]
 # Exclude the input folder
 exclude = ["src/pythonflex/input/*", "src/pythonflex/output/*", "src/pythonflex/examples/output/*",
@@ -67,3 +74,4 @@ pythonflex = { workspace = true }
 dev = [
     "pythonflex",
 ]

{pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/__init__.py RENAMED Viewed

@@ -3,9 +3,9 @@ from .utils import dsave, dload
 from .preprocessing import get_example_data_path, load_datasets,  get_common_genes, filter_matrix_by_genes, load_gold_standard, filter_duplicate_terms
 from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv, update_matploblib_config, mpr_prepare
 from .plotting import (
-    adjust_text_positions, plot_precision_recall_curve, plot_percomplex_scatter,
+    adjust_text_positions, plot_precision_recall_curve, plot_aggregated_pra, plot_iqr_pra, plot_all_runs_pra, plot_percomplex_scatter,
     plot_percomplex_scatter_bysize, plot_complex_contributions, plot_significant_complexes, plot_auc_scores,
-    plot_mpr_tp, plot_mpr_complexes, plot_mpr_tp_multi, plot_mpr_complexes_multi
+    plot_mpr_tp, plot_mpr_complexes, plot_mpr_tp_multi, plot_mpr_complexes_multi, plot_mpr_complexes_auc_scores
 )
 __all__ = [ "log", "get_example_data_path", "fast_corr",
@@ -13,8 +13,8 @@ __all__ = [ "log", "get_example_data_path", "fast_corr",
     "filter_matrix_by_genes", "load_gold_standard", "filter_duplicate_terms", "pra", "pra_percomplex",
     "perform_corr", "is_symmetric", "binary", "has_mirror_of_first_pair", "convert_full_to_half_matrix",
     "drop_mirror_pairs", "quick_sort", "complex_contributions", "adjust_text_positions", "plot_precision_recall_curve",
-    "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
-    "plot_significant_complexes", "plot_auc_scores", "save_results_to_csv", "update_matploblib_config",
+    "plot_aggregated_pra", "plot_iqr_pra", "plot_all_runs_pra", "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
+    "plot_significant_complexes", "plot_auc_scores", "plot_mpr_complexes_auc_scores", "save_results_to_csv", "update_matploblib_config",
     "mpr_prepare", "plot_mpr_tp", "plot_mpr_complexes",
     "plot_mpr_tp_multi", "plot_mpr_complexes_multi"
 ]

{pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/analysis.py RENAMED Viewed

@@ -844,7 +844,7 @@ def quick_sort(df, ascending=False):
     log.done("Pair-wise matrix sorting.")
     return sorted_df
-def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_percomplex"]):
+def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_percomplex", "mpr_complexes_auc"]):
     config = dload("config")  # Load config to get output folder
     output_folder = Path(config.get("output_folder", "output"))
@@ -856,6 +856,18 @@ def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_pe
         if data is None:
             log.warning(f"No data found for category '{category}'. Skipping save.")
             continue
+        if category == "mpr_complexes_auc" and isinstance(data, dict):
+            # Dict[dataset_name -> Dict[filter_key -> auc]]
+            try:
+                df = pd.DataFrame.from_dict(data, orient="index")
+                df.index.name = "Dataset"
+                csv_path = output_folder / f"{category}.csv"
+                df.to_csv(csv_path, index=True)
+                log.info(f"Saved '{category}' to {csv_path}")
+            except Exception as e:
+                log.warning(f"Failed to convert and save '{category}': {e}")
+            continue
         if category == "pr_auc" and isinstance(data, dict):
             # Special handling: Convert dict to DataFrame (assuming keys are indices, values are data)
@@ -1312,6 +1324,64 @@ def _mpr_module_coverage(contrib_df, terms, tp_th=1, percent_th=0.1):
     return coverage
+def _mpr_complexes_auc(
+    coverage: np.ndarray,
+    precision_cutoffs: np.ndarray,
+    max_complexes: float = 200.0,
+) -> float:
+    """Compute a normalized AUC for the Fig. 1F-style mPR curve (#complexes vs precision).
+    The plot uses:
+      x = #covered complexes (capped at `max_complexes`, shown on a log axis)
+      y = precision cutoff
+    The x-axis is rescaled to log10(x) / log10(max_complexes), so x=1 maps to 0
+    and x=max_complexes maps to 1, and precision is integrated over that axis
+    with the trapezoidal rule. With y restricted to [0, 1] the score lies in [0, 1].
+    Args:
+        coverage: Number of covered complexes at each precision cutoff.
+        precision_cutoffs: Precision cutoffs, aligned element-wise with `coverage`.
+        max_complexes: Upper bound of the coverage axis; points above it are
+            ignored. Must be greater than 1 for the log axis to be defined.
+    Returns:
+        The area as a float. Returns 0.0 (not NaN) when fewer than two distinct
+        usable points remain, or when `max_complexes` is not greater than 1.
+    """
+    cov = np.asarray(coverage, dtype=float)
+    prec = np.asarray(precision_cutoffs, dtype=float)
+    # max_complexes <= 1 (or NaN) would divide by log10(1) == 0 below.
+    if cov.size == 0 or prec.size == 0 or not (max_complexes > 1):
+        return 0.0
+    # Match plot_mpr_complexes_multi(): only count cov>0 (log-x cannot show 0)
+    mask = (
+        np.isfinite(cov)
+        & np.isfinite(prec)
+        & (cov > 0)
+        & (cov <= max_complexes)
+        & (prec >= 0)
+        & (prec <= 1.0)
+    )
+    if not np.any(mask):
+        return 0.0
+    # x-axis is log-scaled in the plot; normalize so cov=1 -> 0, cov=max_complexes -> 1
+    # (This matches the plot's tick hack where 1 is labeled as "0".)
+    x = np.log10(cov[mask]) / np.log10(float(max_complexes))
+    y = prec[mask]
+    order = np.argsort(x)
+    x = x[order]
+    y = y[order]
+    # Collapse duplicate x values to their max y (upper envelope). x is sorted,
+    # so the first index of each unique value marks the start of its run.
+    x, run_starts = np.unique(x, return_index=True)
+    y = np.maximum.reduceat(y, run_starts)
+    if x.size < 2:
+        return 0.0
+    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; use whichever exists.
+    integrate = getattr(np, "trapezoid", None) or np.trapz
+    return float(integrate(y, x))
@@ -1379,6 +1449,7 @@ def mpr_prepare(
     tp_curves = {}
     coverage_curves = {}
+    complexes_auc = {}
     precision_cutoffs = None
     for label, removed in filter_sets.items():
@@ -1393,6 +1464,7 @@ def mpr_prepare(
                 "precision": np.array([], dtype=float),
             }
             coverage_curves[label] = np.zeros(0, dtype=float)
+            complexes_auc[label] = float("nan")
             continue
         tp_cum = true.cumsum()
@@ -1417,11 +1489,17 @@ def mpr_prepare(
             percent_th=percent_th,
         )
         coverage_curves[label] = cov
+        complexes_auc[label] = _mpr_complexes_auc(
+            cov,
+            precision_cutoffs,
+            max_complexes=200.0,
+        )
     mpr_data = {
         "precision_cutoffs": precision_cutoffs,
         "tp_curves": tp_curves,
         "coverage_curves": coverage_curves,
+        "complexes_auc": complexes_auc,
         "filters": {
             "no_mtRibo_ETCI": sorted(mtRibo_ids),
             "no_small_highAUPRC": sorted(small_hi_ids),
@@ -1435,6 +1513,9 @@ def mpr_prepare(
     dsave(mpr_data, "mpr", name)
+    # Convenience: store AUCs as their own category for easy export / plotting.
+    dsave(complexes_auc, "mpr_complexes_auc", name)
 ### OLD FUNCTIONS

{pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/examples/basic_usage.py RENAMED Viewed

@@ -8,32 +8,34 @@ import pythonflex as flex
 inputs = {
     "Melanoma (63 Screens)": {
         "path": flex.get_example_data_path("melanoma_cell_lines_500_genes.csv"),
-        "sort": "high"
+        "sort": "high",
+        "color": "#FF0000"
     },
     "Liver (24 Screens)": {
         "path": flex.get_example_data_path("liver_cell_lines_500_genes.csv"),
-        "sort": "high"
+        "sort": "high",
+        "color": "#FFDD00"
     },
     "Neuroblastoma (37 Screens)": {
         "path": flex.get_example_data_path("neuroblastoma_cell_lines_500_genes.csv"),
-        "sort": "high"
+        "sort": "high",
+        "color": "#FFDDDD"
     },
 }
-#%%
 default_config = {
     "min_genes_in_complex": 0,
     "min_genes_per_complex_analysis": 3,
-    "output_folder": "output",
+    "output_folder": "CORUM",
     "gold_standard": "CORUM",
-    "color_map": "RdYlBu",
-    "jaccard": True,
+    "color_map": "BuGn",
+    "jaccard": False,
     "use_common_genes": False,  # Set to False for individual dataset-gold standard intersections
     "plotting": {
         "save_plot": True,
-        "output_type": "pdf",
+        "output_type": "png",
     },
     "preprocessing": {
         "fill_na": True,
@@ -41,7 +43,8 @@ default_config = {
     },
     "corr_function": "numpy",
     "logging": {
-        "visible_levels": ["DONE","STARTED"]  # "PROGRESS", "STARTED", ,"INFO","WARNING"
+        "visible_levels": ["DONE"]
+        # "PROGRESS", "STARTED", "INFO", "WARNING"
     }
 }
@@ -52,26 +55,33 @@ flex.initialize(default_config)
 data, _ = flex.load_datasets(inputs)
 terms, genes_in_terms = flex.load_gold_standard()
-#%%
 # Run analysis
 for name, dataset in data.items():
     pra = flex.pra(name, dataset, is_corr=False)
     fpc = flex.pra_percomplex(name, dataset, is_corr=False)
     cc = flex.complex_contributions(name)
+    flex.mpr_prepare(name)
 #%%
 # Generate plots
-flex.plot_auc_scores()
-flex.plot_precision_recall_curve()
-flex.plot_percomplex_scatter(n_top=20)
-flex.plot_percomplex_scatter_bysize()
-flex.plot_significant_complexes()
-flex.plot_complex_contributions()
+# flex.plot_precision_recall_curve()
+# flex.plot_auc_scores()
+# flex.plot_significant_complexes()
+# flex.plot_percomplex_scatter(n_top=20)
+# flex.plot_percomplex_scatter_bysize()
+# flex.plot_complex_contributions()
+#%%
+#flex.plot_mpr_tp_multi(show_filters="all")
+flex.plot_mpr_complexes_multi(show_filters="all")
 #%%
 # Save results to CSV
 flex.save_results_to_csv()
+# %%
+flex.plot_mpr_complexes_auc_scores("all")
+# %%

pythonflex-0.3.3/src/pythonflex/examples/manuscript.py ADDED Viewed

@@ -0,0 +1,111 @@
+"""
+Manuscript example for the pythonFLEX package.
+Benchmarks the DepMap 25Q2 gene-effect matrix ("DM All Screens") and a
+Cholesky-whitened Chronos matrix ("DM Cholesky Whitening") against the CORUM
+gold standard, then generates the plots and CSV exports.
+NOTE(review): the input CSVs are read from hard-coded absolute paths on the
+author's machine; adjust them before running elsewhere.
+"""
+#%%
+import pythonflex as flex
+import pandas as pd
+# Load the input matrices; the first CSV column becomes the index.
+gene_effect = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/gene_effect.csv', index_col=0)
+# NOTE(review): skin and soft are only referenced by the commented-out inputs below.
+skin = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/skin_cell_lines.csv', index_col=0)
+soft = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/soft_tissue_cell_lines.csv', index_col=0)
+# Transposed after loading, presumably to match gene_effect's orientation (TODO confirm).
+cholesky = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/25Q2_chronos_whitened_Cholesky.csv', index_col=0).T
+# Alternative input set (disabled): all screens vs. skin vs. soft tissue.
+# inputs = {
+#     "All Screens": {
+#         "path": gene_effect,
+#         "sort": "high",
+#         "color": "#000000"
+#     },
+#     "Skin": {
+#         "path": skin,
+#         "sort": "high",
+#         "color": "#FF0000"
+#     },
+#     "Soft Tissue": {
+#         "path": soft,
+#         "sort": "high",
+#         "color": "#FFFF00"
+#     },
+# }
+# Active input set: each entry passes an already-loaded DataFrame under "path",
+# together with its "sort" and "color" settings.
+inputs = {
+    "DM All Screens": {
+        "path": gene_effect,
+        "sort": "high",
+        "color": "#000000"
+    },
+    "DM Cholesky Whitening": {
+        "path": cholesky,
+        "sort": "high",
+        "color": "#FF0000"
+    },
+}
+# Run configuration passed to flex.initialize() below.
+default_config = {
+    "min_genes_in_complex": 2,
+    "min_genes_per_complex_analysis": 3,
+    "output_folder": "CORUM_DMvsCholesky",
+    "gold_standard": "CORUM",
+    "color_map": "BuGn",
+    "jaccard": False,
+    "use_common_genes": False,  # Set to False for individual dataset-gold standard intersections
+    "plotting": {
+        "save_plot": True,
+        "output_type": "pdf",
+    },
+    "preprocessing": {
+        "fill_na": True,
+        "normalize": False,
+    },
+    "corr_function": "numpy",
+    "logging": {
+        "visible_levels": ["DONE"]
+        # "PROGRESS", "STARTED", "INFO", "WARNING"
+    }
+}
+# Initialize logger, config, and output folder
+flex.initialize(default_config)
+# Load datasets and gold standard terms
+data, _ = flex.load_datasets(inputs)
+terms, genes_in_terms = flex.load_gold_standard()
+# Run analysis
+for name, dataset in data.items():
+    pra = flex.pra(name, dataset, is_corr=False)
+    fpc = flex.pra_percomplex(name, dataset, is_corr=False)
+    cc = flex.complex_contributions(name)
+    # Stores the per-dataset mPR data; presumably consumed by the plot_mpr_* calls below.
+    flex.mpr_prepare(name)
+#%%
+# Generate plots
+flex.plot_precision_recall_curve()
+flex.plot_auc_scores()
+flex.plot_significant_complexes()
+flex.plot_percomplex_scatter(n_top=20)
+flex.plot_percomplex_scatter_bysize()
+flex.plot_complex_contributions()
+##
+#%%
+# mPR plots across all filter sets
+flex.plot_mpr_tp_multi(show_filters="all")
+flex.plot_mpr_complexes_multi(show_filters="all")
+# Save results to CSV
+flex.save_results_to_csv()
+# %%
+# %%

pythonflex 0.3.1__tar.gz → 0.3.3__tar.gz

pythonflex 0.3.1__tar.gz → 0.3.3__tar.gz