pythonflex 0.3.1__tar.gz → 0.3.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. {pythonflex-0.3.1 → pythonflex-0.3.3}/PKG-INFO +7 -1
  2. {pythonflex-0.3.1 → pythonflex-0.3.3}/pyproject.toml +10 -2
  3. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/__init__.py +4 -4
  4. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/analysis.py +82 -1
  5. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/examples/basic_usage.py +29 -19
  6. pythonflex-0.3.3/src/pythonflex/examples/manuscript.py +111 -0
  7. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/plotting.py +331 -18
  8. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/preprocessing.py +28 -21
  9. pythonflex-0.3.1/src/pythonflex/examples/comparison.py +0 -78
  10. pythonflex-0.3.1/src/pythonflex/examples/dataset_filtering.py +0 -42
  11. pythonflex-0.3.1/src/pythonflex/examples/diag.py +0 -106
  12. pythonflex-0.3.1/src/pythonflex/examples/test.py +0 -104
  13. pythonflex-0.3.1/src/pythonflex/examples/test2.py +0 -11
  14. {pythonflex-0.3.1 → pythonflex-0.3.3}/.gitignore +0 -0
  15. {pythonflex-0.3.1 → pythonflex-0.3.3}/.python-version +0 -0
  16. {pythonflex-0.3.1 → pythonflex-0.3.3}/README.md +0 -0
  17. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/__init__.py +0 -0
  18. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/dataset/__init__.py +0 -0
  19. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/dataset/liver_cell_lines_500_genes.csv +0 -0
  20. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/dataset/melanoma_cell_lines_500_genes.csv +0 -0
  21. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/dataset/neuroblastoma_cell_lines_500_genes.csv +0 -0
  22. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/gold_standard/CORUM.parquet +0 -0
  23. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/gold_standard/GOBP.parquet +0 -0
  24. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/gold_standard/PATHWAY.parquet +0 -0
  25. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/gold_standard/__init__.py +0 -0
  26. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/gold_standard/corum.csv +0 -0
  27. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/gold_standard/gobp.csv +0 -0
  28. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/data/gold_standard/pathway.csv +0 -0
  29. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/logging_config.py +0 -0
  30. {pythonflex-0.3.1 → pythonflex-0.3.3}/src/pythonflex/utils.py +0 -0
  31. {pythonflex-0.3.1 → pythonflex-0.3.3}/todo.txt +0 -0
  32. {pythonflex-0.3.1 → pythonflex-0.3.3}/uv.lock +0 -0
@@ -1,8 +1,14 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pythonflex
3
- Version: 0.3.1
3
+ Version: 0.3.3
4
4
  Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
5
5
  Author-email: Yasir Demirtaş <tyasird@hotmail.com>
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Operating System :: OS Independent
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
6
12
  Requires-Python: >=3.9
7
13
  Requires-Dist: adjusttext
8
14
  Requires-Dist: art
@@ -1,13 +1,20 @@
1
1
  [project]
2
2
  name = "pythonflex"
3
- version = "0.3.1"
3
+ version = "0.3.3"
4
4
  description = "pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data."
5
5
  readme = "README.md"
6
6
  authors = [
7
7
  { name = "Yasir Demirtaş", email = "tyasird@hotmail.com" }
8
8
  ]
9
9
  requires-python = ">=3.9"
10
-
10
+ classifiers = [
11
+ "Programming Language :: Python :: 3",
12
+ "Programming Language :: Python :: 3.9",
13
+ "Programming Language :: Python :: 3.10",
14
+ "Programming Language :: Python :: 3.11",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Operating System :: OS Independent",
17
+ ]
11
18
 
12
19
  # Exclude the input folder
13
20
  exclude = ["src/pythonflex/input/*", "src/pythonflex/output/*", "src/pythonflex/examples/output/*",
@@ -67,3 +74,4 @@ pythonflex = { workspace = true }
67
74
  dev = [
68
75
  "pythonflex",
69
76
  ]
77
+
@@ -3,9 +3,9 @@ from .utils import dsave, dload
3
3
  from .preprocessing import get_example_data_path, load_datasets, get_common_genes, filter_matrix_by_genes, load_gold_standard, filter_duplicate_terms
4
4
  from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv, update_matploblib_config, mpr_prepare
5
5
  from .plotting import (
6
- adjust_text_positions, plot_precision_recall_curve, plot_percomplex_scatter,
6
+ adjust_text_positions, plot_precision_recall_curve, plot_aggregated_pra, plot_iqr_pra, plot_all_runs_pra, plot_percomplex_scatter,
7
7
  plot_percomplex_scatter_bysize, plot_complex_contributions, plot_significant_complexes, plot_auc_scores,
8
- plot_mpr_tp, plot_mpr_complexes, plot_mpr_tp_multi, plot_mpr_complexes_multi
8
+ plot_mpr_tp, plot_mpr_complexes, plot_mpr_tp_multi, plot_mpr_complexes_multi, plot_mpr_complexes_auc_scores
9
9
  )
10
10
 
11
11
  __all__ = [ "log", "get_example_data_path", "fast_corr",
@@ -13,8 +13,8 @@ __all__ = [ "log", "get_example_data_path", "fast_corr",
13
13
  "filter_matrix_by_genes", "load_gold_standard", "filter_duplicate_terms", "pra", "pra_percomplex",
14
14
  "perform_corr", "is_symmetric", "binary", "has_mirror_of_first_pair", "convert_full_to_half_matrix",
15
15
  "drop_mirror_pairs", "quick_sort", "complex_contributions", "adjust_text_positions", "plot_precision_recall_curve",
16
- "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
17
- "plot_significant_complexes", "plot_auc_scores", "save_results_to_csv", "update_matploblib_config",
16
+ "plot_aggregated_pra", "plot_iqr_pra", "plot_all_runs_pra", "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
17
+ "plot_significant_complexes", "plot_auc_scores", "plot_mpr_complexes_auc_scores", "save_results_to_csv", "update_matploblib_config",
18
18
  "mpr_prepare", "plot_mpr_tp", "plot_mpr_complexes",
19
19
  "plot_mpr_tp_multi", "plot_mpr_complexes_multi"
20
20
  ]
@@ -844,7 +844,7 @@ def quick_sort(df, ascending=False):
844
844
  log.done("Pair-wise matrix sorting.")
845
845
  return sorted_df
846
846
 
847
- def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_percomplex"]):
847
+ def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_percomplex", "mpr_complexes_auc"]):
848
848
 
849
849
  config = dload("config") # Load config to get output folder
850
850
  output_folder = Path(config.get("output_folder", "output"))
@@ -856,6 +856,18 @@ def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_pe
856
856
  if data is None:
857
857
  log.warning(f"No data found for category '{category}'. Skipping save.")
858
858
  continue
859
+
860
+ if category == "mpr_complexes_auc" and isinstance(data, dict):
861
+ # Dict[dataset_name -> Dict[filter_key -> auc]]
862
+ try:
863
+ df = pd.DataFrame.from_dict(data, orient="index")
864
+ df.index.name = "Dataset"
865
+ csv_path = output_folder / f"{category}.csv"
866
+ df.to_csv(csv_path, index=True)
867
+ log.info(f"Saved '{category}' to {csv_path}")
868
+ except Exception as e:
869
+ log.warning(f"Failed to convert and save '{category}': {e}")
870
+ continue
859
871
 
860
872
  if category == "pr_auc" and isinstance(data, dict):
861
873
  # Special handling: Convert dict to DataFrame (assuming keys are indices, values are data)
@@ -1312,6 +1324,64 @@ def _mpr_module_coverage(contrib_df, terms, tp_th=1, percent_th=0.1):
1312
1324
  return coverage
1313
1325
 
1314
1326
 
1327
+ def _mpr_complexes_auc(
1328
+ coverage: np.ndarray,
1329
+ precision_cutoffs: np.ndarray,
1330
+ max_complexes: float = 200.0,
1331
+ ) -> float:
1332
+ """Compute AUC for the Fig. 1F-style mPR curve (#complexes vs precision).
1333
+
1334
+ The plot uses:
1335
+ x = #covered complexes (capped at `max_complexes`, shown on a log axis)
1336
+ y = precision cutoff
1337
+
1338
+ We compute a normalized AUC by integrating precision over the *normalized*
1339
+ coverage axis:
1340
+ AUC = \int y \, d(x/max_complexes)
1341
+
1342
+ This yields a score in [0, 1] (or NaN if insufficient data).
1343
+ """
1344
+ cov = np.asarray(coverage, dtype=float)
1345
+ prec = np.asarray(precision_cutoffs, dtype=float)
1346
+
1347
+ if cov.size == 0 or prec.size == 0:
1348
+ return 0.0
1349
+
1350
+ # Match plot_mpr_complexes_multi(): only count cov>0 (log-x cannot show 0)
1351
+ mask = (
1352
+ np.isfinite(cov)
1353
+ & np.isfinite(prec)
1354
+ & (cov > 0)
1355
+ & (cov <= max_complexes)
1356
+ & (prec >= 0)
1357
+ & (prec <= 1.0)
1358
+ )
1359
+ if not np.any(mask):
1360
+ return 0.0
1361
+
1362
+ x_cov = cov[mask]
1363
+ y = prec[mask]
1364
+
1365
+ # x-axis is log-scaled in the plot; normalize so cov=1 -> 0, cov=max_complexes -> 1
1366
+ # (This matches the plot's tick hack where 1 is labeled as "0".)
1367
+ x = np.log10(x_cov) / np.log10(float(max_complexes))
1368
+
1369
+ # Sort by x and collapse duplicate x values by taking max y (upper envelope)
1370
+ order = np.argsort(x)
1371
+ x = x[order]
1372
+ y = y[order]
1373
+
1374
+ x_unique = np.unique(x)
1375
+ if x_unique.size != x.size:
1376
+ y = np.array([float(np.nanmax(y[x == xv])) for xv in x_unique], dtype=float)
1377
+ x = x_unique
1378
+
1379
+ if x.size < 2:
1380
+ return 0.0
1381
+
1382
+ return float(np.trapz(y, x))
1383
+
1384
+
1315
1385
 
1316
1386
 
1317
1387
 
@@ -1379,6 +1449,7 @@ def mpr_prepare(
1379
1449
 
1380
1450
  tp_curves = {}
1381
1451
  coverage_curves = {}
1452
+ complexes_auc = {}
1382
1453
  precision_cutoffs = None
1383
1454
 
1384
1455
  for label, removed in filter_sets.items():
@@ -1393,6 +1464,7 @@ def mpr_prepare(
1393
1464
  "precision": np.array([], dtype=float),
1394
1465
  }
1395
1466
  coverage_curves[label] = np.zeros(0, dtype=float)
1467
+ complexes_auc[label] = float("nan")
1396
1468
  continue
1397
1469
 
1398
1470
  tp_cum = true.cumsum()
@@ -1417,11 +1489,17 @@ def mpr_prepare(
1417
1489
  percent_th=percent_th,
1418
1490
  )
1419
1491
  coverage_curves[label] = cov
1492
+ complexes_auc[label] = _mpr_complexes_auc(
1493
+ cov,
1494
+ precision_cutoffs,
1495
+ max_complexes=200.0,
1496
+ )
1420
1497
 
1421
1498
  mpr_data = {
1422
1499
  "precision_cutoffs": precision_cutoffs,
1423
1500
  "tp_curves": tp_curves,
1424
1501
  "coverage_curves": coverage_curves,
1502
+ "complexes_auc": complexes_auc,
1425
1503
  "filters": {
1426
1504
  "no_mtRibo_ETCI": sorted(mtRibo_ids),
1427
1505
  "no_small_highAUPRC": sorted(small_hi_ids),
@@ -1435,6 +1513,9 @@ def mpr_prepare(
1435
1513
 
1436
1514
  dsave(mpr_data, "mpr", name)
1437
1515
 
1516
+ # Convenience: store AUCs as their own category for easy export / plotting.
1517
+ dsave(complexes_auc, "mpr_complexes_auc", name)
1518
+
1438
1519
 
1439
1520
 
1440
1521
  ### OLD FUNCTIONS
@@ -8,32 +8,34 @@ import pythonflex as flex
8
8
  inputs = {
9
9
  "Melanoma (63 Screens)": {
10
10
  "path": flex.get_example_data_path("melanoma_cell_lines_500_genes.csv"),
11
- "sort": "high"
11
+ "sort": "high",
12
+ "color": "#FF0000"
12
13
  },
13
14
  "Liver (24 Screens)": {
14
15
  "path": flex.get_example_data_path("liver_cell_lines_500_genes.csv"),
15
- "sort": "high"
16
+ "sort": "high",
17
+ "color": "#FFDD00"
16
18
  },
17
19
  "Neuroblastoma (37 Screens)": {
18
20
  "path": flex.get_example_data_path("neuroblastoma_cell_lines_500_genes.csv"),
19
- "sort": "high"
21
+ "sort": "high",
22
+ "color": "#FFDDDD"
20
23
  },
21
24
  }
22
25
 
23
26
 
24
27
 
25
- #%%
26
28
  default_config = {
27
29
  "min_genes_in_complex": 0,
28
30
  "min_genes_per_complex_analysis": 3,
29
- "output_folder": "output",
31
+ "output_folder": "CORUM",
30
32
  "gold_standard": "CORUM",
31
- "color_map": "RdYlBu",
32
- "jaccard": True,
33
+ "color_map": "BuGn",
34
+ "jaccard": False,
33
35
  "use_common_genes": False, # Set to False for individual dataset-gold standard intersections
34
36
  "plotting": {
35
37
  "save_plot": True,
36
- "output_type": "pdf",
38
+ "output_type": "png",
37
39
  },
38
40
  "preprocessing": {
39
41
  "fill_na": True,
@@ -41,7 +43,8 @@ default_config = {
41
43
  },
42
44
  "corr_function": "numpy",
43
45
  "logging": {
44
- "visible_levels": ["DONE","STARTED"] # "PROGRESS", "STARTED", ,"INFO","WARNING"
46
+ "visible_levels": ["DONE"]
47
+ # "PROGRESS", "STARTED", ,"INFO","WARNING"
45
48
  }
46
49
  }
47
50
 
@@ -52,26 +55,33 @@ flex.initialize(default_config)
52
55
  data, _ = flex.load_datasets(inputs)
53
56
  terms, genes_in_terms = flex.load_gold_standard()
54
57
 
55
-
56
- #%%
57
58
  # Run analysis
58
59
  for name, dataset in data.items():
59
60
  pra = flex.pra(name, dataset, is_corr=False)
60
61
  fpc = flex.pra_percomplex(name, dataset, is_corr=False)
61
62
  cc = flex.complex_contributions(name)
62
-
63
+ flex.mpr_prepare(name)
64
+
65
+
63
66
 
64
67
 
65
68
  #%%
66
69
  # Generate plots
67
- flex.plot_auc_scores()
68
- flex.plot_precision_recall_curve()
69
- flex.plot_percomplex_scatter(n_top=20)
70
- flex.plot_percomplex_scatter_bysize()
71
- flex.plot_significant_complexes()
72
- flex.plot_complex_contributions()
73
-
70
+ # flex.plot_precision_recall_curve()
71
+ # flex.plot_auc_scores()
72
+ # flex.plot_significant_complexes()
73
+ # flex.plot_percomplex_scatter(n_top=20)
74
+ # flex.plot_percomplex_scatter_bysize()
75
+ # flex.plot_complex_contributions()
76
+ #%%
77
+ #flex.plot_mpr_tp_multi(show_filters="all")
78
+ flex.plot_mpr_complexes_multi(show_filters="all")
74
79
 
75
80
  #%%
76
81
  # Save results to CSV
77
82
  flex.save_results_to_csv()
83
+
84
+
85
+ # %%
86
+ flex.plot_mpr_complexes_auc_scores("all")
87
+ # %%
@@ -0,0 +1,111 @@
1
+ """
2
+ Basic usage example of the pythonFLEX package.
3
+ Demonstrates initialization, data loading, analysis, and plotting.
4
+ """
5
+ #%%
6
+ import pythonflex as flex
7
+ import pandas as pd
8
+
9
+ gene_effect = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/gene_effect.csv', index_col=0)
10
+
11
+ skin = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/skin_cell_lines.csv', index_col=0)
12
+
13
+ soft = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/soft_tissue_cell_lines.csv', index_col=0)
14
+
15
+
16
+ cholesky = pd.read_csv('C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/25Q2_chronos_whitened_Cholesky.csv', index_col=0).T
17
+
18
+ # inputs = {
19
+ # "All Screens": {
20
+ # "path": gene_effect,
21
+ # "sort": "high",
22
+ # "color": "#000000"
23
+ # },
24
+ # "Skin": {
25
+ # "path": skin,
26
+ # "sort": "high",
27
+ # "color": "#FF0000"
28
+ # },
29
+ # "Soft Tissue": {
30
+ # "path": soft,
31
+ # "sort": "high",
32
+ # "color": "#FFFF00"
33
+ # },
34
+ # }
35
+
36
+
37
+ inputs = {
38
+ "DM All Screens": {
39
+ "path": gene_effect,
40
+ "sort": "high",
41
+ "color": "#000000"
42
+ },
43
+ "DM Cholesky Whitening": {
44
+ "path": cholesky,
45
+ "sort": "high",
46
+ "color": "#FF0000"
47
+ },
48
+
49
+ }
50
+
51
+
52
+
53
+
54
+ default_config = {
55
+ "min_genes_in_complex": 2,
56
+ "min_genes_per_complex_analysis": 3,
57
+ "output_folder": "CORUM_DMvsCholesky",
58
+ "gold_standard": "CORUM",
59
+ "color_map": "BuGn",
60
+ "jaccard": False,
61
+ "use_common_genes": False, # Set to False for individual dataset-gold standard intersections
62
+ "plotting": {
63
+ "save_plot": True,
64
+ "output_type": "pdf",
65
+ },
66
+ "preprocessing": {
67
+ "fill_na": True,
68
+ "normalize": False,
69
+ },
70
+ "corr_function": "numpy",
71
+ "logging": {
72
+ "visible_levels": ["DONE"]
73
+ # "PROGRESS", "STARTED", ,"INFO","WARNING"
74
+ }
75
+ }
76
+
77
+ # Initialize logger, config, and output folder
78
+ flex.initialize(default_config)
79
+
80
+ # Load datasets and gold standard terms
81
+ data, _ = flex.load_datasets(inputs)
82
+ terms, genes_in_terms = flex.load_gold_standard()
83
+
84
+ # Run analysis
85
+ for name, dataset in data.items():
86
+ pra = flex.pra(name, dataset, is_corr=False)
87
+ fpc = flex.pra_percomplex(name, dataset, is_corr=False)
88
+ cc = flex.complex_contributions(name)
89
+ flex.mpr_prepare(name)
90
+
91
+
92
+
93
+
94
+ #%%
95
+ # Generate plots
96
+ flex.plot_precision_recall_curve()
97
+ flex.plot_auc_scores()
98
+ flex.plot_significant_complexes()
99
+ flex.plot_percomplex_scatter(n_top=20)
100
+ flex.plot_percomplex_scatter_bysize()
101
+ flex.plot_complex_contributions()
102
+ ##
103
+ #%%
104
+ flex.plot_mpr_tp_multi(show_filters="all")
105
+ flex.plot_mpr_complexes_multi(show_filters="all")
106
+
107
+ # Save results to CSV
108
+ flex.save_results_to_csv()
109
+
110
+ # %%
111
+ # %%