pythonflex 0.1.5__tar.gz → 0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. pythonflex-0.2/.vscode/settings.json +5 -0
  2. {pythonflex-0.1.5 → pythonflex-0.2}/PKG-INFO +1 -1
  3. {pythonflex-0.1.5 → pythonflex-0.2}/pyproject.toml +1 -1
  4. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/__init__.py +2 -2
  5. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/analysis.py +182 -85
  6. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/examples/basic_usage.py +15 -35
  7. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/examples/dataset_filtering.py +2 -0
  8. pythonflex-0.2/src/pythonflex/examples/test.py +104 -0
  9. pythonflex-0.2/src/pythonflex/plotting.py +672 -0
  10. pythonflex-0.1.5/src/pythonflex/plotting.py +0 -510
  11. {pythonflex-0.1.5 → pythonflex-0.2}/.gitignore +0 -0
  12. {pythonflex-0.1.5 → pythonflex-0.2}/.python-version +0 -0
  13. {pythonflex-0.1.5 → pythonflex-0.2}/README.md +0 -0
  14. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/dataset/liver_cell_lines_500_genes.csv +0 -0
  15. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/dataset/melanoma_cell_lines_500_genes.csv +0 -0
  16. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/dataset/neuroblastoma_cell_lines_500_genes.csv +0 -0
  17. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/gold_standard/CORUM.parquet +0 -0
  18. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/gold_standard/GOBP.parquet +0 -0
  19. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/gold_standard/PATHWAY.parquet +0 -0
  20. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/gold_standard/corum.csv +0 -0
  21. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/gold_standard/gobp.csv +0 -0
  22. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/data/gold_standard/pathway.csv +0 -0
  23. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/logging_config.py +0 -0
  24. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/preprocessing.py +0 -0
  25. {pythonflex-0.1.5 → pythonflex-0.2}/src/pythonflex/utils.py +0 -0
  26. {pythonflex-0.1.5 → pythonflex-0.2}/uv.lock +0 -0
@@ -0,0 +1,5 @@
1
+ {
2
+ "python-envs.defaultEnvManager": "ms-python.python:conda",
3
+ "python-envs.defaultPackageManager": "ms-python.python:conda",
4
+ "python-envs.pythonProjects": []
5
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pythonflex
3
- Version: 0.1.5
3
+ Version: 0.2
4
4
  Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
5
5
  Author-email: Yasir Demirtaş <tyasird@hotmail.com>
6
6
  Requires-Python: >=3.9
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "pythonflex"
3
- version = "0.1.5"
3
+ version = "0.2"
4
4
  description = "pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data."
5
5
  readme = "README.md"
6
6
  authors = [
@@ -2,7 +2,7 @@
2
2
  from .logging_config import log
3
3
  from .utils import dsave, dload
4
4
  from .preprocessing import get_example_data_path, load_datasets, get_common_genes, filter_matrix_by_genes, load_gold_standard, filter_duplicate_terms
5
- from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv
5
+ from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv, update_matploblib_config
6
6
  from .plotting import (
7
7
  adjust_text_positions, plot_precision_recall_curve, plot_percomplex_scatter,
8
8
  plot_percomplex_scatter_bysize, plot_complex_contributions, plot_significant_complexes, plot_auc_scores
@@ -14,5 +14,5 @@ __all__ = [ "log", "get_example_data_path", "fast_corr",
14
14
  "perform_corr", "is_symmetric", "binary", "has_mirror_of_first_pair", "convert_full_to_half_matrix",
15
15
  "drop_mirror_pairs", "quick_sort", "complex_contributions", "adjust_text_positions", "plot_precision_recall_curve",
16
16
  "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
17
- "plot_significant_complexes", "plot_auc_scores", "save_results_to_csv"
17
+ "plot_significant_complexes", "plot_auc_scores", "save_results_to_csv", "update_matploblib_config"
18
18
  ]
@@ -23,7 +23,7 @@ from .logging_config import log
23
23
  from .preprocessing import filter_matrix_by_genes
24
24
  from .utils import dsave, dload, _sanitize
25
25
 
26
-
26
+ import matplotlib as mpl
27
27
 
28
28
  def deep_update(source, overrides):
29
29
  """Recursively update the source dict with the overrides."""
@@ -40,7 +40,7 @@ def initialize(config={}):
40
40
 
41
41
  default_config = {
42
42
  "min_genes_in_complex": 3,
43
- "min_genes_per_complex_analysis": 3,
43
+ "min_genes_per_complex_analysis": 2,
44
44
  "output_folder": "output",
45
45
  "gold_standard": "CORUM",
46
46
  "color_map": "RdYlBu",
@@ -48,7 +48,7 @@ def initialize(config={}):
48
48
  "plotting": {
49
49
  "save_plot": True,
50
50
  "show_plot": True,
51
- "output_type": "png",
51
+ "output_type": "pdf",
52
52
  },
53
53
  "preprocessing": {
54
54
  "normalize": False,
@@ -95,31 +95,105 @@ def initialize(config={}):
95
95
 
96
96
 
97
97
 
98
- def update_matploblib_config(config={}):
99
- log.progress("Updating matplotlib settings.")
100
- plt.rcParams.update({
101
- "font.family": "DejaVu Sans", # ← change if you prefer Arial, etc.
102
- "mathtext.fontset": "dejavusans",
103
- 'font.size': 7, # General font size
104
- 'axes.titlesize': 10, # Title size
105
- 'axes.labelsize': 7, # Axis labels (xlabel/ylabel)
106
- 'legend.fontsize': 7, # Legend text
107
- 'xtick.labelsize': 6, # X-axis tick labels
108
- 'ytick.labelsize': 6, # Y-axis tick labels
109
- 'lines.linewidth': 1.5, # Line width for plots
110
- 'figure.dpi': 300, # Figure resolution
111
- 'figure.figsize': (8, 6), # Default figure size
112
- 'grid.linestyle': '--', # Grid line style
113
- 'grid.linewidth': 0.5, # Grid line width
114
- 'grid.alpha': 0.2, # Grid transparency
115
- 'axes.spines.right': False, # Hide right spine
116
- 'axes.spines.top': False, # Hide top spine
117
- 'image.cmap': config['color_map'], # Default colormap
118
- 'axes.edgecolor': 'black', # Axis edge color
119
- 'axes.facecolor': 'none', # Transparent axes background
120
- 'text.usetex': False # Ensure LaTeX is off
98
+
99
+
100
+ def update_matploblib_config(config=None, font_family="Arial", layout="single"):
101
+ """
102
+ Configure matplotlib settings optimized for Nature journal figures:
103
+ - 7 pt fonts (labels, ticks, legend), 9 pt titles
104
+ - Thin spines (0.5 pt), ticks out (left/bottom only), no minor ticks
105
+ - No grid, clean minimalist look
106
+ - Colorblind-friendly Tableau 10 color cycle
107
+ - Illustrator-safe PDF export (Type 42)
108
+ - Figure sizes: "single" (4.0 x 3.0 in, ~102 mm wide), "double" (7.2 x 5.4 in, ~183 mm wide), or custom (width, height) in inches
109
+
110
+ Args:
111
+ config (dict, optional): Configuration dict (e.g., {'color_map': 'RdYlBu'}). Accepted for compatibility; this function does not read any key from it.
112
+ font_family (str): Preferred font (e.g., 'Arial', falls back to 'Helvetica').
113
+ layout (str or tuple): 'single' (4.0 x 3.0 in, ~102 mm wide), 'double' (7.2 x 5.4 in, ~183 mm wide), or (width, height) in inches. Any other non-tuple value is treated as 'single'.
114
+ """
115
+ if config is None:
116
+ config = {}
117
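+ # Fallback if chosen font missing. NOTE(review): findfont() normally substitutes a default font instead of raising, so this except branch may never run - confirm.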
118
+ try:
119
+ from matplotlib.font_manager import findfont, FontProperties
120
+ findfont(FontProperties(family=font_family))
121
+ except Exception:
122
+ font_family = "Helvetica" # Nature prefers Helvetica if Arial unavailable
123
+ print(f"Warning: '{font_family}' not found, falling back to 'Helvetica'.")
124
+
125
+ # Figure size presets (Nature: single ≈ 89 mm, double ≈ 183 mm at 25.4 mm/inch)
126
+ if isinstance(layout, tuple):
127
+ fig_w, fig_h = layout
128
+ else:
129
+ if layout == "double":
130
+ fig_w = 7.2 # ~183 mm
131
+ fig_h = 5.4 # Adjusted aspect
132
+ else: # "single"
133
+ fig_w = 4.0 # Increased from 3.5" for more space (~102 mm)
134
+ fig_h = 3.0 # Increased from 2.6" for better aspect (~76 mm)
135
+ # Colorblind-friendly cycle (Tableau 10 adapted)
136
+ cb_cycle = [
137
+ "#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F",
138
+ "#EDC948", "#B07AA1", "#FF9DA7", "#9C755F", "#BAB0AC"
139
+ ]
140
+ mpl.rcParams.update({
141
+ # --- Text & Fonts ---
142
+ "text.usetex": False, # Avoid LaTeX
143
+ "font.family": [font_family], # Explicit font
144
+ "mathtext.fontset": "dejavusans", # Disable mathtext
145
+ "mathtext.default": "regular", # Plain text
146
+ "axes.unicode_minus": True, # Proper minus signs
147
+ # --- Sizes (7 pt baseline, adjusted for space) ---
148
+ "font.size": 7, # Reduced from 8 pt
149
+ "axes.titlesize": 9, # Reduced from 10 pt
150
+ "axes.labelsize": 7,
151
+ "legend.fontsize": 7,
152
+ "xtick.labelsize": 7,
153
+ "ytick.labelsize": 7,
154
+ # --- Lines & Markers ---
155
+ "lines.linewidth": 1.5, # Kept for data visibility
156
+ "lines.markersize": 4.0,
157
+ "patch.linewidth": 0.5,
158
+ "errorbar.capsize": 2,
159
+ # --- Axes, Spines, Ticks ---
160
+ "axes.linewidth": 0.5,
161
+ "axes.edgecolor": "black",
162
+ "axes.facecolor": "none",
163
+ "axes.titlepad": 3.0,
164
+ "axes.labelpad": 2.0,
165
+ "axes.prop_cycle": mpl.cycler(color=cb_cycle),
166
+ "xtick.direction": "out",
167
+ "ytick.direction": "out",
168
+ "xtick.major.size": 2.5,
169
+ "ytick.major.size": 2.5,
170
+ "xtick.minor.visible": False,
171
+ "ytick.minor.visible": False,
172
+ "xtick.major.width": 0.5,
173
+ "ytick.major.width": 0.5,
174
+ "xtick.top": False,
175
+ "ytick.right": False,
176
+ # --- Grid ---
177
+ "axes.grid": False,
178
+ # --- Legend ---
179
+ "legend.frameon": False,
180
+ "legend.handlelength": 1.6, # Slightly adjusted
181
+ "legend.handletextpad": 0.4,
182
+ "legend.borderaxespad": 0.3,
183
+ "legend.loc": "best", # Dynamic placement to avoid overlap
184
+ # --- Figure & Save ---
185
+ "figure.dpi": 600,
186
+ "figure.figsize": (fig_w, fig_h),
187
+ "savefig.dpi": 600,
188
+ "savefig.bbox": "tight",
189
+ "savefig.pad_inches": 0.1, # Increased for spacing
190
+ "savefig.transparent": False, # White background
191
+ # --- PDF/SVG Export ---
192
+ "pdf.fonttype": 42,
193
+ "ps.fonttype": 42,
194
+ "pdf.use14corefonts": False,
195
+ "svg.fonttype": "none",
121
196
  })
122
- log.done("Matplotlib settings updated.")
123
197
 
124
198
 
125
199
 
@@ -172,15 +246,14 @@ def pra(dataset_name, matrix, is_corr=False):
172
246
  pr_auc = metrics.auc(recall, precision)
173
247
  df["precision"] = precision
174
248
  df["recall"] = recall
175
-
249
+
176
250
  log.info(f"PR-AUC: {pr_auc:.4f}, Number of true positives: {df['prediction'].sum()}")
177
251
  dsave(df, "pra", dataset_name)
178
252
  dsave(pr_auc, "pr_auc", dataset_name)
179
- log.done(f"Global PRA completed for {dataset_name}")
180
- return df, pr_auc
181
-
182
-
253
+ dsave( _corrected_auc(df) , "corrected_pr_auc", dataset_name)
183
254
 
255
+ log.done(f"Global PRA completed for {dataset_name}")
256
+ return df
184
257
 
185
258
 
186
259
 
@@ -189,6 +262,9 @@ def pra(dataset_name, matrix, is_corr=False):
189
262
  # helper functions for PRA per-complex analysis
190
263
  # --------------------------------------------------------------------------
191
264
 
265
+ def _corrected_auc(df: pd.DataFrame) -> float:
266
+ return np.trapz(df["precision"], df["recall"]) - df["precision"].iloc[-1]
267
+
192
268
  def _build_gene_to_pair_indices(pairwise_df):
193
269
  indices = pairwise_df.index.values
194
270
  genes = pd.concat([pairwise_df['gene1'], pairwise_df['gene2']], ignore_index=True)
@@ -240,10 +316,15 @@ def _dump_pairwise_memmap(df: pd.DataFrame, tag: str) -> Path:
240
316
 
241
317
 
242
318
 
243
- def _init_worker(memmap_path, gene_to_pair_indices):
319
+ # Global variables for worker processes (compatible with older joblib)
320
+ PAIRWISE_DF = None
321
+ GENE2IDX = None
322
+
323
+ def _init_worker_globals(memmap_path, gene_to_pair_indices):
324
+ """Initialize global variables for worker processes"""
244
325
  global PAIRWISE_DF, GENE2IDX
245
326
  PAIRWISE_DF = load(memmap_path)
246
- GENE2IDX = gene_to_pair_indices
327
+ GENE2IDX = gene_to_pair_indices
247
328
 
248
329
 
249
330
 
@@ -263,42 +344,52 @@ def delete_memmap(memmap_path, log, wait_seconds=0.1):
263
344
  # --------------------------------------------------------------------------
264
345
  # Process each chunk of terms
265
346
  # --------------------------------------------------------------------------
266
- def _process_chunk(chunk_terms, min_genes):
267
- pairwise_df = PAIRWISE_DF
268
- gene_to_pair_indices = GENE2IDX
269
- local_auc_scores = {}
270
-
271
- for idx, row in chunk_terms.iterrows():
272
- gene_set = set(row.used_genes)
273
- if len(gene_set) < min_genes:
274
- continue
347
+ def _process_chunk(chunk_terms, min_genes, memmap_path, gene_to_pair_indices):
348
+ """Process a chunk of terms - compatible with older joblib versions"""
349
+ try:
350
+ # Load data in each worker (compatible with older joblib)
351
+ pairwise_df = load(memmap_path)
352
+ local_auc_scores = {}
353
+ local_corrected_auc_scores = {}
354
+
355
+ for idx, row in chunk_terms.iterrows():
356
+ gene_set = set(row.used_genes)
357
+ if len(gene_set) < min_genes:
358
+ continue
275
359
 
276
- candidate_indices = bitarray(len(pairwise_df))
277
- for g in gene_set:
278
- if g in gene_to_pair_indices:
279
- candidate_indices[gene_to_pair_indices[g]] = True
280
- if not candidate_indices.any():
281
- continue
360
+ candidate_indices = bitarray(len(pairwise_df))
361
+ for g in gene_set:
362
+ if g in gene_to_pair_indices:
363
+ candidate_indices[gene_to_pair_indices[g]] = True
364
+ if not candidate_indices.any():
365
+ continue
282
366
 
283
- selected = np.unpackbits(candidate_indices).view(bool)[:len(pairwise_df)]
284
- sub_df = pairwise_df.iloc[selected]
367
+ selected = np.unpackbits(candidate_indices).view(bool)[:len(pairwise_df)]
368
+ sub_df = pairwise_df.iloc[selected]
285
369
 
286
- complex_id = str(idx)
287
- pattern = r'(?:^|;)' + re.escape(complex_id) + r'(?:;|$)'
288
- true_label = sub_df["complex_ids"].str.contains(pattern, regex=True).astype(int)
289
- mask = (sub_df["complex_ids"] == "") | (true_label == 1)
290
- preds = true_label[mask]
370
+ complex_id = str(idx)
371
+ pattern = r'(?:^|;)' + re.escape(complex_id) + r'(?:;|$)'
372
+ true_label = sub_df["complex_ids"].str.contains(pattern, regex=True).astype(int)
373
+ mask = (sub_df["complex_ids"] == "") | (true_label == 1)
374
+ preds = true_label[mask]
291
375
 
292
- if preds.sum() == 0:
293
- continue
376
+ if preds.sum() == 0:
377
+ continue
294
378
 
295
- tp_cum = preds.cumsum()
296
- precision = tp_cum / (np.arange(len(preds)) + 1)
297
- recall = tp_cum / tp_cum.iloc[-1]
298
- if len(recall) >= 2 and recall.iloc[-1] != 0:
299
- local_auc_scores[idx] = metrics.auc(recall, precision)
379
+ tp_cum = preds.cumsum()
380
+ precision = tp_cum / (np.arange(len(preds)) + 1)
381
+ recall = tp_cum / tp_cum.iloc[-1]
382
+ if len(recall) >= 2 and recall.iloc[-1] != 0:
383
+ # Compute regular AUC
384
+ local_auc_scores[idx] = metrics.auc(recall, precision)
385
+ # Compute corrected AUC using the same logic as _corrected_auc function
386
+ local_corrected_auc_scores[idx] = np.trapz(precision, recall) - precision.iloc[-1]
300
387
 
301
- return local_auc_scores
388
+ return {'auc': local_auc_scores, 'corrected_auc': local_corrected_auc_scores}
389
+
390
+ except Exception as e:
391
+ # Return error info for debugging
392
+ return {'error': str(e), 'chunk_size': len(chunk_terms)}
302
393
 
303
394
 
304
395
 
@@ -345,26 +436,23 @@ def pra_percomplex(dataset_name, matrix, is_corr=False, chunk_size=200):
345
436
  results = None
346
437
 
347
438
  try:
348
- # Simplified parallel execution without progress callback interference
439
+ # Compatible parallel execution for older joblib versions
349
440
  log.started("Processing chunks in parallel")
350
- with tqdm(total=len(chunks), desc="Per-complex PRA") as pbar:
351
- results = Parallel(
352
- n_jobs=8,
353
- temp_folder=os.path.dirname(memmap_path),
354
- max_nbytes=None,
355
- mmap_mode="r",
356
- initializer=_init_worker,
357
- initargs=(memmap_path, gene_to_pair_indices),
358
- verbose=0 # Reduce joblib verbosity
359
- )(delayed(_process_chunk)(chunk, min_genes) for chunk in chunks)
360
-
361
- # Update progress bar once all tasks are complete
362
- pbar.update(len(chunks))
441
+
442
+ # Use a more conservative approach with older joblib
443
+ results = Parallel(
444
+ n_jobs=min(4, len(chunks)), # Limit to 4 workers or number of chunks
445
+ temp_folder=os.path.dirname(memmap_path),
446
+ max_nbytes='100M', # Set memory limit
447
+ verbose=1 # Show progress
448
+ )(delayed(_process_chunk)(chunk, min_genes, memmap_path, gene_to_pair_indices)
449
+ for chunk in tqdm(chunks, desc="Per-complex PRA"))
363
450
 
364
451
  log.done("Processing chunks in parallel")
365
452
 
366
453
  except Exception as e:
367
454
  log.error(f"Error during parallel processing: {e}")
455
+ log.error(f"Error type: {type(e).__name__}")
368
456
  # Still try to clean up the memmap file
369
457
  try:
370
458
  if os.path.exists(memmap_path):
@@ -383,19 +471,29 @@ def pra_percomplex(dataset_name, matrix, is_corr=False, chunk_size=200):
383
471
  except OSError as e:
384
472
  log.warning(f"Failed to remove memmap file {memmap_path}: {e}")
385
473
 
386
- # Merge results with error handling
474
+ # Merge results with enhanced error handling
387
475
  auc_scores = {}
476
+ corrected_auc_scores = {}
388
477
  if results:
389
- for res in results:
478
+ for i, res in enumerate(results):
390
479
  if isinstance(res, dict):
391
- auc_scores.update(res)
392
- elif isinstance(res, tuple) and res[0] is None:
393
- log.error(res[1]) # Log the error message from the chunk
480
+ if 'error' in res:
481
+ log.error(f"Error in chunk {i}: {res['error']}")
482
+ elif 'auc' in res and 'corrected_auc' in res:
483
+ # New format with both AUC types
484
+ auc_scores.update(res['auc'])
485
+ corrected_auc_scores.update(res['corrected_auc'])
486
+ else:
487
+ # Fallback for old format (backward compatibility)
488
+ auc_scores.update(res)
489
+ elif isinstance(res, tuple) and len(res) >= 2 and res[0] is None:
490
+ log.error(f"Chunk {i} error: {res[1]}")
394
491
  else:
395
- log.error(f"Ignoring unexpected chunk result: {res}")
492
+ log.warning(f"Unexpected result type from chunk {i}: {type(res)} - {res}")
396
493
 
397
494
  # Add the computed AUC scores to the terms DataFrame.
398
495
  terms["auc_score"] = pd.Series(auc_scores)
496
+ terms["corrected_auc_score"] = pd.Series(corrected_auc_scores)
399
497
  terms.drop(columns=["hash"], inplace=True)
400
498
  dsave(terms, "pra_percomplex", dataset_name)
401
499
  log.done(f"Per-complex PRA completed.")
@@ -1296,4 +1394,3 @@ def save_results_to_csv(categories = ["complex_contributions", "pr_auc", "pra_pe
1296
1394
  # dsave(pr_auc, "pr_auc", dataset_name)
1297
1395
  # log.done(f"Global PRA completed for {dataset_name}")
1298
1396
  # return df, pr_auc
1299
-
@@ -6,18 +6,22 @@ Demonstrates initialization, data loading, analysis, and plotting.
6
6
  import pythonflex as flex
7
7
 
8
8
  inputs = {
9
- "SNF": {
10
- "path": "C:/Users/yd/Desktop/projects/datasets/fused_similarity_network.csv",
9
+ "Melanoma (63 Screens)": {
10
+ "path": flex.get_example_data_path("melanoma_cell_lines_500_genes.csv"),
11
11
  "sort": "high"
12
12
  },
13
- "miss_SNF": {
14
- "path": "C:/Users/yd/Desktop/projects/datasets/miss_snf_fused_similarity_network.csv",
13
+ "Liver (24 Screens)": {
14
+ "path": flex.get_example_data_path("liver_cell_lines_500_genes.csv"),
15
15
  "sort": "high"
16
- }
16
+ },
17
+ "Neuroblastoma (37 Screens)": {
18
+ "path": flex.get_example_data_path("neuroblastoma_cell_lines_500_genes.csv"),
19
+ "sort": "high"
20
+ },
17
21
  }
18
22
 
19
- #%%
20
23
 
24
+ #%%
21
25
  default_config = {
22
26
  "min_genes_in_complex": 0,
23
27
  "min_genes_per_complex_analysis": 3,
@@ -27,7 +31,7 @@ default_config = {
27
31
  "jaccard": True,
28
32
  "plotting": {
29
33
  "save_plot": True,
30
- "output_type": "PNG",
34
+ "output_type": "pdf",
31
35
  },
32
36
  "preprocessing": {
33
37
  "fill_na": True,
@@ -43,7 +47,6 @@ default_config = {
43
47
  flex.initialize(default_config)
44
48
 
45
49
  # Load datasets and gold standard terms
46
-
47
50
  data, _ = flex.load_datasets(inputs)
48
51
  terms, genes_in_terms = flex.load_gold_standard()
49
52
 
@@ -51,16 +54,17 @@ terms, genes_in_terms = flex.load_gold_standard()
51
54
  #%%
52
55
  # Run analysis
53
56
  for name, dataset in data.items():
54
- df, pr_auc = flex.pra(name, dataset, is_corr=True)
55
- fpc = flex.pra_percomplex(name, dataset, is_corr=True)
57
+ pra = flex.pra(name, dataset, is_corr=False)
58
+ fpc = flex.pra_percomplex(name, dataset, is_corr=False)
56
59
  cc = flex.complex_contributions(name)
60
+
57
61
 
58
62
 
59
63
  #%%
60
64
  # Generate plots
61
65
  flex.plot_auc_scores()
62
66
  flex.plot_precision_recall_curve()
63
- flex.plot_percomplex_scatter()
67
+ flex.plot_percomplex_scatter(n_top=20)
64
68
  flex.plot_percomplex_scatter_bysize()
65
69
  flex.plot_significant_complexes()
66
70
  flex.plot_complex_contributions()
@@ -82,27 +86,3 @@ flex.save_results_to_csv()
82
86
 
83
87
 
84
88
 
85
- # %%
86
- import os
87
- import glob
88
-
89
- inputs = {
90
- "depmap all": {
91
- "path": "../../../../datasets/depmap/24Q4/depmap_geneeffect_all_cellines.csv",
92
- "sort": "high"
93
- }
94
- }
95
-
96
- # Now auto-discover the rest of the CSVs in the folder
97
- DATA_DIR = "../../../../datasets/depmap/24Q4/subset/"
98
- for path in glob.glob(os.path.join(DATA_DIR, "*.csv")):
99
-
100
- # Derive the key name from filename (without extension)
101
- key = os.path.splitext(os.path.basename(path))[0]
102
- inputs[key] = {
103
- "path": path,
104
- "sort": "high"
105
- }
106
-
107
- # inputs now has "depmap all" first, then one entry per CSV in DATA_DIR
108
- print(inputs)
@@ -8,6 +8,8 @@ model = pd.read_csv("../../../../datasets/depmap/24Q4/Model.csv",index_col=0)
8
8
  df.columns = df.columns.str.split(" \\(").str[0]
9
9
  df = df.T
10
10
 
11
+ #%%
12
+
11
13
  # %%
12
14
  # get ModelID of selected disease for example OncotreePrimaryDisease==Melanoma
13
15
  melanoma = model[model.OncotreePrimaryDisease=="Melanoma"].index.unique().values
@@ -0,0 +1,104 @@
1
+ #%%
2
+ import pythonflex as flex
3
+ import os
4
+
5
+ # Directory holding the DepMap 25Q2 cell-line subset CSVs
6
+ DATA_DIR = "C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/"
7
+
8
+ # Cell-line files of interest (the "_cell_lines.csv" suffix is stripped below when building the input keys)
9
+ cell_line_files = [
10
+ "soft_tissue_cell_lines.csv",
11
+ "skin_cell_lines.csv",
12
+ # "lung_cell_lines.csv",
13
+ # "head_and_neck_cell_lines.csv",
14
+ # "esophagus_stomach_cell_lines.csv",
15
+ ]
16
+
17
+ inputs = {}
18
+
19
+ # Create inputs dict with shortened names (removing "_cell_lines" suffix)
20
+ for filename in cell_line_files:
21
+ # Remove .csv extension and _cell_lines suffix
22
+ key = filename.replace("_cell_lines.csv", "")
23
+ full_path = os.path.join(DATA_DIR, filename)
24
+
25
+ inputs[key] = {
26
+ "path": full_path,
27
+ "sort": "high"
28
+ }
29
+
30
+ inputs['depmap'] = {
31
+ "path": "C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/gene_effect.csv",
32
+ "sort": "high"
33
+ }
34
+
35
+ # Print the resulting inputs dictionary
36
+ print("Configured inputs:")
37
+ for key, value in inputs.items():
38
+ print(f" {key}: {value['path']}")
39
+
40
+
41
+
42
+ default_config = {
43
+ "min_genes_in_complex": 2,
44
+ "min_genes_per_complex_analysis": 2,
45
+ "output_folder": "25q2_min_genes_2",
46
+ "gold_standard": "CORUM",
47
+ "color_map": "RdYlBu",
48
+ "jaccard": True,
49
+ "plotting": {
50
+ "save_plot": True,
51
+ "output_type": "pdf",
52
+ },
53
+ "preprocessing": {
54
+ "fill_na": True,
55
+ "normalize": False,
56
+ },
57
+ "corr_function": "numpy",
58
+ "logging": {
59
+ "visible_levels": ["DONE","STARTED"] # other options: "PROGRESS", "INFO", "WARNING"
60
+ }
61
+ }
62
+
63
+ # Initialize logger, config, and output folder
64
+ flex.initialize(default_config)
65
+
66
+ # Load datasets and gold standard terms
67
+ data, _ = flex.load_datasets(inputs)
68
+ terms, genes_in_terms = flex.load_gold_standard()
69
+
70
+
71
+ #%%
72
+ # Run analysis
73
+ for name, dataset in data.items():
74
+ pra = flex.pra(name, dataset, is_corr=False)
75
+ fpc = flex.pra_percomplex(name, dataset, is_corr=False)
76
+ cc = flex.complex_contributions(name)
77
+
78
+
79
+
80
+ #%%
81
+ # Generate plots
82
+ flex.plot_auc_scores()
83
+ flex.plot_precision_recall_curve()
84
+ flex.plot_percomplex_scatter()
85
+ flex.plot_percomplex_scatter_bysize()
86
+ flex.plot_significant_complexes()
87
+ flex.plot_complex_contributions()
88
+
89
+
90
+ #%%
91
+ # Save results to CSV
92
+ flex.save_results_to_csv()
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+ #%%
103
+
104
+