pythonflex 0.1.6__py3-none-any.whl → 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pythonflex/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  from .logging_config import log
3
3
  from .utils import dsave, dload
4
4
  from .preprocessing import get_example_data_path, load_datasets, get_common_genes, filter_matrix_by_genes, load_gold_standard, filter_duplicate_terms
5
- from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv
5
+ from .analysis import initialize, pra, pra_percomplex, fast_corr, perform_corr, is_symmetric, binary, has_mirror_of_first_pair, convert_full_to_half_matrix, drop_mirror_pairs, quick_sort, complex_contributions, save_results_to_csv, update_matploblib_config
6
6
  from .plotting import (
7
7
  adjust_text_positions, plot_precision_recall_curve, plot_percomplex_scatter,
8
8
  plot_percomplex_scatter_bysize, plot_complex_contributions, plot_significant_complexes, plot_auc_scores
@@ -14,5 +14,5 @@ __all__ = [ "log", "get_example_data_path", "fast_corr",
14
14
  "perform_corr", "is_symmetric", "binary", "has_mirror_of_first_pair", "convert_full_to_half_matrix",
15
15
  "drop_mirror_pairs", "quick_sort", "complex_contributions", "adjust_text_positions", "plot_precision_recall_curve",
16
16
  "plot_percomplex_scatter", "plot_percomplex_scatter_bysize", "plot_complex_contributions",
17
- "plot_significant_complexes", "plot_auc_scores", "save_results_to_csv"
17
+ "plot_significant_complexes", "plot_auc_scores", "save_results_to_csv", "update_matploblib_config"
18
18
  ]
pythonflex/analysis.py CHANGED
@@ -23,7 +23,7 @@ from .logging_config import log
23
23
  from .preprocessing import filter_matrix_by_genes
24
24
  from .utils import dsave, dload, _sanitize
25
25
 
26
-
26
+ import matplotlib as mpl
27
27
 
28
28
  def deep_update(source, overrides):
29
29
  """Recursively update the source dict with the overrides."""
@@ -40,7 +40,7 @@ def initialize(config={}):
40
40
 
41
41
  default_config = {
42
42
  "min_genes_in_complex": 3,
43
- "min_genes_per_complex_analysis": 3,
43
+ "min_genes_per_complex_analysis": 2,
44
44
  "output_folder": "output",
45
45
  "gold_standard": "CORUM",
46
46
  "color_map": "RdYlBu",
@@ -48,7 +48,7 @@ def initialize(config={}):
48
48
  "plotting": {
49
49
  "save_plot": True,
50
50
  "show_plot": True,
51
- "output_type": "png",
51
+ "output_type": "pdf",
52
52
  },
53
53
  "preprocessing": {
54
54
  "normalize": False,
@@ -95,31 +95,105 @@ def initialize(config={}):
95
95
 
96
96
 
97
97
 
98
- def update_matploblib_config(config={}):
99
- log.progress("Updating matplotlib settings.")
100
- plt.rcParams.update({
101
- "font.family": "DejaVu Sans", # ← change if you prefer Arial, etc.
102
- "mathtext.fontset": "dejavusans",
103
- 'font.size': 7, # General font size
104
- 'axes.titlesize': 10, # Title size
105
- 'axes.labelsize': 7, # Axis labels (xlabel/ylabel)
106
- 'legend.fontsize': 7, # Legend text
107
- 'xtick.labelsize': 6, # X-axis tick labels
108
- 'ytick.labelsize': 6, # Y-axis tick labels
109
- 'lines.linewidth': 1.5, # Line width for plots
110
- 'figure.dpi': 300, # Figure resolution
111
- 'figure.figsize': (8, 6), # Default figure size
112
- 'grid.linestyle': '--', # Grid line style
113
- 'grid.linewidth': 0.5, # Grid line width
114
- 'grid.alpha': 0.2, # Grid transparency
115
- 'axes.spines.right': False, # Hide right spine
116
- 'axes.spines.top': False, # Hide top spine
117
- 'image.cmap': config['color_map'], # Default colormap
118
- 'axes.edgecolor': 'black', # Axis edge color
119
- 'axes.facecolor': 'none', # Transparent axes background
120
- 'text.usetex': False # Ensure LaTeX is off
98
+
99
+
100
def update_matploblib_config(config=None, font_family="Arial", layout="single"):
    """
    Apply compact, journal-style matplotlib rcParams.

    Summary of what is set:
    - 7 pt fonts (labels, ticks, legend), 9 pt titles
    - Thin spines (0.5 pt), ticks out (left/bottom only), no minor ticks
    - No grid, frameless legend
    - Tableau-10-derived color cycle
    - Editable-text PDF/PS export (Type 42 fonts), SVG text kept as text
    - Default figure size chosen from ``layout``

    Args:
        config (dict, optional): Package configuration. If it contains a
            ``"color_map"`` entry, that value becomes the default colormap
            (``image.cmap``); all other keys are ignored here.
        font_family (str): Preferred font family. If matplotlib cannot
            locate it, ``"Helvetica"`` is used instead and a warning is printed.
        layout (str or tuple/list): ``"double"`` -> 7.2 x 5.4 in (~183 mm wide);
            a ``(width, height)`` pair in inches -> used as given; anything
            else (including the default ``"single"``) -> 4.0 x 3.0 in
            (~102 mm wide).
    """
    if config is None:
        config = {}

    # Fallback if the chosen font is missing.
    try:
        from matplotlib.font_manager import findfont, FontProperties
        # fallback_to_default=False makes findfont raise when the family is
        # unavailable; with the default (True) it silently substitutes another
        # font and this fallback branch could never run.
        findfont(FontProperties(family=font_family), fallback_to_default=False)
    except Exception:
        # Report the font that was actually requested *before* replacing it.
        print(f"Warning: '{font_family}' not found, falling back to 'Helvetica'.")
        font_family = "Helvetica"

    # Figure size presets (inches; 25.4 mm per inch).
    if isinstance(layout, (tuple, list)):
        fig_w, fig_h = layout
    elif layout == "double":
        fig_w = 7.2  # ~183 mm
        fig_h = 5.4
    else:  # "single" and any unrecognized value
        fig_w = 4.0  # ~102 mm
        fig_h = 3.0  # ~76 mm

    # Color cycle (Tableau 10 adapted)
    cb_cycle = [
        "#4E79A7", "#F28E2B", "#E15759", "#76B7B2", "#59A14F",
        "#EDC948", "#B07AA1", "#FF9DA7", "#9C755F", "#BAB0AC"
    ]

    rc_settings = {
        # --- Text & Fonts ---
        "text.usetex": False,              # Never route text through LaTeX
        "font.family": [font_family],
        "mathtext.fontset": "dejavusans",  # Glyph set used for math text
        "mathtext.default": "regular",     # Math text rendered in the regular style
        "axes.unicode_minus": True,
        # --- Sizes (7 pt baseline) ---
        "font.size": 7,
        "axes.titlesize": 9,
        "axes.labelsize": 7,
        "legend.fontsize": 7,
        "xtick.labelsize": 7,
        "ytick.labelsize": 7,
        # --- Lines & Markers ---
        "lines.linewidth": 1.5,
        "lines.markersize": 4.0,
        "patch.linewidth": 0.5,
        "errorbar.capsize": 2,
        # --- Axes, Spines, Ticks ---
        "axes.linewidth": 0.5,
        "axes.edgecolor": "black",
        "axes.facecolor": "none",
        "axes.titlepad": 3.0,
        "axes.labelpad": 2.0,
        "axes.prop_cycle": mpl.cycler(color=cb_cycle),
        "xtick.direction": "out",
        "ytick.direction": "out",
        "xtick.major.size": 2.5,
        "ytick.major.size": 2.5,
        "xtick.minor.visible": False,
        "ytick.minor.visible": False,
        "xtick.major.width": 0.5,
        "ytick.major.width": 0.5,
        "xtick.top": False,
        "ytick.right": False,
        # --- Grid ---
        "axes.grid": False,
        # --- Legend ---
        "legend.frameon": False,
        "legend.handlelength": 1.6,
        "legend.handletextpad": 0.4,
        "legend.borderaxespad": 0.3,
        "legend.loc": "best",
        # --- Figure & Save ---
        "figure.dpi": 600,
        "figure.figsize": (fig_w, fig_h),
        "savefig.dpi": 600,
        "savefig.bbox": "tight",
        "savefig.pad_inches": 0.1,
        "savefig.transparent": False,
        # --- PDF/SVG Export ---
        "pdf.fonttype": 42,
        "ps.fonttype": 42,
        "pdf.use14corefonts": False,
        "svg.fonttype": "none",
    }

    # Honor the documented ``color_map`` entry; callers that rely on the
    # rcParams default colormap otherwise never see the configured value.
    color_map = config.get("color_map")
    if color_map:
        rc_settings["image.cmap"] = color_map

    mpl.rcParams.update(rc_settings)
122
- log.done("Matplotlib settings updated.")
123
197
 
124
198
 
125
199
 
@@ -22,7 +22,6 @@ inputs = {
22
22
 
23
23
 
24
24
  #%%
25
-
26
25
  default_config = {
27
26
  "min_genes_in_complex": 0,
28
27
  "min_genes_per_complex_analysis": 3,
@@ -32,7 +31,7 @@ default_config = {
32
31
  "jaccard": True,
33
32
  "plotting": {
34
33
  "save_plot": True,
35
- "output_type": "PNG",
34
+ "output_type": "pdf",
36
35
  },
37
36
  "preprocessing": {
38
37
  "fill_na": True,
@@ -48,7 +47,6 @@ default_config = {
48
47
  flex.initialize(default_config)
49
48
 
50
49
  # Load datasets and gold standard terms
51
-
52
50
  data, _ = flex.load_datasets(inputs)
53
51
  terms, genes_in_terms = flex.load_gold_standard()
54
52
 
@@ -59,6 +57,7 @@ for name, dataset in data.items():
59
57
  pra = flex.pra(name, dataset, is_corr=False)
60
58
  fpc = flex.pra_percomplex(name, dataset, is_corr=False)
61
59
  cc = flex.complex_contributions(name)
60
+
62
61
 
63
62
 
64
63
  #%%
@@ -0,0 +1,104 @@
1
+ #%%
2
+ import pythonflex as flex
3
+ import os
4
+
5
+ # # Define specific cell line types you're interested in
6
+ DATA_DIR = "C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/subset/"
7
+
8
+ # Specific cell lines of interest with "_cell_lines" suffix removed
9
+ cell_line_files = [
10
+ "soft_tissue_cell_lines.csv",
11
+ "skin_cell_lines.csv",
12
+ # "lung_cell_lines.csv",
13
+ # "head_and_neck_cell_lines.csv",
14
+ # "esophagus_stomach_cell_lines.csv",
15
+ ]
16
+
17
+ inputs = {}
18
+
19
+ # Create inputs dict with shortened names (removing "_cell_lines" suffix)
20
+ for filename in cell_line_files:
21
+ # Remove .csv extension and _cell_lines suffix
22
+ key = filename.replace("_cell_lines.csv", "")
23
+ full_path = os.path.join(DATA_DIR, filename)
24
+
25
+ inputs[key] = {
26
+ "path": full_path,
27
+ "sort": "high"
28
+ }
29
+
30
+ inputs['depmap'] = {
31
+ "path": "C:/Users/yd/Desktop/projects/_datasets/depmap/25Q2/gene_effect.csv",
32
+ "sort": "high"
33
+ }
34
+
35
+ # Print the resulting inputs dictionary
36
+ print("Configured inputs:")
37
+ for key, value in inputs.items():
38
+ print(f" {key}: {value['path']}")
39
+
40
+
41
+
42
+ default_config = {
43
+ "min_genes_in_complex": 2,
44
+ "min_genes_per_complex_analysis": 2,
45
+ "output_folder": "25q2_min_genes_2",
46
+ "gold_standard": "CORUM",
47
+ "color_map": "RdYlBu",
48
+ "jaccard": True,
49
+ "plotting": {
50
+ "save_plot": True,
51
+ "output_type": "pdf",
52
+ },
53
+ "preprocessing": {
54
+ "fill_na": True,
55
+ "normalize": False,
56
+ },
57
+ "corr_function": "numpy",
58
+ "logging": {
59
+ "visible_levels": ["DONE","STARTED"] # "PROGRESS", "STARTED", ,"INFO","WARNING"
60
+ }
61
+ }
62
+
63
+ # Initialize logger, config, and output folder
64
+ flex.initialize(default_config)
65
+
66
+ # Load datasets and gold standard terms
67
+ data, _ = flex.load_datasets(inputs)
68
+ terms, genes_in_terms = flex.load_gold_standard()
69
+
70
+
71
+ #%%
72
+ # Run analysis
73
+ for name, dataset in data.items():
74
+ pra = flex.pra(name, dataset, is_corr=False)
75
+ fpc = flex.pra_percomplex(name, dataset, is_corr=False)
76
+ cc = flex.complex_contributions(name)
77
+
78
+
79
+
80
+ #%%
81
+ # Generate plots
82
+ flex.plot_auc_scores()
83
+ flex.plot_precision_recall_curve()
84
+ flex.plot_percomplex_scatter()
85
+ flex.plot_percomplex_scatter_bysize()
86
+ flex.plot_significant_complexes()
87
+ flex.plot_complex_contributions()
88
+
89
+
90
+ #%%
91
+ # Save results to CSV
92
+ flex.save_results_to_csv()
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+ #%%
103
+
104
+
pythonflex/plotting.py CHANGED
@@ -8,302 +8,458 @@ import pandas as pd
8
8
  import matplotlib.pyplot as plt
9
9
  from matplotlib import patches
10
10
  from matplotlib.cm import get_cmap
11
- from IPython.display import set_matplotlib_formats
11
+ from matplotlib.ticker import NullFormatter, NullLocator
12
+
13
+ # Completely disable LaTeX and clear all font cache/references
14
+ import matplotlib as mpl
15
+ import matplotlib.font_manager as fm
16
+
17
+ # Disable LaTeX rendering completely
18
+ mpl.rcParams['text.usetex'] = False
19
+
20
+
21
+ # Reset all font-related parameters to system defaults
22
+ mpl.rcParams['font.family'] = 'sans-serif'
23
+ mpl.rcParams['font.serif'] = ['DejaVu Serif', 'Times New Roman', 'Bitstream Vera Serif', 'serif']
24
+ mpl.rcParams['font.sans-serif'] = ['DejaVu Sans', 'Arial', 'Bitstream Vera Sans', 'sans-serif']
25
+ mpl.rcParams['font.cursive'] = ['Apple Chancery', 'Textile', 'Zapf Chancery', 'Sand', 'Script MT', 'Felipa', 'cursive']
26
+ mpl.rcParams['font.fantasy'] = ['Comic Sans MS', 'Chicago', 'Charcoal', 'Impact', 'Western', 'Humor Sans', 'fantasy']
27
+ mpl.rcParams['font.monospace'] = ['DejaVu Sans Mono', 'Bitstream Vera Sans Mono', 'Computer Modern Typewriter', 'Andale Mono', 'Nimbus Mono L', 'Courier New', 'Courier', 'Fixed', 'Terminal', 'monospace']
28
+
29
+ # Remove any LaTeX-specific math font settings
30
+ mpl.rcParams['mathtext.fontset'] = 'dejavusans'
31
+ mpl.rcParams['mathtext.default'] = 'regular'
32
+
33
# Force font manager to rebuild with system fonts only.
# This is best-effort: a failed rescan must not prevent the module from
# importing, but it is reported instead of being silently discarded, and
# KeyboardInterrupt/SystemExit are no longer swallowed (bare ``except`` did).
try:
    fm.fontManager.__init__()
except Exception as exc:
    import warnings
    warnings.warn(f"Could not rebuild the matplotlib font manager: {exc}", RuntimeWarning)
38
+
12
39
 
13
40
  # Local modules
14
41
  from .utils import dload
15
42
  from .logging_config import log
16
43
 
17
- # Configuration
18
- set_matplotlib_formats('svg', 'pdf')
19
44
 
20
45
 
21
46
 
22
47
 
23
def plot_precision_recall_curve(line_width=2.0, hide_minor_ticks=True):
    """Plot precision against the number of true positives on a log X axis.

    Reads the stored ``"pra"`` and ``"config"`` objects via ``dload``. ``pra``
    may be a dict (one curve per key, colored from the default colormap) or a
    single table (drawn in black). Rows with ``tp <= 10`` are dropped before
    plotting. The figure is saved to ``<output_folder>/precision_recall_curve.<output_type>``
    when ``config["plotting"]["save_plot"]`` is truthy and shown unless
    ``show_plot`` is explicitly falsy.

    Args:
        line_width (float): Width of every curve.
        hide_minor_ticks (bool): Remove minor tick marks and labels from the
            log-scaled X axis.
    """
    pra = dload("pra")
    config = dload("config")
    plot_config = config["plotting"]

    fig, ax = plt.subplots()
    ax.set_xscale("log")

    # optionally hide minor ticks on the log axis
    if hide_minor_ticks:
        ax.xaxis.set_minor_locator(NullLocator())
        ax.xaxis.set_minor_formatter(NullFormatter())

    # pyplot.get_cmap() returns the rcParams default colormap, exactly like the
    # former matplotlib.cm.get_cmap() call, but still exists in Matplotlib >= 3.9
    # where matplotlib.cm.get_cmap was removed.
    cmap = plt.get_cmap()
    num_colors = len(pra) if isinstance(pra, dict) else 1
    colors = [cmap(float(i) / max(num_colors - 1, 1)) for i in range(num_colors)]

    if isinstance(pra, dict):
        for (key, val), color in zip(pra.items(), colors):
            val = val[val.tp > 10]
            ax.plot(val.tp, val.precision, c=color, label=key, linewidth=line_width, alpha=0.9)
    else:
        pra = pra[pra.tp > 10]
        ax.plot(pra.tp, pra.precision, c="black", label="Precision Recall Curve", linewidth=line_width, alpha=0.9)

    ax.set(title="Precision-Recall Performance of Datasets",
           xlabel="Number of True Positives (TP)",
           ylabel="Precision")
    ax.legend(loc="upper right", frameon=False)
    ax.set_ylim(0, 1)

    # No grid, open top/right spines
    ax.grid(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    if plot_config["save_plot"]:
        output_type = plot_config["output_type"]
        output_path = Path(config["output_folder"]) / f"precision_recall_curve.{output_type}"
        fig.savefig(output_path, bbox_inches="tight", format=output_type)

    if plot_config.get("show_plot", True):
        plt.show()
    plt.close(fig)
68
92
 
69
93
 
70
94
 
71
def _truncate_label(name, limit=15):
    """Return ``name`` as text, cut to ``limit`` characters with '..' appended only if it was cut."""
    text = str(name)
    if len(text) <= limit:
        return text
    return text[:limit] + '..'


def plot_percomplex_scatter(n_top=10, sig_color='#B71A2A', nonsig_color='#DBDDDD', label_color='black', border_color='black', border_width=1.0):
    """Draw one per-complex score scatter plot for every pair of datasets.

    Reads ``"config"`` and ``"pra_percomplex"`` via ``dload``. For each pair,
    the union of the ``n_top`` highest-scoring rows of either dataset is
    highlighted and labeled with the (shortened) ``Name`` column; all other
    rows form the background. Nothing is drawn when fewer than two datasets
    are available.

    Args:
        n_top (int): Number of top rows taken from each dataset of a pair.
        sig_color: Face color of highlighted points.
        nonsig_color: Face color of background points.
        label_color: Color of label text and connector lines.
        border_color: Edge color of all points.
        border_width (float): Edge line width of all points.
    """
    config = dload("config")
    plot_config = config["plotting"]
    rdict = dload("pra_percomplex")

    if len(rdict) < 2:
        print("Skipping plot: At least two datasets are required for per-complex scatter plot.")
        return

    column_pairs = list(combinations(rdict.keys(), 2))
    df = pd.DataFrame()

    # The first table supplies the shared columns; every later table only
    # contributes its score column, renamed to the dataset key.
    for i, (key, val) in enumerate(rdict.items()):
        val = val.rename(columns={"auc_score": key})
        if i == 0:
            df = val.copy().drop(columns=["Genes", "Length", "used_genes"], errors="ignore")
        else:
            df = pd.concat([df, val[key]], axis=1)

    for pair in column_pairs:
        extreme_indices_0 = df[pair[0]].sort_values(ascending=False).head(n_top).index
        extreme_indices_1 = df[pair[1]].sort_values(ascending=False).head(n_top).index
        significant_indices = extreme_indices_0.union(extreme_indices_1)

        bg_df = df.drop(index=significant_indices)
        sig_df = df.loc[significant_indices]

        fig, ax = plt.subplots()

        # Background cloud; marker area follows n_used_genes when that column exists
        bg_sizes = (bg_df['n_used_genes'] if 'n_used_genes' in bg_df else pd.Series(1, index=bg_df.index)) * 5
        ax.scatter(
            bg_df[pair[0]], bg_df[pair[1]],
            facecolors=nonsig_color, edgecolors=border_color,
            s=bg_sizes, linewidth=border_width, alpha=1.0,
            zorder=0
        )

        # Highlighted points
        sig_sizes = (sig_df['n_used_genes'] if 'n_used_genes' in sig_df else pd.Series(1, index=sig_df.index)) * 8
        ax.scatter(
            sig_df[pair[0]], sig_df[pair[1]],
            facecolors=sig_color, edgecolors=border_color,
            s=sig_sizes, linewidth=border_width, zorder=2
        )

        # Label only highlighted points, processed from top-right to bottom-left
        coords = sorted(
            [(sig_df.loc[idx, pair[0]], sig_df.loc[idx, pair[1]], idx) for idx in sig_df.index],
            key=lambda c: (-c[1], -c[0])
        )

        # Both axes span 0-1 (see set_xlim/set_ylim below)
        max_y = 1.0
        scale_factor = 1.0
        min_distance = 0.05

        adjusted_coords = adjust_text_positions(
            coords, sig_sizes,
            min_distance=min_distance,
            max_y=max_y,
            scale_factor=scale_factor
        )

        for x, adj_y, idx in adjusted_coords:
            y = df.loc[idx, pair[1]]
            # Connector from the point to its label
            ax.plot([x, x], [y, adj_y], color=label_color, linewidth=0.6, alpha=0.3, zorder=3)
            ax.text(
                x, adj_y + 0.005,
                # '..' is appended only when the name was actually shortened
                _truncate_label(df.loc[idx, 'Name']),
                fontsize=4, ha='left', va='bottom', color=label_color,
                linespacing=1.5, zorder=4,
            )

        # Diagonal & axes cosmetics
        ax.plot([0, 1], [0, 1], linestyle='-', color='lightgray', alpha=0.4, linewidth=0.5, zorder=1)
        padding = 0.02
        ax.set_xlim(-padding, 1 + padding)
        ax.set_ylim(-padding, 1 + padding)
        ax.set_xlabel(f"{pair[0]} PR-AUC score")
        ax.set_ylabel(f"{pair[1]} PR-AUC score")
        ax.set_title(f"{pair[0]} vs {pair[1]} - Comparison of complex performance")

        # No grid, open top/right spines
        ax.grid(False)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)

        plt.tight_layout()

        if plot_config["save_plot"]:
            output_type = plot_config["output_type"]
            output_path = Path(config["output_folder"]) / f"percomplex_scatter_{pair[0]}_vs_{pair[1]}.{output_type}"
            fig.savefig(output_path, bbox_inches="tight", format=output_type)

        if plot_config.get("show_plot", True):
            plt.show()

        plt.close(fig)
174
195
 
175
196
 
176
197
 
198
def smart_direction_assignment(point_y, y_max, min_safe_distance=20.0):
    """Choose a label placement policy from the point's vertical position.

    The range ``0..y_max`` is split into thirds: points in the bottom third
    get ``"up_only"``, points in the top third get ``"prefer_down"``, and
    everything else (boundaries included) gets ``"both_directions"``.
    ``min_safe_distance`` is accepted for call compatibility but not used.
    """
    if point_y < y_max / 3:
        return "up_only"
    if point_y > 2 * y_max / 3:
        return "prefer_down"
    return "both_directions"
177
209
 
178
210
 
211
def group_points_by_y_proximity(coords, y_tolerance=5.0):
    """Partition points into groups whose Y values lie close to a seed point.

    The first ungrouped point becomes the seed of a new group; every other
    ungrouped point whose Y differs from the seed's Y by at most
    ``y_tolerance`` joins that group. This repeats until no points remain.
    Input order is preserved inside each group and across groups.

    Args:
        coords: Iterable of sequences whose element ``[1]`` is the Y value
            (e.g. ``(x, y, idx)`` tuples). Any iterable is accepted; it is
            not modified.
        y_tolerance (float): Maximum absolute Y difference to the seed.

    Returns:
        list[list]: The groups, each a new list of the original items.
    """
    groups = []
    remaining = list(coords)  # works for tuples/generators and never mutates the input

    while remaining:
        seed_point = remaining[0]
        seed_y = seed_point[1]
        current_group = [seed_point]
        leftover = []

        # One pass splits the rest into "joins this group" / "still ungrouped",
        # avoiding repeated list.pop() shifting.
        for point in remaining[1:]:
            if abs(point[1] - seed_y) <= y_tolerance:
                current_group.append(point)
            else:
                leftover.append(point)

        groups.append(current_group)
        remaining = leftover

    return groups
179
233
 
180
234
 
181
235
def adjust_text_positions(coords, sizes, min_distance=0.08, max_y=1.0, scale_factor=1.0):
    """Compute vertical label positions so labels of nearby points spread out.

    Points are grouped by Y proximity. An isolated point gets a single offset
    above (or, near the top of the axis, below) itself; points in a group are
    sorted by X and stacked/alternated using density-dependent spacing. All
    results are clamped to ``[0.05 * max_y, max_y - text_height]``.

    Args:
        coords: List of ``(x, y, idx)`` tuples in data coordinates.
        sizes: Marker sizes indexed by ``idx`` (``.loc`` lookup); only used
            for isolated points when ``max_y <= 10``.
        min_distance (float): Minimum vertical spacing between stacked labels.
        max_y (float): Upper end of the Y range in data units. Values above
            10 are treated as a large-range (count-like) axis.
        scale_factor (float): Multiplier for text height and offsets when
            ``max_y <= 10``.

    Returns:
        list: ``(x, adjusted_y, idx)`` tuples, ordered group by group (within
        multi-point groups by ascending X), not necessarily in input order.
    """
    adjusted = []

    # Every threshold is expressed in data coordinates, proportional to the
    # Y range. Previously the small-range branch used absolute values
    # (20 and 5): on a 0-1 axis that merged all points into one group and
    # clamped every label to y=20, after which a fallback reset each label
    # to "y + offset", so no spacing was ever applied.
    min_safe_distance = max_y * 0.05   # lowest allowed label position
    y_tolerance = max_y * 0.02         # Y window for grouping points
    if max_y > 10:
        text_height = max_y * 0.02
    else:
        text_height = 0.04 * scale_factor
    upper_limit = max_y - text_height

    def _clamp(value):
        # Keeps labels inside the plot; makes any later range check redundant.
        return max(min_safe_distance, min(value, upper_limit))

    # list(...) so the grouping helper receives its own list to work on
    groups = group_points_by_y_proximity(list(coords), y_tolerance)

    for group in groups:
        group_size = len(group)
        density_multiplier = calculate_density_multiplier(group_size)

        if group_size == 1:
            x, y, idx = group[0]
            direction = smart_direction_assignment(y, max_y, min_safe_distance)

            if max_y > 10:
                base_offset = max(3, max_y * 0.03)
            else:
                # Larger markers push the label further from the point
                base_offset = np.sqrt(sizes.loc[idx]) * 0.04 * scale_factor if idx in sizes else 0.04 * scale_factor

            # Only "prefer_down" with enough room below places the label
            # under the point; every other case goes above.
            if direction == "prefer_down" and y - base_offset > min_safe_distance:
                adj_y = y - base_offset
            else:
                adj_y = y + base_offset

            adjusted.append((x, _clamp(adj_y), idx))
            continue

        # Several points at a similar height: distribute labels left to right
        group.sort(key=lambda p: p[0])
        group_y = group[0][1]  # representative Y of the group
        direction = smart_direction_assignment(group_y, max_y, min_safe_distance)

        adaptive_spacing = calculate_adaptive_spacing(
            group_size, min_distance, text_height, max_y, density_multiplier
        )
        adaptive_base_offset = calculate_adaptive_base_offset(
            group_size, max_y, scale_factor, density_multiplier
        )

        for i, (x, y, idx) in enumerate(group):
            if direction == "up_only":
                # Stack all labels upward
                adj_y = y + adaptive_base_offset + (i * adaptive_spacing)

            elif direction == "prefer_down":
                down_candidate = y - adaptive_base_offset - (i // 2 * adaptive_spacing)
                if i % 2 == 0 and down_candidate > min_safe_distance:
                    # Even positions go down while there is room
                    adj_y = down_candidate
                else:
                    # Odd positions, or no room below: go up
                    up_level = (i // 2) if i % 2 == 0 else ((i + 1) // 2)
                    adj_y = y + adaptive_base_offset + (up_level * adaptive_spacing)

            else:  # both_directions
                if i % 2 == 0:
                    adj_y = y + adaptive_base_offset + (i // 2 * adaptive_spacing)
                else:
                    potential_down = y - adaptive_base_offset - ((i + 1) // 2 * adaptive_spacing)
                    if potential_down > min_safe_distance:
                        adj_y = potential_down
                    else:
                        # Not enough room below: stack upward instead
                        adj_y = y + adaptive_base_offset + (i // 2 * adaptive_spacing)

            adjusted.append((x, _clamp(adj_y), idx))

    return adjusted
216
340
 
217
341
 
342
def calculate_density_multiplier(group_size):
    """Map a cluster size to the factor by which label spacing is enlarged.

    Sizes up to 3, 6, 10, 15 and 20 yield 1.0, 1.3, 1.6, 2.0 and 2.5
    respectively; larger clusters yield ``3.0`` plus ``0.1`` for every point
    beyond 20.
    """
    tiers = (
        (3, 1.0),
        (6, 1.3),
        (10, 1.6),
        (15, 2.0),
        (20, 2.5),
    )
    for largest_size, multiplier in tiers:
        if group_size <= largest_size:
            return multiplier
    # More than 20 points: keep growing linearly
    return 3.0 + (group_size - 20) * 0.1
356
+
357
+
358
def calculate_adaptive_spacing(group_size, min_distance, text_height, max_y, density_multiplier):
    """Return the vertical gap to leave between stacked labels of one cluster.

    The starting gap is the larger of ``min_distance`` and 1.5 text heights,
    multiplied by ``density_multiplier``. For ``max_y > 10`` it is further
    scaled by ``max_y / 100`` and floored at 3% of ``max_y``; otherwise it is
    floored at 0.05. ``group_size`` is accepted but not used.
    """
    spacing_unit = max(min_distance, text_height * 1.5)

    if max_y > 10:
        # Large-range axis: scale with the axis and keep labels readable
        scaled = spacing_unit * density_multiplier * (max_y / 100.0)
        return max(scaled, max_y * 0.03)

    # Small-range axis
    return max(spacing_unit * density_multiplier, 0.05)
373
+
374
+
375
def calculate_adaptive_base_offset(group_size, max_y, scale_factor, density_multiplier):
    """Return the distance between a point and the first label of its cluster.

    For ``max_y > 10`` the base is ``max(3, 3% of max_y)`` and the result is
    capped at 15% of ``max_y``; otherwise the base is ``0.04 * scale_factor``
    and the cap is 0.2. The base is multiplied by ``density_multiplier``
    before capping. ``group_size`` is accepted but not used.
    """
    if max_y > 10:
        base_offset, ceiling = max(3, max_y * 0.03), max_y * 0.15
    else:
        base_offset, ceiling = 0.04 * scale_factor, 0.2

    # Denser clusters get longer connectors, up to the cap
    return min(base_offset * density_multiplier, ceiling)
391
+
218
392
 
219
- def plot_percomplex_scatter_bysize():
393
+ def plot_percomplex_scatter_bysize(n_labels=10, n_top=10, sig_color='#B71A2A', nonsig_color='#DBDDDD', label_color='black', border_color='black', border_width=1.0):
220
394
  config = dload("config")
221
395
  plot_config = config["plotting"]
222
396
  rdict = dload("pra_percomplex")
223
-
397
+
224
398
  for key, per_complex in rdict.items():
225
399
  sorted_pc = per_complex.sort_values(by="auc_score", ascending=False, na_position="last")
226
- top_10, rest = sorted_pc.head(10), sorted_pc.iloc[10:]
227
-
228
- # Create figure using rcParams defaults
400
+ top_labels, rest = sorted_pc.head(n_labels), sorted_pc.iloc[n_labels:]
401
+
229
402
  fig, ax = plt.subplots()
230
-
231
- # Base scatter plot (simple swap of X/Y data)
403
+
404
+ # Background (REST): filled dots with black borders, not rasterized
232
405
  ax.scatter(
233
- rest.auc_score, rest.n_used_genes, # auc_score on X, n_used_genes on Y
234
- edgecolors="black",
235
- linewidth=0.5,
236
- s=rest.n_used_genes * 10,
237
- label="Other Complexes"
406
+ rest.auc_score, rest.n_used_genes,
407
+ facecolors=nonsig_color, edgecolors=border_color,
408
+ linewidth=border_width, s=rest.n_used_genes * 10,
409
+ alpha=1.0, label="Other Complexes",
410
+ zorder=0
238
411
  )
239
-
240
- # Top 10 scatter plot (simple swap of X/Y data)
412
+
413
+ # Top N: filled dots with black borders
241
414
  ax.scatter(
242
- top_10.auc_score, top_10.n_used_genes, # auc_score on X, n_used_genes on Y
243
- facecolors='black',
244
- edgecolors='black',
245
- linewidth=0.5,
246
- s=top_10.n_used_genes * 10,
247
- label="Top 10 AUC Scores"
415
+ top_labels.auc_score, top_labels.n_used_genes,
416
+ facecolors=sig_color, edgecolors=border_color,
417
+ linewidth=border_width, s=top_labels.n_used_genes * 10,
418
+ label=f"Top {n_labels} AUC Scores", alpha=1.0, zorder=2
248
419
  )
249
420
 
250
-
251
- # Text annotation handling (swapped coords: auc on X, size on Y)
252
- coords = [(row.auc_score, row.n_used_genes, idx) for idx, row in top_10.iterrows()]
253
- sizes = top_10.n_used_genes * 10
421
+ # Labels with corrected scaling
422
+ coords = [(row.auc_score, row.n_used_genes, idx) for idx, row in top_labels.iterrows()]
423
+ sizes = top_labels.n_used_genes * 10
424
+ max_y = sorted_pc.n_used_genes.max() + 50
254
425
 
255
- # Dynamically set max_y and scale_factor to make lines visible/long like original
256
- max_y = sorted_pc.n_used_genes.max() + 50 # Larger buffer (increase to +100 if lines still short)
257
- scale_factor = max_y / 1.0 # Scale offsets for visibility on new range
258
- adjusted_coords = adjust_text_positions(
259
- coords, sizes,
260
- min_distance=0.08 * scale_factor,
261
- max_y=max_y,
262
- scale_factor=scale_factor # New param to make lengths visible
263
- )
426
+ # Fix scaling issue - use reasonable scale factor
427
+ scale_factor = min(max_y / 100.0, 3.0) # Cap scale factor to prevent extreme positioning
264
428
 
265
- for x, adj_y, idx in adjusted_coords:
266
- y = top_10.loc[idx, "n_used_genes"] # Pull from n_used_genes (now Y)
267
- ax.plot([x, x], [y, adj_y],
268
- color='black',
269
- linewidth=0.7,
270
- alpha=0.3,
271
- zorder=3)
272
-
273
- # Dynamic alignment: left if x < 0.5 (extends right), right if x >= 0.5 (extends left)
274
- if x < 0.5:
275
- ha = 'left'
276
- text_x = x + 0.01 # Small rightward offset to avoid line overlap
277
- else:
278
- ha = 'right'
279
- text_x = x - 0.01 # Small leftward offset to avoid line overlap
280
-
281
- ax.text(text_x, adj_y + (0.005 * scale_factor),
282
- top_10.loc[idx, 'Name'][:20] + '.',
283
- fontsize=6,
284
- ha=ha,
285
- va='bottom',
286
- linespacing=1.5,
287
- zorder=4,
288
- bbox=dict(facecolor="white", alpha=0.8, edgecolor="none", pad=1.5))
289
-
290
- # Axis configuration (integer ticks now on Y)
291
- ax.yaxis.get_major_locator().set_params(integer=True)
292
- ax.set_xlabel("PR-AUC score") # Swapped label
293
- ax.set_ylabel("Number of genes in the complex") # Swapped label
429
+ adjusted = adjust_text_positions(
430
+ coords, sizes,
431
+ min_distance=max(5.0, max_y * 0.02), # Use reasonable spacing relative to Y range
432
+ max_y=max_y,
433
+ scale_factor=scale_factor
434
+ )
435
+ for x, adj_y, idx in adjusted:
436
+ y = top_labels.loc[idx, "n_used_genes"]
437
+ ax.plot([x, x], [y, adj_y], color=label_color, linewidth=0.5, alpha=0.3, zorder=3)
438
+ ha = 'left' if x < 0.5 else 'right'
439
+ text_x = x + 0.01 if x < 0.5 else x - 0.01
440
+ ax.text(
441
+ text_x, adj_y + (0.005 * scale_factor),
442
+ top_labels.loc[idx, 'Name'][:15] + '..',
443
+ fontsize=4, ha=ha, va='bottom', color=label_color, linespacing=1.5, zorder=4,
444
+ bbox=dict(facecolor="white", alpha=0.65, edgecolor="white", pad=1.5)
445
+ )
446
+
447
+ # Set y-axis to show integer values only
448
+ from matplotlib.ticker import MaxNLocator
449
+ ax.yaxis.set_major_locator(MaxNLocator(integer=True))
450
+ ax.set_xlabel("PR-AUC score")
451
+ ax.set_ylabel("Number of genes in the complex")
294
452
  ax.set_title(f"{key} - Complex performance: PR-AUC score vs complex size")
295
- ax.grid(False)
296
-
297
- # Fixed limits (X fixed to 0-1 for AUC, Y with buffer for lines/labels)
298
- ax.set_xlim(0, 1.0)
299
- ax.set_ylim(0, max_y)
300
453
 
301
- # Adjust subplot margins to give extra space on right for labels (without extending axis; optional)
302
- plt.subplots_adjust(right=0.8) # Adjust to 0.7 or remove if not needed
454
+ # No ruler + open spines
455
+ ax.grid(visible=False, which='both', axis='both')
456
+ ax.set_xlim(0, 1.0); ax.set_ylim(0, max_y)
457
+ ax.spines['top'].set_visible(False)
458
+ ax.spines['right'].set_visible(False)
303
459
 
460
+ plt.subplots_adjust(right=0.8)
304
461
  plt.tight_layout()
305
462
 
306
- # Save handling (dpi comes from rcParams)
307
463
  if plot_config["save_plot"]:
308
464
  output_type = plot_config["output_type"]
309
465
  output_path = Path(config["output_folder"]) / f"percomplex_scatter_by_complexsize_{key}.{output_type}"
@@ -311,20 +467,28 @@ def plot_percomplex_scatter_bysize():
311
467
 
312
468
  if plot_config.get("show_plot", True):
313
469
  plt.show()
314
-
315
470
  plt.close(fig)
316
471
 
317
472
 
318
473
 
319
- def plot_complex_contributions(min_pairs=10, min_precision_cutoff=0.5, num_complex_to_show=10, y_lim=None, fig_title=None, fig_labs=['Fraction of TP', 'Precision']):
474
+ def plot_complex_contributions(
475
+ min_pairs=10,
476
+ min_precision_cutoff=0.5,
477
+ num_complex_to_show=10,
478
+ y_lim=None,
479
+ fig_title=None,
480
+ fig_labs=['Fraction of TP', 'Precision'],
481
+ legend_rows=3, # <— NEW: rows for legend layout (try 3 or 4)
482
+ ):
320
483
  config = dload("config")
321
484
  plot_config = config["plotting"]
322
485
  plot_data_dict = dload("complex_contributions")
486
+
323
487
  for key, plot_data in plot_data_dict.items():
324
488
  s = plot_data.set_index('Name').sum()
325
489
  find_last_precision = s[s > min_pairs].index[-1]
326
- last_prec_value = float(find_last_precision.split('_')[1]) # Parse the float value
327
-
490
+ last_prec_value = float(find_last_precision.split('_')[1])
491
+
328
492
  plot_data = plot_data.drop_duplicates(subset='Name')
329
493
  cont_stepwise_anno = plot_data['Name']
330
494
  cont_stepwise_mat = plot_data.drop(columns=['Name'])
@@ -340,117 +504,117 @@ def plot_complex_contributions(min_pairs=10, min_precision_cutoff=0.5, num_compl
340
504
  x_df = pd.DataFrame(x, index=cont_stepwise_anno, columns=cont_stepwise_mat.columns)
341
505
  ind_for_mean = y >= (last_prec_value - min_precision_cutoff)
342
506
  if sum(ind_for_mean) == 0:
343
- log.info("No values above 'min.precision.cutoff'")
344
- return False
507
+ log.info("No values above 'min.precision.cutoff'"); return False
345
508
  if sum(ind_for_mean) == 1:
346
- log.info("Only one value above 'min.precision.cutoff', unable to calculate meaningful contribution structure")
347
- return False
348
- # Select top complexes
509
+ log.info("Only one value above 'min.precision.cutoff'"); return False
510
+
349
511
  a = x_df.loc[:, ind_for_mean].mean(axis=1).sort_values()[-num_complex_to_show:]
350
512
  subset = x_df.loc[a.index, :]
351
- # Use the RdYlBu colormap for the top 10 points
352
- cmap = plt.get_cmap() # Get default from rcParams
513
+
514
+ cmap = plt.get_cmap()
353
515
  colors = cmap(np.linspace(0, 1, num_complex_to_show))
354
- colors = np.vstack(([0.5, 0.5, 0.5, 1.0], colors))
516
+ colors = np.vstack(([0.5, 0.5, 0.5, 1.0], colors)) # 'others' + top K
355
517
  others = pd.DataFrame(1 - subset.sum(axis=0), columns=['others']).T
356
518
  merged = pd.concat([others, subset], ignore_index=False)
357
- x = merged.to_numpy()
358
- x1 = np.zeros_like(x)
359
- x2 = np.zeros_like(x)
360
- for i in range(x.shape[0]):
519
+ X = merged.to_numpy()
520
+ x1 = np.zeros_like(X); x2 = np.zeros_like(X)
521
+ for i in range(X.shape[0]):
361
522
  if i == 0:
362
- x2[i, :] = x[0, :]
523
+ x2[i, :] = X[0, :]
363
524
  elif i == 1:
364
- x1[i, :] = x[0, :]
525
+ x1[i, :] = X[0, :]
365
526
  else:
366
- x1[i, :] = x[:i, :].sum(axis=0)
527
+ x1[i, :] = X[:i, :].sum(axis=0)
367
528
  if i > 0:
368
- x2[i, :] = x[:i + 1, :].sum(axis=0)
369
-
529
+ x2[i, :] = X[:i + 1, :].sum(axis=0)
370
530
 
371
- # FORCE recalculation of y_lim - ignore any passed parameter
372
- padding = 0.02 # Small padding to avoid clipping (adjust as needed, e.g., 0.05 for more space)
531
+ padding = 0.02
373
532
  lower = max(0, min(y) - padding)
374
- upper = last_prec_value + padding # Use the actual last precision value instead of 1.0
533
+ upper = last_prec_value + padding
375
534
  y_lim = (lower, upper)
376
-
377
-
378
- fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 1]})
535
+
536
+ # Give legend a bit more room
537
+ fig, ax = plt.subplots(2, 1, gridspec_kw={'height_ratios': [5, 1.8]})
379
538
  ax[0].set_xlim(0, 1)
380
- ax[0].set_ylim(*y_lim) # Use dynamic ylim
539
+ ax[0].set_ylim(*y_lim)
381
540
  ax[0].set_xlabel(fig_labs[0])
382
541
  ax[0].set_ylabel(fig_labs[1])
383
542
  ax[0].set_title(fig_title if fig_title else f"{key} - Contribution of complexes")
384
- for i in range(x.shape[0]):
543
+ for i in range(X.shape[0]):
385
544
  ax[0].fill_betweenx(y, x1[i, :], x2[i, :], color=colors[i], edgecolor='white')
386
545
 
387
- # Legend handling (keep custom settings)
388
- legend_labels = [f'{label[:20]}.' for label in merged.index]
389
- patches_list = [patches.Patch(color=colors[i], label=legend_labels[i]) for i in range(len(legend_labels))]
546
+ # Legend: multi-row, constrained to width
547
+ def _short(s, n=14): return (s[:n-1] + '…') if len(s) > n else s
548
+ labels = [_short(lbl) for lbl in merged.index]
549
+ handles = [patches.Patch(color=colors[i], label=labels[i]) for i in range(len(labels))]
390
550
  ax[1].axis('off')
391
- ax[1].legend(handles=patches_list, loc='center', ncol=3, frameon=False, title="Complexes")
551
+ n_items = len(handles)
552
+ ncols = int(np.ceil(n_items / max(1, legend_rows))) # spread across rows
553
+ ax[1].legend(
554
+ handles=handles,
555
+ loc='center',
556
+ ncol=ncols,
557
+ frameon=False,
558
+ title="Complexes",
559
+ fontsize=6, title_fontsize=6,
560
+ handlelength=0.9, handletextpad=0.25,
561
+ borderaxespad=0.0,
562
+ labelspacing=0.25, columnspacing=0.6,
563
+ mode='expand'
564
+ )
565
+
392
566
  plt.tight_layout()
393
567
 
394
- # Save handling (remove explicit dpi)
395
568
  if plot_config["save_plot"]:
396
- output_type = plot_config["output_type"]
397
- output_folder = Path(config["output_folder"])
398
- output_path = output_folder / f"complex_contributions_{key}.{output_type}"
569
+ output_type = plot_config["output_type"]
570
+ output_folder= Path(config["output_folder"])
571
+ output_path = output_folder / f"complex_contributions_{key}.{output_type}"
399
572
  fig.savefig(output_path, bbox_inches="tight", format=output_type)
400
573
 
401
574
  if plot_config.get("show_plot", True):
402
575
  plt.show()
403
-
404
576
  plt.close(fig)
405
577
 
406
578
 
579
+
407
580
  def plot_significant_complexes():
408
581
  config = dload("config")
409
582
  plot_config = config["plotting"]
410
583
  pra_percomplex = dload("pra_percomplex")
411
584
 
412
- # Define thresholds and prepare data
413
585
  thresholds = [0.1, 0.2, 0.3, 0.4, 0.5]
414
586
  datasets = list(pra_percomplex.keys())
415
587
  num_datasets = len(datasets)
416
588
 
417
- # Create a DataFrame to store results
418
589
  df = pd.DataFrame(index=thresholds)
419
590
  for key, complex_data in pra_percomplex.items():
420
591
  df[key] = [complex_data.query(f'auc_score >= {t}').shape[0] for t in thresholds]
421
592
 
422
- # Create figure
423
593
  fig, ax = plt.subplots()
424
594
 
425
- # Use colormap from rcParams
426
595
  cmap = plt.get_cmap()
427
596
  colors = [cmap(i / (num_datasets + 1)) for i in range(1, num_datasets + 1)]
428
597
 
429
- # Plot bars
430
- bar_width = 0.8 / num_datasets # Dynamic width based on dataset count
598
+ bar_width = 0.8 / num_datasets
431
599
  for i, dataset in enumerate(datasets):
432
600
  x = np.arange(len(thresholds)) + i * bar_width
433
601
  ax.bar(x, df[dataset], width=bar_width, color=colors[i], edgecolor='black', label=dataset)
434
602
 
435
- # Customize x-axis labels
436
603
  ax.set_xticks(np.arange(len(thresholds)) + (num_datasets - 1) * bar_width / 2)
437
- ax.set_xticklabels(thresholds, rotation=0, ha='center')
604
+ ax.set_xticklabels([str(t) for t in thresholds], rotation=0, ha='center')
438
605
 
439
- # Set title and axis labels (handled by rcParams)
440
606
  ax.set_title("Number of significant complexes above PR-AUC thresholds")
441
607
  ax.set_xlabel("PR-AUC score thresholds")
442
608
  ax.set_ylabel("Number of complexes")
443
609
 
444
- # Add grid (already handled by rcParams, but ensured)
445
- ax.grid(axis='y')
610
+ # Nature style: no grid; open top/right spines
611
+ ax.grid(False)
612
+ for spine in ('right', 'top'):
613
+ ax.spines[spine].set_visible(False)
446
614
 
447
- # Add legend
448
- ax.legend(loc='upper right')
449
-
450
- # Adjust layout
615
+ ax.legend(loc='upper right', frameon=False)
451
616
  plt.tight_layout()
452
617
 
453
- # Save figure if required
454
618
  if plot_config["save_plot"]:
455
619
  output_type = plot_config["output_type"]
456
620
  output_folder = Path(config["output_folder"])
@@ -459,7 +623,7 @@ def plot_significant_complexes():
459
623
 
460
624
  if plot_config.get("show_plot", True):
461
625
  plt.show()
462
-
626
+
463
627
  plt.close(fig)
464
628
  return df
465
629
 
@@ -470,34 +634,30 @@ def plot_auc_scores():
470
634
  plot_config = config["plotting"]
471
635
  pra_dict = dload("pr_auc")
472
636
 
473
-
474
637
  sorted_items = sorted(pra_dict.items(), key=lambda x: x[1], reverse=True)
475
638
  datasets = [k for k, _ in sorted_items]
476
639
  auc_scores = [v for _, v in sorted_items]
477
640
 
478
- # Create figure and axis
479
641
  fig, ax = plt.subplots()
480
642
 
481
- # Use colormap from rcParams
482
643
  cmap = plt.get_cmap()
483
644
  num_datasets = len(datasets)
484
645
  colors = [cmap(i / (num_datasets + 1)) for i in range(1, num_datasets + 1)]
485
646
 
486
- # Plot bars
487
647
  ax.bar(datasets, auc_scores, color=colors, edgecolor="black")
488
648
 
489
- # Set y-axis limits dynamically
490
649
  ax.set_ylim(0, max(auc_scores) + 0.01)
491
-
492
- # Set title and labels
493
650
  ax.set_title("AUC scores for the datasets")
494
651
  ax.set_ylabel("AUC score")
495
652
  plt.xticks(rotation=45, ha="right")
496
653
 
497
- # Add grid (already handled by rcParams)
498
- ax.grid(axis='y')
654
+ # Hard-disable any grid/ruler
655
+ ax.grid(visible=False, which='both', axis='both')
656
+ ax.set_axisbelow(False) # make sure nothing faint is drawn beneath
657
+ # Open spines
658
+ ax.spines['top'].set_visible(False)
659
+ ax.spines['right'].set_visible(False)
499
660
 
500
- # Save the figure if required
501
661
  if plot_config["save_plot"]:
502
662
  output_type = plot_config["output_type"]
503
663
  output_folder = Path(config["output_folder"])
@@ -507,6 +667,6 @@ def plot_auc_scores():
507
667
 
508
668
  if plot_config.get("show_plot", True):
509
669
  plt.show()
510
-
670
+
511
671
  plt.close(fig)
512
672
  return pra_dict
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pythonflex
3
- Version: 0.1.6
3
+ Version: 0.2
4
4
  Summary: pythonFLEX is a benchmarking toolkit for evaluating CRISPR screen results against biological gold standards. The toolkit computes gene-level and complex-level performance metrics, helping researchers systematically assess the biological relevance and resolution of their CRISPR screening data.
5
5
  Author-email: Yasir Demirtaş <tyasird@hotmail.com>
6
6
  Requires-Python: >=3.9
@@ -1,7 +1,7 @@
1
- pythonflex/__init__.py,sha256=rz-8y-zEJQR3ThbhlIUV0N1q7Z-4UNHpHbOglXFp52c,1292
2
- pythonflex/analysis.py,sha256=-_FPrV9fImfU5edncdPhotgpANkIEO-4LJclL_sG6_g,54513
1
+ pythonflex/__init__.py,sha256=rW_MdM0ijaN9HAzFL-P7oVfLRJRGq0M83Izfmdr_s64,1346
2
+ pythonflex/analysis.py,sha256=cGUZKb5swJ7h1f6PJcIg4AW-BulY2ETGjJul9cg3u-k,57217
3
3
  pythonflex/logging_config.py,sha256=iqRKK18zvtfV_-bYHWrXtSZywiUtYxoHkw0ZnVORQBQ,2015
4
- pythonflex/plotting.py,sha256=eW0coNDFv5LFAE8xg-kWUJynzTPzd5ieWW05KC54LkE,19615
4
+ pythonflex/plotting.py,sha256=ywUa95UxUaxtkaWGffrCcaQWw1WCF17GF6nF8KgWhc0,27429
5
5
  pythonflex/preprocessing.py,sha256=oIGPdmETuBQh4mdsIOWB5DOmYndp9S-sW8r7h_ek0Zo,8583
6
6
  pythonflex/utils.py,sha256=nyVlGu5OXpz5YPj48hXueL5ja88sQ2PUiJ76c4USg4A,3886
7
7
  pythonflex/data/dataset/liver_cell_lines_500_genes.csv,sha256=qfKsqPjL41Y1GuxxAhc-MfaNO0mX6Qju_SeynKSpEiM,238639
@@ -13,9 +13,10 @@ pythonflex/data/gold_standard/PATHWAY.parquet,sha256=bFRDe3PQ_TFc7B1uZuynwOGcgxE
13
13
  pythonflex/data/gold_standard/corum.csv,sha256=2rZeyr2Ghm7f-gFxCZnhPtxI2jxRoiZMUEH2EJwAgsI,208889
14
14
  pythonflex/data/gold_standard/gobp.csv,sha256=TO9yfx9mO8WkXvWfSB-pFId9T8xYfqdZpshAXC0Fyj8,1739167
15
15
  pythonflex/data/gold_standard/pathway.csv,sha256=J3HKVLUZ_Oxucmn_14ieYp3Wr2lcKtp0nIl4_8_K2Yc,489424
16
- pythonflex/examples/basic_usage.py,sha256=xDXzBH0xZqljLXRD5BAdf4a3X9Q3BbgAG0S6-fJy0No,1917
16
+ pythonflex/examples/basic_usage.py,sha256=4Kv3OdiyBruq30Ppwx2xYx1ioEtl8jeAg6mAJxzA6Go,1919
17
17
  pythonflex/examples/dataset_filtering.py,sha256=56ZXgsbUNaHoGjX8QdQZ74CjUXDi-qdzfeMhmP1WHAA,978
18
- pythonflex-0.1.6.dist-info/METADATA,sha256=JAJlViRvsO9TbuYN0P7t_bEQ8Hh8u_H28-Z55TrLA1k,3928
19
- pythonflex-0.1.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
20
- pythonflex-0.1.6.dist-info/entry_points.txt,sha256=37liK1baI_CRVDivpjsn8JDClL9_YeTTuSMAZ3Ty7oE,47
21
- pythonflex-0.1.6.dist-info/RECORD,,
18
+ pythonflex/examples/test.py,sha256=B8-JE5AU7be5loSr6Qv2rOviXXe1NRCYpaEGfGjaow0,2388
19
+ pythonflex-0.2.dist-info/METADATA,sha256=8URwIkDildA8Hh-WvsGwIywP2ssMCrp1-Z2zIdYckRM,3926
20
+ pythonflex-0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
21
+ pythonflex-0.2.dist-info/entry_points.txt,sha256=37liK1baI_CRVDivpjsn8JDClL9_YeTTuSMAZ3Ty7oE,47
22
+ pythonflex-0.2.dist-info/RECORD,,