masster 0.5.22__py3-none-any.whl → 0.5.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/plot.py CHANGED
@@ -11,6 +11,7 @@ from tqdm import tqdm
11
11
 
12
12
  # Import cmap for colormap handling
13
13
  from cmap import Colormap
14
+
14
15
  hv.extension("bokeh")
15
16
 
16
17
 
@@ -22,84 +23,93 @@ from bokeh.layouts import row as bokeh_row
22
23
  def _export_with_webdriver_manager(plot_obj, filename, format_type, logger=None):
23
24
  """
24
25
  Export plot to PNG or SVG using webdriver-manager for automatic driver management.
25
-
26
+
26
27
  Parameters:
27
28
  plot_obj: Bokeh plot object or holoviews object to export
28
- filename: Output filename
29
+ filename: Output filename
29
30
  format_type: Either "png" or "svg"
30
31
  logger: Logger for error reporting (optional)
31
-
32
+
32
33
  Returns:
33
34
  bool: True if export successful, False otherwise
34
35
  """
35
36
  try:
36
37
  # Convert holoviews to bokeh if needed
37
- if hasattr(plot_obj, 'opts'): # Likely a holoviews object
38
+ if hasattr(plot_obj, "opts"): # Likely a holoviews object
38
39
  import holoviews as hv
40
+
39
41
  bokeh_plot = hv.render(plot_obj)
40
42
  else:
41
43
  bokeh_plot = plot_obj
42
-
44
+
43
45
  # Try webdriver-manager export first
44
46
  try:
45
47
  from webdriver_manager.chrome import ChromeDriverManager
46
48
  from selenium import webdriver
47
49
  from selenium.webdriver.chrome.service import Service
48
50
  from selenium.webdriver.chrome.options import Options
49
-
51
+
50
52
  # Set up Chrome options for headless operation
51
53
  chrome_options = Options()
52
54
  chrome_options.add_argument("--headless")
53
55
  chrome_options.add_argument("--no-sandbox")
54
56
  chrome_options.add_argument("--disable-dev-shm-usage")
55
57
  chrome_options.add_argument("--disable-gpu")
56
-
58
+
57
59
  # Use webdriver-manager to automatically get the correct ChromeDriver
58
60
  service = Service(ChromeDriverManager().install())
59
61
  driver = webdriver.Chrome(service=service, options=chrome_options)
60
-
62
+
61
63
  # Export with managed webdriver
62
64
  if format_type == "png":
63
65
  from bokeh.io import export_png
66
+
64
67
  export_png(bokeh_plot, filename=filename, webdriver=driver)
65
68
  elif format_type == "svg":
66
69
  from bokeh.io import export_svg
70
+
67
71
  export_svg(bokeh_plot, filename=filename, webdriver=driver)
68
72
  else:
69
73
  raise ValueError(f"Unsupported format: {format_type}")
70
-
74
+
71
75
  driver.quit()
72
76
  return True
73
-
77
+
74
78
  except ImportError:
75
79
  if logger:
76
80
  logger.debug(f"webdriver-manager not available, using default {format_type.upper()} export")
77
81
  # Fall back to default export
78
82
  if format_type == "png":
79
83
  from bokeh.io import export_png
84
+
80
85
  export_png(bokeh_plot, filename=filename)
81
86
  elif format_type == "svg":
82
87
  from bokeh.io import export_svg
88
+
83
89
  export_svg(bokeh_plot, filename=filename)
84
90
  return True
85
-
91
+
86
92
  except Exception as e:
87
93
  if logger:
88
- logger.debug(f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export")
94
+ logger.debug(
95
+ f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export"
96
+ )
89
97
  try:
90
98
  # Final fallback to default export
91
99
  if format_type == "png":
92
100
  from bokeh.io import export_png
101
+
93
102
  export_png(bokeh_plot, filename=filename)
94
103
  elif format_type == "svg":
95
104
  from bokeh.io import export_svg
105
+
96
106
  export_svg(bokeh_plot, filename=filename)
97
107
  return True
98
108
  except Exception as e2:
99
109
  if logger:
100
110
  logger.error(f"{format_type.upper()} export failed: {e2}")
101
111
  return False
102
-
112
+
103
113
  except Exception as e:
104
114
  if logger:
105
115
  logger.error(f"Export preparation failed: {e}")
@@ -117,11 +127,11 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
117
127
  from bokeh.embed import file_html
118
128
 
119
129
  # Create HTML content without affecting global state
120
- resources = Resources(mode='cdn')
130
+ resources = Resources(mode="cdn")
121
131
  html = file_html(plot_object, resources, title=plot_title)
122
132
 
123
133
  # Write directly to file
124
- with open(filename, 'w', encoding='utf-8') as f:
134
+ with open(filename, "w", encoding="utf-8") as f:
125
135
  f.write(html)
126
136
 
127
137
  logger.info(f"Plot saved to: {abs_filename}")
@@ -132,15 +142,15 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
132
142
  logger.info(f"Plot saved to: {abs_filename}")
133
143
  else:
134
144
  # Fall back to HTML if PNG export not available
135
- html_filename = filename.replace('.png', '.html')
136
- abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.png', '.html')
145
+ html_filename = filename.replace(".png", ".html")
146
+ abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace(".png", ".html")
137
147
  from bokeh.resources import Resources
138
148
  from bokeh.embed import file_html
139
149
 
140
- resources = Resources(mode='cdn')
150
+ resources = Resources(mode="cdn")
141
151
  html = file_html(plot_object, resources, title=plot_title)
142
152
 
143
- with open(html_filename, 'w', encoding='utf-8') as f:
153
+ with open(html_filename, "w", encoding="utf-8") as f:
144
154
  f.write(html)
145
155
 
146
156
  logger.warning(f"PNG export not available. Saved as HTML instead: {abs_html_filename}")
@@ -150,21 +160,21 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
150
160
  logger.info(f"Plot saved to: {abs_filename}")
151
161
  else:
152
162
  # Fall back to HTML if SVG export not available
153
- html_filename = filename.replace('.svg', '.html')
154
- abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.svg', '.html')
163
+ html_filename = filename.replace(".svg", ".html")
164
+ abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace(".svg", ".html")
155
165
  from bokeh.resources import Resources
156
166
  from bokeh.embed import file_html
157
167
 
158
- resources = Resources(mode='cdn')
168
+ resources = Resources(mode="cdn")
159
169
  html = file_html(plot_object, resources, title=plot_title)
160
170
 
161
- with open(html_filename, 'w', encoding='utf-8') as f:
171
+ with open(html_filename, "w", encoding="utf-8") as f:
162
172
  f.write(html)
163
173
 
164
174
  logger.warning(f"SVG export not available. Saved as HTML instead: {abs_html_filename}")
165
175
  html = file_html(plot_object, resources, title=plot_title)
166
176
 
167
- with open(html_filename, 'w', encoding='utf-8') as f:
177
+ with open(html_filename, "w", encoding="utf-8") as f:
168
178
  f.write(html)
169
179
 
170
180
  logger.warning(f"SVG export not available. Saved as HTML instead: {abs_html_filename}")
@@ -173,10 +183,10 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
173
183
  from bokeh.resources import Resources
174
184
  from bokeh.embed import file_html
175
185
 
176
- resources = Resources(mode='cdn')
186
+ resources = Resources(mode="cdn")
177
187
  html = file_html(plot_object, resources, title=plot_title)
178
188
 
179
- with open(filename, 'w', encoding='utf-8') as f:
189
+ with open(filename, "w", encoding="utf-8") as f:
180
190
  f.write(html)
181
191
 
182
192
  logger.info(f"Plot saved to: {abs_filename}")
@@ -194,7 +204,7 @@ def _isolated_show_notebook(plot_object):
194
204
 
195
205
  # Suppress both warnings and logging messages for the specific Bokeh callback warnings
196
206
  # that occur when Panel components with Python callbacks are converted to standalone Bokeh
197
- bokeh_logger = logging.getLogger('bokeh.embed.util')
207
+ bokeh_logger = logging.getLogger("bokeh.embed.util")
198
208
  original_level = bokeh_logger.level
199
209
  bokeh_logger.setLevel(logging.ERROR) # Suppress WARNING level messages
200
210
 
@@ -210,8 +220,8 @@ def _isolated_show_notebook(plot_object):
210
220
  output_notebook(hide_banner=True)
211
221
 
212
222
  # Reset Holoviews to notebook mode
213
- hv.extension('bokeh', logo=False)
214
- hv.output(backend='bokeh', mode='jupyter')
223
+ hv.extension("bokeh", logo=False)
224
+ hv.output(backend="bokeh", mode="jupyter")
215
225
 
216
226
  # Show in notebook
217
227
  show(plot_object)
@@ -245,13 +255,14 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
245
255
  elif filename.endswith(".png"):
246
256
  try:
247
257
  from panel.io.save import save_png
258
+
248
259
  # Convert Panel to Bokeh models before saving
249
260
  bokeh_layout = panel_obj.get_root()
250
261
  save_png(bokeh_layout, filename=filename)
251
262
  logger.info(f"{plot_title} saved to: {abs_filename}")
252
263
  except Exception:
253
264
  # Fall back to HTML if PNG export not available
254
- html_filename = filename.replace('.png', '.html')
265
+ html_filename = filename.replace(".png", ".html")
255
266
  abs_html_filename = os.path.abspath(html_filename)
256
267
  try:
257
268
  panel_obj.save(html_filename, embed=True)
@@ -263,12 +274,13 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
263
274
  # Try to save as PDF, fall back to HTML if not available
264
275
  try:
265
276
  from bokeh.io.export import export_pdf
277
+
266
278
  bokeh_layout = panel_obj.get_root()
267
279
  export_pdf(bokeh_layout, filename=filename)
268
280
  logger.info(f"{plot_title} saved to: {abs_filename}")
269
281
  except ImportError:
270
282
  # Fall back to HTML if PDF export not available
271
- html_filename = filename.replace('.pdf', '.html')
283
+ html_filename = filename.replace(".pdf", ".html")
272
284
  abs_html_filename = os.path.abspath(html_filename)
273
285
  try:
274
286
  panel_obj.save(html_filename, embed=True)
@@ -279,12 +291,13 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
279
291
  # Try to save as SVG, fall back to HTML if not available
280
292
  try:
281
293
  from bokeh.io.export import export_svg
294
+
282
295
  bokeh_layout = panel_obj.get_root()
283
296
  export_svg(bokeh_layout, filename=filename)
284
297
  logger.info(f"{plot_title} saved to: {abs_filename}")
285
298
  except Exception as e:
286
299
  # Fall back to HTML if SVG export not available
287
- html_filename = filename.replace('.svg', '.html')
300
+ html_filename = filename.replace(".svg", ".html")
288
301
  abs_html_filename = os.path.abspath(html_filename)
289
302
  try:
290
303
  panel_obj.save(html_filename, embed=True)
@@ -318,16 +331,18 @@ def _isolated_show_panel_notebook(panel_obj):
318
331
  output_notebook(hide_banner=True)
319
332
 
320
333
  # Reset Holoviews to notebook mode
321
- hv.extension('bokeh', logo=False)
322
- hv.output(backend='bokeh', mode='jupyter')
334
+ hv.extension("bokeh", logo=False)
335
+ hv.output(backend="bokeh", mode="jupyter")
323
336
 
324
337
  # For Panel objects in notebooks, use on.extension and display inline
325
338
  import panel as on
339
+
326
340
  try:
327
341
  # Configure Panel for notebook display
328
- on.extension('bokeh', inline=True, comms='vscode')
342
+ on.extension("bokeh", inline=True, comms="vscode")
329
343
  # Use IPython display to show inline instead of show()
330
344
  from IPython.display import display
345
+
331
346
  display(panel_obj)
332
347
  except Exception:
333
348
  # Fallback to regular Panel show
@@ -344,8 +359,8 @@ def plot_alignment(
344
359
  ):
345
360
  """Visualize retention time alignment using two synchronized Bokeh scatter plots.
346
361
 
347
- Uses ``features_df`` to create side-by-side plots showing Original RT (left)
348
- and Current/Aligned RT (right). If no alignment has been performed yet,
362
+ Uses ``features_df`` to create side-by-side plots showing Original RT (left)
363
+ and Current/Aligned RT (right). If no alignment has been performed yet,
349
364
  both plots show the current RT values.
350
365
 
351
366
  Parameters:
@@ -409,27 +424,33 @@ def plot_alignment(
409
424
  for sample_idx, sample in enumerate(samples_list):
410
425
  # Filter sample data
411
426
  sample_data = features_df.filter(pl.col(sample_col) == sample)
412
-
427
+
413
428
  # Sample data if too large for performance
414
429
  max_points_per_sample = 10000
415
430
  if sample_data.height > max_points_per_sample:
416
- self.logger.info(f"Sample {sample}: Sampling {max_points_per_sample} points from {sample_data.height} features for performance")
431
+ self.logger.info(
432
+ f"Sample {sample}: Sampling {max_points_per_sample} points from {sample_data.height} features for performance"
433
+ )
417
434
  sample_data = sample_data.sample(n=max_points_per_sample, seed=42)
418
435
 
419
436
  # Calculate max intensity for alpha scaling
420
437
  max_inty = sample_data.select(pl.col("inty").max()).item() or 1
421
438
 
422
439
  # Get sample information
423
- sample_uid = sample if sample_col == "sample_uid" else sample_data.select(pl.col("sample_uid")).item() if "sample_uid" in sample_data.columns else sample
424
-
440
+ sample_uid = (
441
+ sample
442
+ if sample_col == "sample_uid"
443
+ else sample_data.select(pl.col("sample_uid")).item()
444
+ if "sample_uid" in sample_data.columns
445
+ else sample
446
+ )
447
+
425
448
  # Try to get actual sample name from samples_df if available
426
449
  sample_name = str(sample) # fallback
427
450
  if hasattr(self, "samples_df") and self.samples_df is not None and sample_uid is not None:
428
451
  try:
429
452
  sample_name_result = (
430
- self.samples_df.filter(pl.col("sample_uid") == sample_uid)
431
- .select("sample_name")
432
- .to_series()
453
+ self.samples_df.filter(pl.col("sample_uid") == sample_uid).select("sample_name").to_series()
433
454
  )
434
455
  if len(sample_name_result) > 0 and sample_name_result[0] is not None:
435
456
  sample_name = str(sample_name_result[0])
@@ -441,7 +462,7 @@ def plot_alignment(
441
462
  cols_to_select = ["rt", "mz", "inty"]
442
463
  if has_alignment:
443
464
  cols_to_select.append("rt_original")
444
-
465
+
445
466
  sample_dict = sample_data.select(cols_to_select).to_dicts()
446
467
 
447
468
  for row_dict in sample_dict:
@@ -490,7 +511,7 @@ def plot_alignment(
490
511
  # Get colors from samples_df if available
491
512
  sample_uids_list = list(sample_idx_to_uid.values())
492
513
  color_map: dict[int, str] = {}
493
-
514
+
494
515
  if sample_uids_list and hasattr(self, "samples_df") and self.samples_df is not None:
495
516
  try:
496
517
  sample_colors = (
@@ -499,7 +520,7 @@ def plot_alignment(
499
520
  .to_dict(as_series=False)
500
521
  )
501
522
  uid_to_color = dict(zip(sample_colors["sample_uid"], sample_colors["sample_color"]))
502
-
523
+
503
524
  for sample_idx, sample_uid in sample_idx_to_uid.items():
504
525
  color_map[sample_idx] = uid_to_color.get(sample_uid, "#1f77b4")
505
526
  except Exception:
@@ -522,7 +543,7 @@ def plot_alignment(
522
543
  # Create Bokeh figures
523
544
  title_before = "Original RT" if has_alignment else "Current RT (No Alignment)"
524
545
  title_after = "Aligned RT" if has_alignment else "Current RT (Copy)"
525
-
546
+
526
547
  p1 = figure(
527
548
  width=width,
528
549
  height=height,
@@ -605,6 +626,7 @@ def plot_alignment(
605
626
  if filename is not None:
606
627
  # Convert relative paths to absolute paths using study folder as base
607
628
  import os
629
+
608
630
  if not os.path.isabs(filename):
609
631
  filename = os.path.join(self.folder, filename)
610
632
 
@@ -642,7 +664,7 @@ def plot_consensus_2d(
642
664
  Parameters:
643
665
  filename (str, optional): Path to save the plot
644
666
  colorby (str): Column name to use for color mapping (default: "number_samples")
645
- Automatically detects if column contains categorical (string) or
667
+ Automatically detects if column contains categorical (string) or
646
668
  numeric data and applies appropriate color mapping:
647
669
  - Categorical: Uses factor_cmap with distinct colors and legend
648
670
  - Numeric: Uses LinearColorMapper with continuous colorbar
@@ -657,7 +679,7 @@ def plot_consensus_2d(
657
679
  height (int): Plot height in pixels (default: 900)
658
680
  mz_range (tuple, optional): m/z range for filtering consensus features (min_mz, max_mz)
659
681
  rt_range (tuple, optional): Retention time range for filtering consensus features (min_rt, max_rt)
660
- legend (str, optional): Legend position for categorical data. Options: 'top_right', 'top_left',
682
+ legend (str, optional): Legend position for categorical data. Options: 'top_right', 'top_left',
661
683
  'bottom_right', 'bottom_left', 'right', 'left', 'top', 'bottom'.
662
684
  If None, legend is hidden. Only applies to categorical coloring (default: "bottom_right")
663
685
  show_none (bool): Whether to display points with None values for colorby column (default: True)
@@ -742,7 +764,7 @@ def plot_consensus_2d(
742
764
  # Filter out None values for colorby column if show_none=False
743
765
  if not show_none and colorby in data.columns:
744
766
  data = data.filter(pl.col(colorby).is_not_null())
745
-
767
+
746
768
  # Convert Polars DataFrame to pandas for Bokeh compatibility
747
769
  data_pd = data.to_pandas()
748
770
  source = ColumnDataSource(data_pd)
@@ -786,20 +808,22 @@ def plot_consensus_2d(
786
808
  # Check if colorby column contains categorical data (string/object)
787
809
  colorby_values = data[colorby].to_list()
788
810
  is_categorical = (
789
- data_pd[colorby].dtype in ["object", "string", "category"] or
790
- isinstance(colorby_values[0], str) if colorby_values else False
811
+ data_pd[colorby].dtype in ["object", "string", "category"] or isinstance(colorby_values[0], str)
812
+ if colorby_values
813
+ else False
791
814
  )
792
-
815
+
793
816
  if is_categorical:
794
817
  # Handle categorical coloring
795
818
  # Use natural order of unique values - don't sort to preserve correct legend mapping
796
819
  # Sorting would break the correspondence between legend labels and point colors
797
820
  unique_values = [v for v in data_pd[colorby].unique() if v is not None]
798
-
821
+
799
822
  # Use the custom palette from cmap if available, otherwise fall back to defaults
800
823
  if len(palette) >= len(unique_values):
801
824
  # Use custom colormap palette - sample evenly across the palette
802
825
  import numpy as np
826
+
803
827
  indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
804
828
  categorical_palette = [palette[i] for i in indices]
805
829
  elif len(unique_values) <= 20:
@@ -808,7 +832,7 @@ def plot_consensus_2d(
808
832
  else:
809
833
  # For many categories, use a subset of the viridis palette
810
834
  categorical_palette = viridis(min(256, len(unique_values)))
811
-
835
+
812
836
  color_mapper = factor_cmap(colorby, categorical_palette, unique_values)
813
837
  else:
814
838
  # Handle numeric coloring with LinearColorMapper
@@ -832,11 +856,12 @@ def plot_consensus_2d(
832
856
  all_unique_values = list(data_pd[colorby].unique())
833
857
  unique_values = [v for v in all_unique_values if v is not None]
834
858
  has_none_values = None in all_unique_values
835
-
859
+
836
860
  # Use the custom palette from cmap if available, otherwise fall back to defaults
837
861
  if len(palette) >= len(unique_values):
838
862
  # Use custom colormap palette - sample evenly across the palette
839
863
  import numpy as np
864
+
840
865
  indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
841
866
  categorical_palette = [palette[i] for i in indices]
842
867
  elif len(unique_values) <= 20:
@@ -844,23 +869,23 @@ def plot_consensus_2d(
844
869
  categorical_palette = Category20[min(20, max(3, len(unique_values)))]
845
870
  else:
846
871
  categorical_palette = viridis(min(256, len(unique_values)))
847
-
872
+
848
873
  # Handle None values with black color FIRST so they appear in the background
849
874
  if has_none_values and show_none:
850
875
  # Filter data for None values
851
876
  none_data = data.filter(pl.col(colorby).is_null())
852
877
  none_data_pd = none_data.to_pandas()
853
878
  none_source = bp.ColumnDataSource(none_data_pd)
854
-
879
+
855
880
  if scaling.lower() in ["dyn", "dynamic"]:
856
881
  # Calculate appropriate radius for dynamic scaling
857
882
  rt_range = data["rt"].max() - data["rt"].min()
858
883
  mz_range = data["mz"].max() - data["mz"].min()
859
884
  dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
860
-
885
+
861
886
  renderer = p.circle(
862
887
  x="rt",
863
- y="mz",
888
+ y="mz",
864
889
  radius=dynamic_radius,
865
890
  fill_color="lightgray",
866
891
  line_color=None,
@@ -872,32 +897,32 @@ def plot_consensus_2d(
872
897
  renderer = p.scatter(
873
898
  x="rt",
874
899
  y="mz",
875
- size="markersize",
900
+ size="markersize",
876
901
  fill_color="lightgray",
877
902
  line_color=None,
878
903
  alpha=alpha,
879
904
  source=none_source,
880
905
  legend_label="None",
881
906
  )
882
-
907
+
883
908
  # Create a separate renderer for each non-None category (plotted on top of None values)
884
909
  for i, category in enumerate(unique_values):
885
910
  # Filter data for this category
886
911
  category_data = data.filter(pl.col(colorby) == category)
887
912
  category_data_pd = category_data.to_pandas()
888
913
  category_source = bp.ColumnDataSource(category_data_pd)
889
-
914
+
890
915
  color = categorical_palette[i % len(categorical_palette)]
891
-
916
+
892
917
  if scaling.lower() in ["dyn", "dynamic"]:
893
918
  # Calculate appropriate radius for dynamic scaling
894
919
  rt_range = data["rt"].max() - data["rt"].min()
895
920
  mz_range = data["mz"].max() - data["mz"].min()
896
921
  dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
897
-
922
+
898
923
  renderer = p.circle(
899
924
  x="rt",
900
- y="mz",
925
+ y="mz",
901
926
  radius=dynamic_radius,
902
927
  fill_color=color,
903
928
  line_color=None,
@@ -909,17 +934,17 @@ def plot_consensus_2d(
909
934
  renderer = p.scatter(
910
935
  x="rt",
911
936
  y="mz",
912
- size="markersize",
937
+ size="markersize",
913
938
  fill_color=color,
914
939
  line_color=None,
915
940
  alpha=alpha,
916
941
  source=category_source,
917
942
  legend_label=str(category),
918
943
  )
919
-
944
+
920
945
  # No single scatter_renderer for categorical data
921
946
  scatter_renderer = None
922
-
947
+
923
948
  else:
924
949
  # Handle numeric coloring - single renderer with color mapping
925
950
  if scaling.lower() in ["dyn", "dynamic"]:
@@ -927,7 +952,7 @@ def plot_consensus_2d(
927
952
  rt_range = data["rt"].max() - data["rt"].min()
928
953
  mz_range = data["mz"].max() - data["mz"].min()
929
954
  dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
930
-
955
+
931
956
  scatter_renderer = p.circle(
932
957
  x="rt",
933
958
  y="mz",
@@ -957,7 +982,7 @@ def plot_consensus_2d(
957
982
  ("number_ms2", "@number_ms2"),
958
983
  ("inty_mean", "@inty_mean"),
959
984
  ]
960
-
985
+
961
986
  # Add id_top_* columns if they exist and have non-null values
962
987
  id_top_columns = ["id_top_name", "id_top_adduct", "id_top_class", "id_top_score"]
963
988
  for col in id_top_columns:
@@ -969,7 +994,7 @@ def plot_consensus_2d(
969
994
  tooltips.append((col, f"@{col}{{0.0}}"))
970
995
  else:
971
996
  tooltips.append((col, f"@{col}"))
972
-
997
+
973
998
  hover = HoverTool(
974
999
  tooltips=tooltips,
975
1000
  )
@@ -977,7 +1002,7 @@ def plot_consensus_2d(
977
1002
  # For numeric data, specify the single renderer
978
1003
  if not is_categorical and scatter_renderer:
979
1004
  hover.renderers = [scatter_renderer]
980
-
1005
+
981
1006
  p.add_tools(hover)
982
1007
 
983
1008
  # add colorbar only for numeric data (LinearColorMapper)
@@ -996,15 +1021,15 @@ def plot_consensus_2d(
996
1021
  # Map legend position parameter to Bokeh legend position
997
1022
  legend_position_map = {
998
1023
  "top_right": "top_right",
999
- "top_left": "top_left",
1024
+ "top_left": "top_left",
1000
1025
  "bottom_right": "bottom_right",
1001
1026
  "bottom_left": "bottom_left",
1002
1027
  "right": "right",
1003
1028
  "left": "left",
1004
1029
  "top": "top",
1005
- "bottom": "bottom"
1030
+ "bottom": "bottom",
1006
1031
  }
1007
-
1032
+
1008
1033
  bokeh_legend_pos = legend_position_map.get(legend, "bottom_right")
1009
1034
  p.legend.location = bokeh_legend_pos
1010
1035
  p.legend.click_policy = "hide"
@@ -1015,6 +1040,7 @@ def plot_consensus_2d(
1015
1040
  if filename is not None:
1016
1041
  # Convert relative paths to absolute paths using study folder as base
1017
1042
  import os
1043
+
1018
1044
  if not os.path.isabs(filename):
1019
1045
  filename = os.path.join(self.folder, filename)
1020
1046
 
@@ -1249,6 +1275,7 @@ def plot_samples_2d(
1249
1275
  if filename is not None:
1250
1276
  # Convert relative paths to absolute paths using study folder as base
1251
1277
  import os
1278
+
1252
1279
  if not os.path.isabs(filename):
1253
1280
  filename = os.path.join(self.folder, filename)
1254
1281
 
@@ -1422,6 +1449,7 @@ def plot_bpc(
1422
1449
  if filename is not None:
1423
1450
  # Convert relative paths to absolute paths using study folder as base
1424
1451
  import os
1452
+
1425
1453
  if not os.path.isabs(filename):
1426
1454
  filename = os.path.join(self.folder, filename)
1427
1455
 
@@ -1593,6 +1621,7 @@ def plot_eic(
1593
1621
  if filename is not None:
1594
1622
  # Convert relative paths to absolute paths using study folder as base
1595
1623
  import os
1624
+
1596
1625
  if not os.path.isabs(filename):
1597
1626
  filename = os.path.join(self.folder, filename)
1598
1627
 
@@ -1659,15 +1688,13 @@ def plot_rt_correction(
1659
1688
  sample_names_dict = {}
1660
1689
  if hasattr(self, "samples_df") and self.samples_df is not None:
1661
1690
  try:
1662
- sample_name_mapping = (
1663
- self.samples_df
1664
- .filter(pl.col("sample_uid").is_in(sample_uids))
1665
- .select(["sample_uid", "sample_name"])
1691
+ sample_name_mapping = self.samples_df.filter(pl.col("sample_uid").is_in(sample_uids)).select([
1692
+ "sample_uid",
1693
+ "sample_name",
1694
+ ])
1695
+ sample_names_dict = dict(
1696
+ zip(sample_name_mapping["sample_uid"].to_list(), sample_name_mapping["sample_name"].to_list())
1666
1697
  )
1667
- sample_names_dict = dict(zip(
1668
- sample_name_mapping["sample_uid"].to_list(),
1669
- sample_name_mapping["sample_name"].to_list()
1670
- ))
1671
1698
  except Exception:
1672
1699
  pass
1673
1700
 
@@ -1686,10 +1713,8 @@ def plot_rt_correction(
1686
1713
  # OPTIMIZED: Filter once, group once instead of per-sample filtering
1687
1714
  try:
1688
1715
  # Filter all data once for selected samples and required conditions
1689
- all_sample_feats = self.features_df.filter(
1690
- pl.col(sample_id_col).is_in(sample_uids)
1691
- )
1692
-
1716
+ all_sample_feats = self.features_df.filter(pl.col(sample_id_col).is_in(sample_uids))
1717
+
1693
1718
  if all_sample_feats.is_empty():
1694
1719
  self.logger.warning("No features found for the selected samples.")
1695
1720
  return
@@ -1708,14 +1733,8 @@ def plot_rt_correction(
1708
1733
 
1709
1734
  # Filter nulls, add delta column, and sort - all in one operation
1710
1735
  all_sample_feats = (
1711
- all_sample_feats
1712
- .filter(
1713
- pl.col("rt").is_not_null() &
1714
- pl.col("rt_original").is_not_null()
1715
- )
1716
- .with_columns([
1717
- (pl.col("rt") - pl.col("rt_original")).alias("delta")
1718
- ])
1736
+ all_sample_feats.filter(pl.col("rt").is_not_null() & pl.col("rt_original").is_not_null())
1737
+ .with_columns([(pl.col("rt") - pl.col("rt_original")).alias("delta")])
1719
1738
  .sort([sample_id_col, "rt"])
1720
1739
  )
1721
1740
 
@@ -1770,6 +1789,7 @@ def plot_rt_correction(
1770
1789
  if filename is not None:
1771
1790
  # Convert relative paths to absolute paths using study folder as base
1772
1791
  import os
1792
+
1773
1793
  if not os.path.isabs(filename):
1774
1794
  filename = os.path.join(self.folder, filename)
1775
1795
 
@@ -1882,7 +1902,7 @@ def plot_chrom(
1882
1902
  curve = hv.Curve(
1883
1903
  (rt, inty, sample_names_array, sample_uids_array, sample_colors_array),
1884
1904
  kdims=["RT"],
1885
- vdims=["inty", "sample_name", "sample_uid", "sample_color"]
1905
+ vdims=["inty", "sample_name", "sample_uid", "sample_color"],
1886
1906
  ).opts(
1887
1907
  color=color_map[sample],
1888
1908
  line_width=1,
@@ -1892,8 +1912,8 @@ def plot_chrom(
1892
1912
  ("Intensity", "@inty{0,0}"),
1893
1913
  ("Sample Name", "@sample_name"),
1894
1914
  ("Sample UID", "@sample_uid"),
1895
- ("Sample Color", "$color[swatch]:sample_color")
1896
- ]
1915
+ ("Sample Color", "$color[swatch]:sample_color"),
1916
+ ],
1897
1917
  )
1898
1918
  curves.append(curve)
1899
1919
 
@@ -1957,6 +1977,7 @@ def plot_chrom(
1957
1977
  if filename is not None:
1958
1978
  # Convert relative paths to absolute paths using study folder as base
1959
1979
  import os
1980
+
1960
1981
  if not os.path.isabs(filename):
1961
1982
  filename = os.path.join(self.folder, filename)
1962
1983
 
@@ -1989,7 +2010,7 @@ def plot_consensus_stats(
1989
2010
  ):
1990
2011
  """
1991
2012
  Plot histograms/distributions for specific consensus statistics in the requested order.
1992
-
2013
+
1993
2014
  Shows the following properties in order:
1994
2015
  1. rt: Retention time
1995
2016
  2. rt_delta_mean: Mean retention time delta
@@ -2003,7 +2024,7 @@ def plot_consensus_stats(
2003
2024
  10. chrom_coherence_mean: Mean chromatographic coherence
2004
2025
  11. chrom_height_scaled_mean: Mean scaled chromatographic height
2005
2026
  12. chrom_prominence_scaled_mean: Mean scaled chromatographic prominence
2006
-
2027
+
2007
2028
  Parameters:
2008
2029
  filename (str, optional): Output filename for saving the plot
2009
2030
  width (int): Overall width of the plot (default: 840)
@@ -2019,7 +2040,7 @@ def plot_consensus_stats(
2019
2040
 
2020
2041
  # Get the consensus statistics data using the new helper method
2021
2042
  data_df = self.get_consensus_stats()
2022
-
2043
+
2023
2044
  if data_df is None or data_df.is_empty():
2024
2045
  self.logger.error("No consensus statistics data available.")
2025
2046
  return
@@ -2032,39 +2053,52 @@ def plot_consensus_stats(
2032
2053
 
2033
2054
  # Define specific columns to plot in the exact order requested (excluding consensus_uid)
2034
2055
  desired_columns = [
2035
- "rt",
2036
- "rt_delta_mean",
2037
- "mz",
2056
+ "rt",
2057
+ "rt_delta_mean",
2058
+ "mz",
2038
2059
  "mz_range", # mz_max-mz_min
2039
2060
  "log10_inty_mean", # log10(inty_mean)
2040
- "number_samples",
2041
- "number_ms2",
2042
- "charge_mean",
2043
- "quality",
2044
- "chrom_coherence_mean",
2045
- "chrom_height_scaled_mean",
2046
- "chrom_prominence_scaled_mean"
2061
+ "number_samples",
2062
+ "number_ms2",
2063
+ "charge_mean",
2064
+ "quality",
2065
+ "chrom_coherence_mean",
2066
+ "chrom_height_scaled_mean",
2067
+ "chrom_prominence_scaled_mean",
2047
2068
  ]
2048
-
2069
+
2049
2070
  # Filter to only include columns that exist in the dataframe, preserving order
2050
2071
  numeric_columns = [col for col in desired_columns if col in data_df_clean.columns]
2051
-
2072
+
2052
2073
  # Check if the numeric columns are actually numeric
2053
2074
  final_numeric_columns = []
2054
2075
  for col in numeric_columns:
2055
2076
  dtype = data_df_clean[col].dtype
2056
- if dtype in [pl.Int8, pl.Int16, pl.Int32, pl.Int64,
2057
- pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
2058
- pl.Float32, pl.Float64]:
2077
+ if dtype in [
2078
+ pl.Int8,
2079
+ pl.Int16,
2080
+ pl.Int32,
2081
+ pl.Int64,
2082
+ pl.UInt8,
2083
+ pl.UInt16,
2084
+ pl.UInt32,
2085
+ pl.UInt64,
2086
+ pl.Float32,
2087
+ pl.Float64,
2088
+ ]:
2059
2089
  final_numeric_columns.append(col)
2060
-
2090
+
2061
2091
  numeric_columns = final_numeric_columns
2062
2092
 
2063
2093
  if len(numeric_columns) == 0:
2064
- self.logger.error(f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df_clean.columns)}")
2094
+ self.logger.error(
2095
+ f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df_clean.columns)}"
2096
+ )
2065
2097
  return
2066
2098
 
2067
- self.logger.debug(f"Creating distribution plots for {len(numeric_columns)} specific consensus columns: {numeric_columns}")
2099
+ self.logger.debug(
2100
+ f"Creating distribution plots for {len(numeric_columns)} specific consensus columns: {numeric_columns}"
2101
+ )
2068
2102
 
2069
2103
  # Select only the numeric columns for plotting
2070
2104
  data_df_clean = data_df_clean.select(numeric_columns)
@@ -2073,15 +2107,23 @@ def plot_consensus_stats(
2073
2107
  all_columns_empty = True
2074
2108
  for col in numeric_columns:
2075
2109
  # Check if column has any non-null, finite values
2076
- non_null_count = data_df_clean[col].filter(
2077
- data_df_clean[col].is_not_null() &
2078
- (data_df_clean[col].is_finite() if data_df_clean[col].dtype in [pl.Float32, pl.Float64] else pl.lit(True))
2079
- ).len()
2080
-
2110
+ non_null_count = (
2111
+ data_df_clean[col]
2112
+ .filter(
2113
+ data_df_clean[col].is_not_null()
2114
+ & (
2115
+ data_df_clean[col].is_finite()
2116
+ if data_df_clean[col].dtype in [pl.Float32, pl.Float64]
2117
+ else pl.lit(True)
2118
+ )
2119
+ )
2120
+ .len()
2121
+ )
2122
+
2081
2123
  if non_null_count > 0:
2082
2124
  all_columns_empty = False
2083
2125
  break
2084
-
2126
+
2085
2127
  if all_columns_empty:
2086
2128
  self.logger.error("All numeric columns contain only NaN/infinite values.")
2087
2129
  return
@@ -2089,24 +2131,24 @@ def plot_consensus_stats(
2089
2131
  # Calculate grid dimensions
2090
2132
  n_plots = len(numeric_columns)
2091
2133
  n_rows = (n_plots + n_cols - 1) // n_cols # Ceiling division
2092
-
2134
+
2093
2135
  # Auto-calculate height if not provided
2094
2136
  if height is None:
2095
2137
  plot_height = 210 # Reduced from 300 (30% smaller)
2096
2138
  height = plot_height * n_rows + 56 # Reduced from 80 (30% smaller)
2097
2139
  else:
2098
2140
  plot_height = (height - 56) // n_rows # Reduced padding (30% smaller)
2099
-
2141
+
2100
2142
  plot_width = (width - 56) // n_cols # Reduced padding (30% smaller)
2101
2143
 
2102
2144
  # Create plots grid
2103
2145
  plots = []
2104
2146
  current_row = []
2105
-
2147
+
2106
2148
  for i, col in enumerate(numeric_columns):
2107
2149
  # Check if this column should use log scale for y-axis
2108
2150
  y_axis_type = "log" if col in ["number_samples", "number_ms2"] else "linear"
2109
-
2151
+
2110
2152
  # Create histogram for this column
2111
2153
  p = figure(
2112
2154
  width=plot_width,
@@ -2114,30 +2156,28 @@ def plot_consensus_stats(
2114
2156
  title=col,
2115
2157
  toolbar_location="above",
2116
2158
  tools="pan,wheel_zoom,box_zoom,reset,save",
2117
- y_axis_type=y_axis_type
2159
+ y_axis_type=y_axis_type,
2118
2160
  )
2119
-
2161
+
2120
2162
  # Set white background
2121
2163
  p.background_fill_color = "white"
2122
2164
  p.border_fill_color = "white"
2123
-
2165
+
2124
2166
  # Calculate histogram using Polars
2125
2167
  # Get valid (non-null, finite) values for this column
2126
2168
  if data_df_clean[col].dtype in [pl.Float32, pl.Float64]:
2127
- valid_values = data_df_clean.filter(
2128
- data_df_clean[col].is_not_null() & data_df_clean[col].is_finite()
2129
- )[col]
2169
+ valid_values = data_df_clean.filter(data_df_clean[col].is_not_null() & data_df_clean[col].is_finite())[col]
2130
2170
  else:
2131
2171
  valid_values = data_df_clean.filter(data_df_clean[col].is_not_null())[col]
2132
-
2172
+
2133
2173
  if valid_values.len() == 0:
2134
2174
  self.logger.warning(f"No valid values for column {col}")
2135
2175
  continue
2136
-
2176
+
2137
2177
  # Convert to numpy for histogram calculation
2138
2178
  values_array = valid_values.to_numpy()
2139
2179
  hist, edges = np.histogram(values_array, bins=bins)
2140
-
2180
+
2141
2181
  # Handle log y-axis: replace zero counts with small positive values
2142
2182
  if y_axis_type == "log":
2143
2183
  # Replace zero counts with a small value (1e-1) to make them visible on log scale
@@ -2146,7 +2186,7 @@ def plot_consensus_stats(
2146
2186
  else:
2147
2187
  hist_log_safe = hist
2148
2188
  bottom_val = 0
2149
-
2189
+
2150
2190
  # Create histogram bars
2151
2191
  p.quad(
2152
2192
  top=hist_log_safe,
@@ -2157,7 +2197,7 @@ def plot_consensus_stats(
2157
2197
  line_color="white",
2158
2198
  alpha=alpha,
2159
2199
  )
2160
-
2200
+
2161
2201
  # Style the plot
2162
2202
  p.title.text_font_size = "10pt" # Reduced from 12pt
2163
2203
  p.xaxis.axis_label = "" # Remove x-axis title
@@ -2166,12 +2206,12 @@ def plot_consensus_stats(
2166
2206
  p.grid.grid_line_dash = [6, 4] # Dashed grid lines
2167
2207
  p.xgrid.visible = False # Hide x-axis grid
2168
2208
  p.outline_line_color = None # Remove gray border around plot area
2169
-
2209
+
2170
2210
  # Remove y-axis label but keep y-axis visible
2171
2211
  p.yaxis.axis_label = ""
2172
-
2212
+
2173
2213
  current_row.append(p)
2174
-
2214
+
2175
2215
  # If we've filled a row or reached the end, add the row to plots
2176
2216
  if len(current_row) == n_cols or i == n_plots - 1:
2177
2217
  # Fill remaining spots in the last row with None if needed
@@ -2182,15 +2222,15 @@ def plot_consensus_stats(
2182
2222
 
2183
2223
  # Create grid layout with white background
2184
2224
  grid = gridplot(plots, toolbar_location="above", merge_tools=True)
2185
-
2225
+
2186
2226
  # The background should be white by default in Bokeh
2187
2227
  # Individual plots already have white backgrounds set above
2188
2228
 
2189
-
2190
2229
  # Apply consistent save/display behavior
2191
2230
  if filename is not None:
2192
2231
  # Convert relative paths to absolute paths using study folder as base
2193
2232
  import os
2233
+
2194
2234
  if not os.path.isabs(filename):
2195
2235
  filename = os.path.join(self.folder, filename)
2196
2236
 
@@ -2456,6 +2496,7 @@ def plot_samples_pca(
2456
2496
  if filename is not None:
2457
2497
  # Convert relative paths to absolute paths using study folder as base
2458
2498
  import os
2499
+
2459
2500
  if not os.path.isabs(filename):
2460
2501
  filename = os.path.join(self.folder, filename)
2461
2502
 
@@ -2503,7 +2544,7 @@ def plot_samples_umap(
2503
2544
  random_state (int or None): Random state for reproducibility (default: 42).
2504
2545
  - Use an integer (e.g., 42) for reproducible results (slower, single-threaded)
2505
2546
  - Use None for faster computation with multiple cores (non-reproducible)
2506
-
2547
+
2507
2548
  Note:
2508
2549
  Setting random_state forces single-threaded computation but ensures reproducible results.
2509
2550
  Set random_state=None to enable parallel processing for faster computation.
@@ -2574,7 +2615,7 @@ def plot_samples_umap(
2574
2615
  min_dist=min_dist,
2575
2616
  metric=metric,
2576
2617
  random_state=random_state,
2577
- n_jobs=1
2618
+ n_jobs=1,
2578
2619
  )
2579
2620
  umap_result = reducer.fit_transform(matrix_scaled)
2580
2621
 
@@ -2743,6 +2784,7 @@ def plot_samples_umap(
2743
2784
  if filename is not None:
2744
2785
  # Convert relative paths to absolute paths using study folder as base
2745
2786
  import os
2787
+
2746
2788
  if not os.path.isabs(filename):
2747
2789
  filename = os.path.join(self.folder, filename)
2748
2790
 
@@ -2897,6 +2939,7 @@ def plot_tic(
2897
2939
  if filename is not None:
2898
2940
  # Convert relative paths to absolute paths using study folder as base
2899
2941
  import os
2942
+
2900
2943
  if not os.path.isabs(filename):
2901
2944
  filename = os.path.join(self.folder, filename)
2902
2945
 
@@ -2915,11 +2958,14 @@ def plot_tic(
2915
2958
  def plot_pca(self, *args, **kwargs):
2916
2959
  """Deprecated: Use plot_samples_pca instead."""
2917
2960
  import warnings
2961
+
2918
2962
  warnings.warn("plot_pca is deprecated, use plot_samples_pca instead", DeprecationWarning, stacklevel=2)
2919
2963
  return self.plot_samples_pca(*args, **kwargs)
2920
2964
 
2965
+
2921
2966
  def plot_umap(self, *args, **kwargs):
2922
2967
  """Deprecated: Use plot_samples_umap instead."""
2923
2968
  import warnings
2969
+
2924
2970
  warnings.warn("plot_umap is deprecated, use plot_samples_umap instead", DeprecationWarning, stacklevel=2)
2925
2971
  return self.plot_samples_umap(*args, **kwargs)