masster-0.5.22-py3-none-any.whl → masster-0.5.24-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/plot.py CHANGED
@@ -66,12 +66,12 @@ from cmap import Colormap
66
66
  def _process_cmap(cmap, fallback="viridis", logger=None):
67
67
  """
68
68
  Process colormap using the cmap package, similar to study's implementation.
69
-
69
+
70
70
  Parameters:
71
71
  cmap: Colormap specification (string name, cmap.Colormap object, or None)
72
72
  fallback: Fallback colormap name if cmap processing fails
73
73
  logger: Logger for warnings (optional)
74
-
74
+
75
75
  Returns:
76
76
  list: List of hex color strings for the colormap
77
77
  """
@@ -80,19 +80,20 @@ def _process_cmap(cmap, fallback="viridis", logger=None):
80
80
  cmap = "viridis"
81
81
  elif cmap == "grey":
82
82
  cmap = "greys"
83
-
83
+
84
84
  # If cmap package is not available, fall back to process_cmap
85
85
  if Colormap is None:
86
86
  if logger:
87
87
  logger.warning("cmap package not available, using holoviews process_cmap")
88
88
  return process_cmap(cmap, provider="bokeh")
89
-
89
+
90
90
  try:
91
91
  # Handle colormap using cmap.Colormap
92
92
  if isinstance(cmap, str):
93
93
  colormap = Colormap(cmap)
94
94
  # Generate 256 colors and convert to hex
95
95
  import matplotlib.colors as mcolors
96
+
96
97
  colors = colormap(np.linspace(0, 1, 256))
97
98
  palette = [mcolors.rgb2hex(color) for color in colors]
98
99
  else:
@@ -104,16 +105,18 @@ def _process_cmap(cmap, fallback="viridis", logger=None):
104
105
  if not isinstance(palette, (list, tuple)):
105
106
  # Fall back to generating colors manually
106
107
  import matplotlib.colors as mcolors
108
+
107
109
  colors = colormap(np.linspace(0, 1, 256))
108
110
  palette = [mcolors.rgb2hex(color) for color in colors]
109
111
  except AttributeError:
110
112
  # Fall back to generating colors manually
111
113
  import matplotlib.colors as mcolors
114
+
112
115
  colors = colormap(np.linspace(0, 1, 256))
113
116
  palette = [mcolors.rgb2hex(color) for color in colors]
114
-
117
+
115
118
  return palette
116
-
119
+
117
120
  except (AttributeError, ValueError, TypeError) as e:
118
121
  # Fallback to process_cmap if cmap interpretation fails
119
122
  if logger:
@@ -202,60 +205,63 @@ def _display_plot(plot_object, layout=None):
202
205
  def _export_with_webdriver_manager(plot_obj, filename, format_type, logger=None):
203
206
  """
204
207
  Export plot to PNG or SVG using webdriver-manager for automatic driver management.
205
-
208
+
206
209
  Parameters:
207
210
  plot_obj: Bokeh plot object or holoviews object to export
208
- filename: Output filename
211
+ filename: Output filename
209
212
  format_type: Either "png" or "svg"
210
213
  logger: Logger for error reporting (optional)
211
-
214
+
212
215
  Returns:
213
216
  bool: True if export successful, False otherwise
214
217
  """
215
218
  try:
216
219
  # Convert holoviews to bokeh if needed
217
- if hasattr(plot_obj, 'opts'): # Likely a holoviews object
220
+ if hasattr(plot_obj, "opts"): # Likely a holoviews object
218
221
  import holoviews as hv
222
+
219
223
  bokeh_plot = hv.render(plot_obj)
220
224
  else:
221
225
  bokeh_plot = plot_obj
222
-
226
+
223
227
  # Try webdriver-manager export first
224
228
  try:
225
229
  from webdriver_manager.chrome import ChromeDriverManager
226
230
  from selenium import webdriver
227
231
  from selenium.webdriver.chrome.service import Service
228
232
  from selenium.webdriver.chrome.options import Options
229
-
233
+
230
234
  # Set up Chrome options for headless operation
231
235
  chrome_options = Options()
232
236
  chrome_options.add_argument("--headless")
233
237
  chrome_options.add_argument("--no-sandbox")
234
238
  chrome_options.add_argument("--disable-dev-shm-usage")
235
239
  chrome_options.add_argument("--disable-gpu")
236
-
240
+
237
241
  # Use webdriver-manager to automatically get the correct ChromeDriver
238
242
  service = Service(ChromeDriverManager().install())
239
243
  driver = webdriver.Chrome(service=service, options=chrome_options)
240
-
244
+
241
245
  # Export with managed webdriver
242
246
  with warnings.catch_warnings():
243
247
  warnings.simplefilter("ignore", category=UserWarning)
244
248
  # Filter out bokeh.io.export warnings specifically
245
249
  warnings.filterwarnings("ignore", module="bokeh.io.export")
246
-
250
+
247
251
  if format_type == "png":
248
252
  from bokeh.io import export_png
253
+
249
254
  export_png(bokeh_plot, filename=filename, webdriver=driver)
250
255
  elif format_type == "svg":
251
256
  from bokeh.io import export_svg
257
+
252
258
  export_svg(bokeh_plot, filename=filename, webdriver=driver)
253
259
  else:
254
260
  raise ValueError(f"Unsupported format: {format_type}")
255
-
261
+
256
262
  driver.quit()
257
263
  return True
258
-
264
+
259
265
  except ImportError:
260
266
  if logger:
261
267
  logger.debug(f"webdriver-manager not available, using default {format_type.upper()} export")
@@ -264,37 +270,43 @@ def _export_with_webdriver_manager(plot_obj, filename, format_type, logger=None)
264
270
  warnings.simplefilter("ignore", category=UserWarning)
265
271
  # Filter out bokeh.io.export warnings specifically
266
272
  warnings.filterwarnings("ignore", module="bokeh.io.export")
267
-
273
+
268
274
  if format_type == "png":
269
275
  from bokeh.io import export_png
276
+
270
277
  export_png(bokeh_plot, filename=filename)
271
278
  elif format_type == "svg":
272
279
  from bokeh.io import export_svg
280
+
273
281
  export_svg(bokeh_plot, filename=filename)
274
282
  return True
275
-
283
+
276
284
  except Exception as e:
277
285
  if logger:
278
- logger.debug(f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export")
286
+ logger.debug(
287
+ f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export"
288
+ )
279
289
  try:
280
290
  # Final fallback to default export
281
291
  with warnings.catch_warnings():
282
292
  warnings.simplefilter("ignore", category=UserWarning)
283
293
  # Filter out bokeh.io.export warnings specifically
284
294
  warnings.filterwarnings("ignore", module="bokeh.io.export")
285
-
295
+
286
296
  if format_type == "png":
287
297
  from bokeh.io import export_png
298
+
288
299
  export_png(bokeh_plot, filename=filename)
289
300
  elif format_type == "svg":
290
301
  from bokeh.io import export_svg
302
+
291
303
  export_svg(bokeh_plot, filename=filename)
292
304
  return True
293
305
  except Exception as e2:
294
306
  if logger:
295
307
  logger.error(f"{format_type.upper()} export failed: {e2}")
296
308
  return False
297
-
309
+
298
310
  except Exception as e:
299
311
  if logger:
300
312
  logger.error(f"Export preparation failed: {e}")
@@ -313,7 +325,8 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
313
325
  if filename is not None:
314
326
  # Convert relative paths to absolute paths using sample folder as base
315
327
  import os
316
- if hasattr(self, 'folder') and self.folder and not os.path.isabs(filename):
328
+
329
+ if hasattr(self, "folder") and self.folder and not os.path.isabs(filename):
317
330
  filename = os.path.join(self.folder, filename)
318
331
 
319
332
  # Convert to absolute path for logging
@@ -324,10 +337,12 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
324
337
  plot_obj.save(filename, embed=True) # type: ignore[attr-defined]
325
338
  elif plot_type == "holoviews":
326
339
  import panel
340
+
327
341
  panel.panel(plot_obj).save(filename, embed=True) # type: ignore[attr-defined]
328
342
  elif plot_type == "bokeh":
329
343
  from bokeh.plotting import output_file
330
344
  from bokeh.io import save
345
+
331
346
  output_file(filename)
332
347
  save(plot_obj)
333
348
  self.logger.success(f"Plot saved to: {abs_filename}")
@@ -337,16 +352,18 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
337
352
  self.logger.success(f"Plot saved to: {abs_filename}")
338
353
  else:
339
354
  # Fall back to HTML if PNG export fails completely
340
- html_filename = filename.replace('.png', '.html')
355
+ html_filename = filename.replace(".png", ".html")
341
356
  abs_html_filename = os.path.abspath(html_filename)
342
357
  if plot_type == "panel":
343
358
  plot_obj.save(html_filename, embed=True) # type: ignore[attr-defined]
344
359
  elif plot_type == "holoviews":
345
360
  import panel
361
+
346
362
  panel.panel(plot_obj).save(html_filename, embed=True) # type: ignore[attr-defined]
347
363
  elif plot_type == "bokeh":
348
364
  from bokeh.plotting import output_file
349
365
  from bokeh.io import save
366
+
350
367
  output_file(html_filename)
351
368
  save(plot_obj)
352
369
  self.logger.warning(f"PNG export not available, saved as HTML instead: {abs_html_filename}")
@@ -356,16 +373,18 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
356
373
  self.logger.success(f"Plot saved to: {abs_filename}")
357
374
  else:
358
375
  # Fall back to HTML if SVG export fails completely
359
- html_filename = filename.replace('.svg', '.html')
376
+ html_filename = filename.replace(".svg", ".html")
360
377
  abs_html_filename = os.path.abspath(html_filename)
361
378
  if plot_type == "panel":
362
379
  plot_obj.save(html_filename, embed=True) # type: ignore[attr-defined]
363
380
  elif plot_type == "holoviews":
364
381
  import panel
382
+
365
383
  panel.panel(plot_obj).save(html_filename, embed=True) # type: ignore[attr-defined]
366
384
  elif plot_type == "bokeh":
367
385
  from bokeh.plotting import output_file
368
386
  from bokeh.io import save
387
+
369
388
  output_file(html_filename)
370
389
  save(plot_obj)
371
390
  self.logger.warning(f"SVG export not available, saved as HTML instead: {abs_html_filename}")
@@ -374,23 +393,27 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
374
393
  try:
375
394
  if plot_type == "bokeh":
376
395
  from bokeh.io.export import export_pdf
396
+
377
397
  export_pdf(plot_obj, filename=filename)
378
398
  elif plot_type in ["panel", "holoviews"]:
379
399
  import holoviews as hv
400
+
380
401
  hv.save(plot_obj, filename, fmt="pdf")
381
402
  self.logger.success(f"Plot saved to: {abs_filename}")
382
403
  except ImportError:
383
404
  # Fall back to HTML if PDF export not available
384
- html_filename = filename.replace('.pdf', '.html')
405
+ html_filename = filename.replace(".pdf", ".html")
385
406
  abs_html_filename = os.path.abspath(html_filename)
386
407
  if plot_type == "panel":
387
408
  plot_obj.save(html_filename, embed=True) # type: ignore[attr-defined]
388
409
  elif plot_type == "holoviews":
389
410
  import panel
411
+
390
412
  panel.panel(plot_obj).save(html_filename, embed=True) # type: ignore[attr-defined]
391
413
  elif plot_type == "bokeh":
392
414
  from bokeh.plotting import output_file
393
415
  from bokeh.io import save
416
+
394
417
  output_file(html_filename)
395
418
  save(plot_obj)
396
419
  self.logger.warning(f"PDF export not available, saved as HTML instead: {abs_html_filename}")
@@ -400,10 +423,12 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
400
423
  plot_obj.save(filename, embed=True) # type: ignore[attr-defined]
401
424
  elif plot_type == "holoviews":
402
425
  import panel
426
+
403
427
  panel.panel(plot_obj).save(filename, embed=True) # type: ignore[attr-defined]
404
428
  elif plot_type == "bokeh":
405
429
  from bokeh.plotting import output_file
406
430
  from bokeh.io import save
431
+
407
432
  output_file(filename)
408
433
  save(plot_obj)
409
434
  self.logger.success(f"Plot saved to: {abs_filename}")
@@ -413,9 +438,11 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
413
438
  plot_obj.show() # type: ignore[attr-defined]
414
439
  elif plot_type == "holoviews":
415
440
  import panel
441
+
416
442
  return panel.panel(plot_obj)
417
443
  elif plot_type == "bokeh":
418
444
  from bokeh.plotting import show
445
+
419
446
  show(plot_obj)
420
447
 
421
448
 
@@ -554,12 +581,25 @@ def plot_chrom(
554
581
  self._handle_sample_plot_output(layout, filename, "panel")
555
582
 
556
583
 
557
- def _create_raster_plot(sample, mz_range=None, rt_range=None, raster_cmap='greys',
558
- raster_log=True, raster_min=1, raster_dynamic=True, raster_threshold=0.8, raster_max_px=8,
559
- width=750, height=600, filename=None):
584
+ def _create_raster_plot(
585
+ sample,
586
+ mz_range=None,
587
+ rt_range=None,
588
+ raster_cmap="greys",
589
+ raster_log=True,
590
+ raster_min=1,
591
+ raster_dynamic=True,
592
+ raster_threshold=0.8,
593
+ raster_max_px=8,
594
+ width=750,
595
+ height=600,
596
+ filename=None,
597
+ ):
560
598
  """Create the raster plot layer from MS1 data."""
561
599
  # Process colormap using the cmap package with proper error handling
562
- raster_cmap_processed = _process_cmap(raster_cmap if raster_cmap is not None else 'greys', fallback="greys", logger=sample.logger)
600
+ raster_cmap_processed = _process_cmap(
601
+ raster_cmap if raster_cmap is not None else "greys", fallback="greys", logger=sample.logger
602
+ )
563
603
 
564
604
  # get columns rt, mz, inty from sample.ms1_df, It's polars DataFrame
565
605
  spectradf = sample.ms1_df.to_pandas()
@@ -620,7 +660,7 @@ def _create_raster_plot(sample, mz_range=None, rt_range=None, raster_cmap='greys
620
660
  ylabel="m/z",
621
661
  colorbar=True,
622
662
  colorbar_position="right",
623
- axiswise=True
663
+ axiswise=True,
624
664
  )
625
665
  raster = hd.dynspread(
626
666
  raster,
@@ -629,17 +669,16 @@ def _create_raster_plot(sample, mz_range=None, rt_range=None, raster_cmap='greys
629
669
  shape="square",
630
670
  max_px=raster_max_px,
631
671
  )
632
-
672
+
633
673
  return raster
634
674
 
635
675
 
636
- def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
637
- min_id_level, max_id_level, min_ms_level):
676
+ def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid, min_id_level, max_id_level, min_ms_level):
638
677
  """Load oracle data and merge with features."""
639
678
  if sample.features_df is None:
640
679
  sample.logger.error("Cannot plot 2D oracle: features_df is not available")
641
680
  return None
642
-
681
+
643
682
  feats = sample.features_df.clone()
644
683
  sample.logger.debug(f"Features data shape: {len(feats)} rows")
645
684
 
@@ -651,7 +690,7 @@ def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
651
690
  if oracle_folder is None:
652
691
  sample.logger.info("No oracle folder provided, plotting features only")
653
692
  return None
654
-
693
+
655
694
  # try to read the annotationfile as a csv file and add it to feats
656
695
  oracle_file_path = os.path.join(oracle_folder, "diag", "summary_by_feature.csv")
657
696
  sample.logger.debug(f"Loading oracle data from: {oracle_file_path}")
@@ -664,16 +703,25 @@ def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
664
703
 
665
704
  if link_by_feature_uid:
666
705
  cols_to_keep = [
667
- "title", "scan_idx", "mslevel", "hits", "id_level", "id_label",
668
- "id_ion", "id_class", "id_evidence", "score", "score2",
706
+ "title",
707
+ "scan_idx",
708
+ "mslevel",
709
+ "hits",
710
+ "id_level",
711
+ "id_label",
712
+ "id_ion",
713
+ "id_class",
714
+ "id_evidence",
715
+ "score",
716
+ "score2",
669
717
  ]
670
718
  oracle_data = oracle_data[cols_to_keep]
671
-
719
+
672
720
  # extract feature_uid from title. It begins with "uid:XYZ,"
673
721
  sample.logger.debug("Extracting feature UIDs from oracle titles using pattern 'uid:(\\d+)'")
674
722
  oracle_data["feature_uid"] = oracle_data["title"].str.extract(r"uid:(\d+)")
675
723
  oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
676
-
724
+
677
725
  # sort by id_level, remove duplicate feature_uid, keep the first one
678
726
  sample.logger.debug("Sorting by ID level and removing duplicates")
679
727
  oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
@@ -681,12 +729,23 @@ def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
681
729
  sample.logger.debug(f"After deduplication: {len(oracle_data)} unique oracle annotations")
682
730
  else:
683
731
  cols_to_keep = [
684
- "precursor", "rt", "title", "scan_idx", "mslevel", "hits", "id_level",
685
- "id_label", "id_ion", "id_class", "id_evidence", "score", "score2",
732
+ "precursor",
733
+ "rt",
734
+ "title",
735
+ "scan_idx",
736
+ "mslevel",
737
+ "hits",
738
+ "id_level",
739
+ "id_label",
740
+ "id_ion",
741
+ "id_class",
742
+ "id_evidence",
743
+ "score",
744
+ "score2",
686
745
  ]
687
746
  oracle_data = oracle_data[cols_to_keep]
688
747
  oracle_data["feature_uid"] = None
689
-
748
+
690
749
  # iterate over the rows and find the feature_uid in feats by looking at the closest rt and mz
691
750
  for i, row in oracle_data.iterrows():
692
751
  candidates = feats[
@@ -716,7 +775,7 @@ def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
716
775
  if min_ms_level is not None:
717
776
  feats = feats[(feats["mslevel"] >= min_ms_level)]
718
777
  sample.logger.debug(f"After min_ms_level filter ({min_ms_level}): {len(feats)} features")
719
-
778
+
720
779
  sample.logger.info(f"Feature filtering complete: {initial_count} → {len(feats)} features remaining")
721
780
  return feats
722
781
 
@@ -724,12 +783,12 @@ def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
724
783
  def _setup_color_mapping(sample, feats, colorby, cmap, legend_groups=None):
725
784
  """Set up categorical color mapping for features."""
726
785
  import matplotlib.colors as mcolors
727
-
786
+
728
787
  feats["color"] = "black" # Default fallback color
729
788
  cvalues = None
730
789
  color_column = "color" # Default to fixed color
731
790
  colors = []
732
-
791
+
733
792
  # Determine which column to use for categorical coloring
734
793
  if colorby in ["class", "hg", "id_class", "id_hg"]:
735
794
  categorical_column = "id_class"
@@ -739,33 +798,33 @@ def _setup_color_mapping(sample, feats, colorby, cmap, legend_groups=None):
739
798
  categorical_column = "id_ion"
740
799
  feats[categorical_column] = feats[categorical_column].fillna("mix")
741
800
  elif colorby in ["evidence", "id_evidence"]:
742
- categorical_column = "id_evidence"
801
+ categorical_column = "id_evidence"
743
802
  feats[categorical_column] = feats[categorical_column].fillna("mix")
744
803
  elif colorby in ["level", "id_level"]:
745
804
  categorical_column = "id_level"
746
805
  feats[categorical_column] = feats[categorical_column].fillna("mix")
747
806
  else:
748
807
  categorical_column = None
749
-
808
+
750
809
  if categorical_column is not None:
751
810
  # Use provided legend_groups or derive from data
752
811
  if legend_groups is not None:
753
812
  # Use all specified groups to ensure consistent legend/coloring
754
813
  cvalues = legend_groups[:] # Copy the list
755
814
  # Ensure 'mix' is always present as the last group if not already included
756
- if 'mix' not in cvalues:
757
- cvalues.append('mix')
815
+ if "mix" not in cvalues:
816
+ cvalues.append("mix")
758
817
  sample.logger.info(f"Using provided legend_groups for legend: {cvalues}")
759
-
818
+
760
819
  # Check which provided groups actually have data
761
820
  present_groups = feats[categorical_column].unique()
762
821
  missing_groups = [grp for grp in cvalues if grp not in present_groups]
763
822
  if missing_groups:
764
823
  sample.logger.warning(f"Provided legend_groups not found in data: {missing_groups}")
765
824
  sample.logger.info(f"Groups present in data: {sorted(present_groups)}")
766
-
825
+
767
826
  # Assign any points not in legend_groups to 'mix'
768
- feats.loc[~feats[categorical_column].isin(cvalues[:-1]), categorical_column] = 'mix'
827
+ feats.loc[~feats[categorical_column].isin(cvalues[:-1]), categorical_column] = "mix"
769
828
  else:
770
829
  # Original behavior: use only groups present in data
771
830
  cvalues = feats[categorical_column].unique()
@@ -784,7 +843,7 @@ def _setup_color_mapping(sample, feats, colorby, cmap, legend_groups=None):
784
843
  # Process colormap for categorical data
785
844
  if cvalues is not None:
786
845
  num_colors = len(cvalues)
787
-
846
+
788
847
  # Use colormap for categorical data - use _process_cmap for proper handling
789
848
  try:
790
849
  colormap = Colormap(cmap)
@@ -794,7 +853,7 @@ def _setup_color_mapping(sample, feats, colorby, cmap, legend_groups=None):
794
853
  t = i / (num_colors - 1) if num_colors > 1 else 0.5
795
854
  color = colormap(t)
796
855
  # Convert to hex - handle different color formats
797
- if hasattr(color, '__len__') and len(color) >= 3:
856
+ if hasattr(color, "__len__") and len(color) >= 3:
798
857
  # It's an array-like color (RGB or RGBA)
799
858
  colors.append(mcolors.to_hex(color[:3]))
800
859
  else:
@@ -826,22 +885,25 @@ def _setup_color_mapping(sample, feats, colorby, cmap, legend_groups=None):
826
885
  return cvalues, color_column, colors
827
886
 
828
887
 
829
- def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors,
830
- markersize, title, legend):
888
+ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors, markersize, title, legend):
831
889
  """Create feature overlay with identified and unidentified features."""
832
890
  # replace NaN with 0 in id_level
833
891
  feats["id_level"] = feats["id_level"].fillna(0)
834
-
892
+
835
893
  # Create unified visualization with all features in single layer
836
894
  # This avoids the multiple layer legend conflicts that cause dark colors and shared toggling
837
895
  sample.logger.debug("Creating unified feature visualization with categorical coloring")
838
-
896
+
839
897
  # Prepare categorical coloring for identified features only (id_level >= 1)
840
- identified_feats = feats[feats["id_level"] >= 1].copy() if len(feats[feats["id_level"] >= 1]) > 0 else pd.DataFrame()
841
- unidentified_feats = feats[feats["id_level"] < 1].copy() if len(feats[feats["id_level"] < 1]) > 0 else pd.DataFrame()
842
-
898
+ identified_feats = (
899
+ feats[feats["id_level"] >= 1].copy() if len(feats[feats["id_level"] >= 1]) > 0 else pd.DataFrame()
900
+ )
901
+ unidentified_feats = (
902
+ feats[feats["id_level"] < 1].copy() if len(feats[feats["id_level"] < 1]) > 0 else pd.DataFrame()
903
+ )
904
+
843
905
  overlay = raster
844
-
906
+
845
907
  # Single layer for identified features with categorical coloring
846
908
  if len(identified_feats) > 0 and cvalues is not None:
847
909
  # Create proper confidence-based marker styling
@@ -851,7 +913,7 @@ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors
851
913
  identified_feats["fill_alpha"] = identified_feats["id_level"].apply(
852
914
  lambda x: 1.0 if x >= 2 else 0.3 # Full opacity for high conf, transparent for medium
853
915
  )
854
-
916
+
855
917
  oracle_hover_identified = HoverTool(
856
918
  tooltips=[
857
919
  ("rt", "@rt"),
@@ -866,10 +928,10 @@ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors
866
928
  ("score2", "@score2"),
867
929
  ],
868
930
  )
869
-
931
+
870
932
  # Create completely separate overlay elements for each category
871
933
  overlays_to_combine = [raster] # Start with raster base
872
-
934
+
873
935
  for i, category in enumerate(cvalues):
874
936
  category_data = identified_feats[identified_feats[color_column] == category].copy()
875
937
  if len(category_data) > 0:
@@ -878,10 +940,18 @@ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors
878
940
  category_data,
879
941
  kdims=["rt", "mz"],
880
942
  vdims=[
881
- "inty", "feature_uid", "id_level", "id_class", "id_label",
882
- "id_ion", "id_evidence", "score", "score2", "fill_alpha"
943
+ "inty",
944
+ "feature_uid",
945
+ "id_level",
946
+ "id_class",
947
+ "id_label",
948
+ "id_ion",
949
+ "id_evidence",
950
+ "score",
951
+ "score2",
952
+ "fill_alpha",
883
953
  ],
884
- label=str(category) # This becomes the legend label
954
+ label=str(category), # This becomes the legend label
885
955
  ).options(
886
956
  color=colors[i], # Use pre-computed hex color for this category
887
957
  marker="circle",
@@ -893,17 +963,38 @@ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors
893
963
  overlays_to_combine.append(category_points)
894
964
  else:
895
965
  # Create empty Points element for categories with no data to ensure they appear in legend
896
- empty_data = pd.DataFrame(columns=['rt', 'mz', 'inty', 'feature_uid', 'id_level',
897
- 'id_class', 'id_label', 'id_ion', 'id_evidence',
898
- 'score', 'score2', 'fill_alpha'])
966
+ empty_data = pd.DataFrame(
967
+ columns=[
968
+ "rt",
969
+ "mz",
970
+ "inty",
971
+ "feature_uid",
972
+ "id_level",
973
+ "id_class",
974
+ "id_label",
975
+ "id_ion",
976
+ "id_evidence",
977
+ "score",
978
+ "score2",
979
+ "fill_alpha",
980
+ ]
981
+ )
899
982
  category_points = hv.Points(
900
983
  empty_data,
901
984
  kdims=["rt", "mz"],
902
985
  vdims=[
903
- "inty", "feature_uid", "id_level", "id_class", "id_label",
904
- "id_ion", "id_evidence", "score", "score2", "fill_alpha"
986
+ "inty",
987
+ "feature_uid",
988
+ "id_level",
989
+ "id_class",
990
+ "id_label",
991
+ "id_ion",
992
+ "id_evidence",
993
+ "score",
994
+ "score2",
995
+ "fill_alpha",
905
996
  ],
906
- label=str(category) # This becomes the legend label
997
+ label=str(category), # This becomes the legend label
907
998
  ).options(
908
999
  color=colors[i], # Use pre-computed hex color for this category
909
1000
  marker="circle",
@@ -913,16 +1004,16 @@ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors
913
1004
  show_legend=True,
914
1005
  )
915
1006
  overlays_to_combine.append(category_points)
916
-
1007
+
917
1008
  # Combine all overlays
918
1009
  overlay = overlays_to_combine[0] # Start with raster
919
1010
  for layer in overlays_to_combine[1:]:
920
1011
  overlay = overlay * layer
921
-
1012
+
922
1013
  else:
923
1014
  # No categorical data - just set overlay to raster
924
1015
  overlay = raster
925
-
1016
+
926
1017
  # Separate layer for unidentified features (always black crosses)
927
1018
  if len(unidentified_feats) > 0:
928
1019
  oracle_hover_no_id = HoverTool(
@@ -933,20 +1024,20 @@ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors
933
1024
  ("id_level", "@id_level"),
934
1025
  ],
935
1026
  )
936
-
1027
+
937
1028
  feature_points_no_id = hv.Points(
938
1029
  unidentified_feats,
939
1030
  kdims=["rt", "mz"],
940
1031
  vdims=["inty", "feature_uid", "id_level"],
941
1032
  ).options(
942
1033
  color="black",
943
- marker="x",
1034
+ marker="x",
944
1035
  size=markersize,
945
1036
  alpha=1.0,
946
1037
  tools=[oracle_hover_no_id],
947
1038
  show_legend=False,
948
1039
  )
949
-
1040
+
950
1041
  overlay = overlay * feature_points_no_id
951
1042
 
952
1043
  if title is not None:
@@ -955,34 +1046,33 @@ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors
955
1046
 
956
1047
  # Configure legend if requested and categorical coloring is available
957
1048
  if legend is not None and cvalues is not None and len(cvalues) > 1:
958
- sample.logger.debug(f"Configuring integrated legend at '{legend}' position with {len(cvalues)} categories: {cvalues}")
959
-
1049
+ sample.logger.debug(
1050
+ f"Configuring integrated legend at '{legend}' position with {len(cvalues)} categories: {cvalues}"
1051
+ )
1052
+
960
1053
  # Map legend position parameter to HoloViews legend position
961
1054
  legend_position_map = {
962
1055
  "top_right": "top_right",
963
- "top_left": "top_left",
1056
+ "top_left": "top_left",
964
1057
  "bottom_right": "bottom_right",
965
1058
  "bottom_left": "bottom_left",
966
1059
  "right": "right",
967
1060
  "left": "left",
968
1061
  "top": "top",
969
- "bottom": "bottom"
1062
+ "bottom": "bottom",
970
1063
  }
971
-
1064
+
972
1065
  hv_legend_pos = legend_position_map.get(legend, "bottom_right")
973
-
1066
+
974
1067
  # Apply legend configuration to the overlay
975
- overlay = overlay.opts(
976
- legend_position=hv_legend_pos,
977
- legend_opts={'title': '', 'padding': 2, 'spacing': 2}
978
- )
979
-
1068
+ overlay = overlay.opts(legend_position=hv_legend_pos, legend_opts={"title": "", "padding": 2, "spacing": 2})
1069
+
980
1070
  sample.logger.debug(f"Applied integrated legend at position '{hv_legend_pos}'")
981
1071
  elif legend is None:
982
1072
  # Explicitly hide legend when legend=None
983
1073
  overlay = overlay.opts(show_legend=False)
984
1074
  sample.logger.debug("Legend hidden (legend=None)")
985
-
1075
+
986
1076
  return overlay
987
1077
 
988
1078
 
@@ -1007,7 +1097,7 @@ def _handle_output(sample, overlay, filename):
1007
1097
  sample.logger.warning(f"PNG export failed: {os.path.abspath(filename)}")
1008
1098
  else:
1009
1099
  # Default to PNG for any other format
1010
- png_filename = filename + ".png" if not filename.endswith(('.png', '.svg', '.html')) else filename
1100
+ png_filename = filename + ".png" if not filename.endswith((".png", ".svg", ".html")) else filename
1011
1101
  success = _export_with_webdriver_manager(overlay, png_filename, "png", sample.logger)
1012
1102
  if success:
1013
1103
  sample.logger.success(f"PNG exported: {os.path.abspath(png_filename)}")
@@ -1029,7 +1119,7 @@ def plot_2d(
1029
1119
  show_ms2=False,
1030
1120
  show_in_browser=False,
1031
1121
  title=None,
1032
- cmap='iridescent',
1122
+ cmap="iridescent",
1033
1123
  marker="circle",
1034
1124
  markersize=5,
1035
1125
  size="static",
@@ -1043,7 +1133,7 @@ def plot_2d(
1043
1133
  mz_range=None,
1044
1134
  rt_range=None,
1045
1135
  legend=None,
1046
- colorby=None
1136
+ colorby=None,
1047
1137
  ):
1048
1138
  """
1049
1139
  Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
@@ -1089,7 +1179,7 @@ def plot_2d(
1089
1179
  Maximum pixel size for dynamic rasterization when using dynspread.
1090
1180
  raster_threshold (float, default 0.8):
1091
1181
  Threshold used for the dynspread process in dynamic rasterization.
1092
- legend (str, optional):
1182
+ legend (str, optional):
1093
1183
  Legend position for categorical feature coloring ("top_right", "bottom_left", etc.) or None.
1094
1184
  Only applies when colorby is not None and contains categorical data.
1095
1185
  colorby (str, optional):
@@ -1212,12 +1302,12 @@ def plot_2d(
1212
1302
 
1213
1303
  color_1 = "forestgreen"
1214
1304
  color_2 = "darkorange"
1215
-
1305
+
1216
1306
  # Handle colorby parameter for feature coloring
1217
1307
  use_categorical_coloring = False
1218
1308
  feature_colors = {}
1219
1309
  categorical_groups = []
1220
-
1310
+
1221
1311
  if filename is not None:
1222
1312
  dyn = False
1223
1313
  if not filename.endswith(".html"):
@@ -1290,21 +1380,22 @@ def plot_2d(
1290
1380
  if colorby is not None and colorby in feats.columns:
1291
1381
  # Check if colorby data is categorical (string-like)
1292
1382
  colorby_values = feats[colorby].dropna()
1293
- is_categorical = (
1294
- feats[colorby].dtype in ["object", "string", "category"] or
1295
- (len(colorby_values) > 0 and isinstance(colorby_values.iloc[0], str))
1383
+ is_categorical = feats[colorby].dtype in ["object", "string", "category"] or (
1384
+ len(colorby_values) > 0 and isinstance(colorby_values.iloc[0], str)
1296
1385
  )
1297
-
1386
+
1298
1387
  if is_categorical:
1299
1388
  use_categorical_coloring = True
1300
1389
  # Get unique categories, sorted
1301
1390
  categorical_groups = sorted(feats[colorby].dropna().unique())
1302
-
1391
+
1303
1392
  # Set up colors for categorical data using matplotlib colormap
1304
1393
  from matplotlib.colors import to_hex
1394
+
1305
1395
  try:
1306
1396
  from matplotlib.cm import get_cmap
1307
- colormap_func = get_cmap(cmap if cmap != 'iridescent' else 'tab20')
1397
+
1398
+ colormap_func = get_cmap(cmap if cmap != "iridescent" else "tab20")
1308
1399
  feature_colors = {}
1309
1400
  for i, group in enumerate(categorical_groups):
1310
1401
  if len(categorical_groups) <= 20:
@@ -1324,13 +1415,13 @@ def plot_2d(
1324
1415
  group_features = feats[feats[colorby] == group]
1325
1416
  if len(group_features) == 0:
1326
1417
  continue
1327
-
1418
+
1328
1419
  # Split by MS2 status
1329
1420
  group_with_ms2 = group_features[group_features["ms2_scans"].notnull()]
1330
1421
  group_without_ms2 = group_features[group_features["ms2_scans"].isnull()]
1331
-
1422
+
1332
1423
  group_color = feature_colors.get(group, color_1)
1333
-
1424
+
1334
1425
  if len(group_with_ms2) > 0:
1335
1426
  feature_hover = HoverTool(
1336
1427
  tooltips=[
@@ -1370,7 +1461,7 @@ def plot_2d(
1370
1461
  feature_points_1 = group_points_ms2
1371
1462
  else:
1372
1463
  feature_points_1 = feature_points_1 * group_points_ms2
1373
-
1464
+
1374
1465
  if len(group_without_ms2) > 0:
1375
1466
  feature_hover = HoverTool(
1376
1467
  tooltips=[
@@ -1447,7 +1538,7 @@ def plot_2d(
1447
1538
  tools=[feature_hover_1],
1448
1539
  hooks=hooks,
1449
1540
  )
1450
-
1541
+
1451
1542
  # find features without MS2 data
1452
1543
  features_df = feats[feats["ms2_scans"].isnull()]
1453
1544
  feature_hover_2 = HoverTool(
@@ -1600,22 +1691,19 @@ def plot_2d(
1600
1691
  # Map legend position parameter to HoloViews legend position
1601
1692
  legend_position_map = {
1602
1693
  "top_right": "top_right",
1603
- "top_left": "top_left",
1694
+ "top_left": "top_left",
1604
1695
  "bottom_right": "bottom_right",
1605
1696
  "bottom_left": "bottom_left",
1606
1697
  "right": "right",
1607
1698
  "left": "left",
1608
1699
  "top": "top",
1609
- "bottom": "bottom"
1700
+ "bottom": "bottom",
1610
1701
  }
1611
-
1702
+
1612
1703
  hv_legend_pos = legend_position_map.get(legend, "bottom_right")
1613
-
1704
+
1614
1705
  # Apply legend configuration to the overlay
1615
- overlay = overlay.opts(
1616
- legend_position=hv_legend_pos,
1617
- legend_opts={'title': '', 'padding': 2, 'spacing': 2}
1618
- )
1706
+ overlay = overlay.opts(legend_position=hv_legend_pos, legend_opts={"title": "", "padding": 2, "spacing": 2})
1619
1707
  elif legend is None and use_categorical_coloring:
1620
1708
  # Explicitly hide legend when legend=None but categorical coloring is used
1621
1709
  overlay = overlay.opts(show_legend=False)
@@ -1766,8 +1854,8 @@ def plot_2d_oracle(
1766
1854
  colorby="hg",
1767
1855
  legend_groups=None,
1768
1856
  markersize=5,
1769
- cmap='Turbo',
1770
- raster_cmap='grey',
1857
+ cmap="Turbo",
1858
+ raster_cmap="grey",
1771
1859
  raster_log=True,
1772
1860
  raster_min=1,
1773
1861
  raster_dynamic=True,
@@ -1784,12 +1872,12 @@ def plot_2d_oracle(
1784
1872
  """
1785
1873
  Plot a 2D visualization combining MS1 raster data and oracle-annotated features.
1786
1874
 
1787
- Creates an interactive plot overlaying MS1 survey scan data with feature annotations
1875
+ Creates an interactive plot overlaying MS1 survey scan data with feature annotations
1788
1876
  from oracle files. Features are colored categorically based on identification class,
1789
1877
  ion type, or evidence level.
1790
1878
 
1791
1879
  Parameters:
1792
- oracle_folder (str, optional): Path to oracle folder containing
1880
+ oracle_folder (str, optional): Path to oracle folder containing
1793
1881
  "diag/summary_by_feature.csv". Required for oracle annotations.
1794
1882
  link_by_feature_uid (bool): Whether to link features by UID (True) or by m/z/RT proximity.
1795
1883
  min_id_level (int): Minimum identification confidence level to include.
@@ -1822,14 +1910,16 @@ def plot_2d_oracle(
1822
1910
  """
1823
1911
 
1824
1912
  self.logger.info(f"Starting plot_2d_oracle with oracle_folder: {oracle_folder}")
1825
- self.logger.debug(f"Parameters - link_by_feature_uid: {link_by_feature_uid}, min_id_level: {min_id_level}, max_id_level: {max_id_level}")
1913
+ self.logger.debug(
1914
+ f"Parameters - link_by_feature_uid: {link_by_feature_uid}, min_id_level: {min_id_level}, max_id_level: {max_id_level}"
1915
+ )
1826
1916
  self.logger.debug(f"Plot parameters - colorby: {colorby}, markersize: {markersize}, filename: {filename}")
1827
1917
 
1828
1918
  # Early validation
1829
1919
  if self.features_df is None:
1830
1920
  self.logger.error("Cannot plot 2D oracle: features_df is not available")
1831
1921
  return
1832
-
1922
+
1833
1923
  if oracle_folder is None:
1834
1924
  self.logger.info("No oracle folder provided, plotting features only")
1835
1925
  return
@@ -1847,7 +1937,7 @@ def plot_2d_oracle(
1847
1937
  raster_max_px=raster_max_px,
1848
1938
  width=width,
1849
1939
  height=height,
1850
- filename=filename
1940
+ filename=filename,
1851
1941
  )
1852
1942
 
1853
1943
  # Load and process oracle data
@@ -1857,9 +1947,9 @@ def plot_2d_oracle(
1857
1947
  link_by_feature_uid=link_by_feature_uid,
1858
1948
  min_id_level=min_id_level,
1859
1949
  max_id_level=max_id_level,
1860
- min_ms_level=min_ms_level
1950
+ min_ms_level=min_ms_level,
1861
1951
  )
1862
-
1952
+
1863
1953
  if feats is None:
1864
1954
  return
1865
1955
 
@@ -1876,7 +1966,7 @@ def plot_2d_oracle(
1876
1966
  colors=colors,
1877
1967
  markersize=markersize,
1878
1968
  title=title,
1879
- legend=legend
1969
+ legend=legend,
1880
1970
  )
1881
1971
 
1882
1972
  # Handle output: export or display
@@ -2369,7 +2459,7 @@ def plot_features_stats(
2369
2459
 
2370
2460
  # Apply log10 transformation to intensity (handling non-positive values)
2371
2461
  feats["inty"] = np.where(feats["inty"] <= 0, np.nan, np.log10(feats["inty"]))
2372
-
2462
+
2373
2463
  # Apply log10 transformation to quality (handling non-positive values)
2374
2464
  feats["quality"] = np.where(feats["quality"] <= 0, np.nan, np.log10(feats["quality"]))
2375
2465
 
@@ -2380,7 +2470,7 @@ def plot_features_stats(
2380
2470
  # Define the specific metrics to plot
2381
2471
  cols_to_plot = [
2382
2472
  "mz",
2383
- "rt",
2473
+ "rt",
2384
2474
  "inty", # Already log10 transformed above
2385
2475
  "rt_delta",
2386
2476
  "quality", # Already log10 transformed above
@@ -2406,7 +2496,7 @@ def plot_features_stats(
2406
2496
  alpha=0.6,
2407
2497
  )
2408
2498
  dist_without = hv.Distribution(data_without, label="Without MS2").opts(
2409
- color="red",
2499
+ color="red",
2410
2500
  alpha=0.6,
2411
2501
  )
2412
2502
 
@@ -2416,7 +2506,7 @@ def plot_features_stats(
2416
2506
  title = "log10(inty)"
2417
2507
  elif col == "quality":
2418
2508
  title = "log10(quality)"
2419
-
2509
+
2420
2510
  overlay = (dist_with * dist_without).opts(
2421
2511
  title=title,
2422
2512
  show_legend=True,