masster 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/plot.py CHANGED
@@ -43,6 +43,7 @@ See Also:
43
43
  """
44
44
 
45
45
  import os
46
+ import warnings
46
47
 
47
48
  import datashader as ds
48
49
  import holoviews as hv
@@ -55,18 +56,11 @@ import polars as pl
55
56
  from bokeh.models import HoverTool
56
57
  from holoviews import dim
57
58
  from holoviews.plotting.util import process_cmap
58
- from matplotlib.colors import rgb2hex
59
59
 
60
- # Import cmap for colormap handling
61
- try:
62
- from cmap import Colormap
63
- except ImportError:
64
- Colormap = None
60
+ from cmap import Colormap
65
61
 
66
62
  # Parameters removed - using hardcoded defaults
67
-
68
-
69
- hv.extension("bokeh")
63
+ # hv.extension("bokeh")
70
64
 
71
65
 
72
66
  def _process_cmap(cmap, fallback="viridis", logger=None):
@@ -85,8 +79,8 @@ def _process_cmap(cmap, fallback="viridis", logger=None):
85
79
  if cmap is None:
86
80
  cmap = "viridis"
87
81
  elif cmap == "grey":
88
- cmap = "Greys256"
89
-
82
+ cmap = "greys"
83
+
90
84
  # If cmap package is not available, fall back to process_cmap
91
85
  if Colormap is None:
92
86
  if logger:
@@ -205,6 +199,108 @@ def _display_plot(plot_object, layout=None):
205
199
  return None
206
200
 
207
201
 
202
def _export_bokeh_plot(bokeh_plot, filename, format_type, driver=None):
    """Run bokeh's PNG/SVG exporter with its export warnings silenced.

    Parameters:
        bokeh_plot: Bokeh object handed to ``export_png`` / ``export_svg``
        filename: Output filename
        format_type: Either "png" or "svg"
        driver: Optional selenium webdriver; when None bokeh picks its own

    Raises:
        ValueError: If ``format_type`` is neither "png" nor "svg".
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        # Filter out bokeh.io.export warnings specifically
        warnings.filterwarnings("ignore", module="bokeh.io.export")

        if format_type == "png":
            from bokeh.io import export_png as export_func
        elif format_type == "svg":
            from bokeh.io import export_svg as export_func
        else:
            raise ValueError(f"Unsupported format: {format_type}")

        if driver is None:
            export_func(bokeh_plot, filename=filename)
        else:
            export_func(bokeh_plot, filename=filename, webdriver=driver)


def _export_with_webdriver_manager(plot_obj, filename, format_type, logger=None):
    """
    Export plot to PNG or SVG using webdriver-manager for automatic driver management.

    The export is attempted with a headless Chrome driver obtained through
    webdriver-manager. If selenium/webdriver-manager cannot be imported, or the
    managed export raises, bokeh's default export (no explicit webdriver) is
    tried instead.

    Parameters:
        plot_obj: Bokeh plot object or holoviews object to export
        filename: Output filename
        format_type: Either "png" or "svg"
        logger: Logger for error reporting (optional)

    Returns:
        bool: True if a file was exported, False otherwise (including when
        ``format_type`` is not supported)
    """
    # Reject unknown formats up front: previously they fell through every
    # fallback branch and reported success without writing anything.
    if format_type not in ("png", "svg"):
        if logger:
            logger.error(f"Unsupported format: {format_type}")
        return False

    try:
        # Convert holoviews to bokeh if needed
        if hasattr(plot_obj, "opts"):  # Likely a holoviews object
            import holoviews as hv

            bokeh_plot = hv.render(plot_obj)
        else:
            bokeh_plot = plot_obj
    except Exception as e:
        if logger:
            logger.error(f"Export preparation failed: {e}")
        return False

    # Try webdriver-manager export first
    try:
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.chrome.service import Service
        from webdriver_manager.chrome import ChromeDriverManager
    except ImportError:
        if logger:
            logger.debug(f"webdriver-manager not available, using default {format_type.upper()} export")
    else:
        driver = None
        try:
            # Set up Chrome options for headless operation
            chrome_options = Options()
            for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage", "--disable-gpu"):
                chrome_options.add_argument(flag)

            # Use webdriver-manager to automatically get the correct ChromeDriver
            service = Service(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=service, options=chrome_options)

            _export_bokeh_plot(bokeh_plot, filename, format_type, driver=driver)
            return True
        except Exception as e:
            if logger:
                logger.debug(f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export")
        finally:
            # Always release the browser process, even when the export raised.
            if driver is not None:
                try:
                    driver.quit()
                except Exception as quit_error:
                    if logger:
                        logger.debug(f"Failed to shut down webdriver cleanly: {quit_error}")

    # Final fallback to default export
    try:
        _export_bokeh_plot(bokeh_plot, filename, format_type)
        return True
    except Exception as e2:
        if logger:
            logger.error(f"{format_type.upper()} export failed: {e2}")
        return False
302
+
303
+
208
304
  def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
209
305
  """
210
306
  Helper function to handle consistent save/display behavior for sample plots.
@@ -236,16 +332,11 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
236
332
  save(plot_obj)
237
333
  self.logger.success(f"Plot saved to: {abs_filename}")
238
334
  elif filename.endswith(".png"):
239
- try:
240
- if plot_type == "bokeh":
241
- from bokeh.io.export import export_png
242
- export_png(plot_obj, filename=filename)
243
- elif plot_type in ["panel", "holoviews"]:
244
- import holoviews as hv
245
- hv.save(plot_obj, filename, fmt="png")
335
+ success = _export_with_webdriver_manager(plot_obj, filename, "png", self.logger)
336
+ if success:
246
337
  self.logger.success(f"Plot saved to: {abs_filename}")
247
- except Exception:
248
- # Fall back to HTML if PNG export not available
338
+ else:
339
+ # Fall back to HTML if PNG export fails completely
249
340
  html_filename = filename.replace('.png', '.html')
250
341
  abs_html_filename = os.path.abspath(html_filename)
251
342
  if plot_type == "panel":
@@ -259,6 +350,25 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
259
350
  output_file(html_filename)
260
351
  save(plot_obj)
261
352
  self.logger.warning(f"PNG export not available, saved as HTML instead: {abs_html_filename}")
353
+ elif filename.endswith(".svg"):
354
+ success = _export_with_webdriver_manager(plot_obj, filename, "svg", self.logger)
355
+ if success:
356
+ self.logger.success(f"Plot saved to: {abs_filename}")
357
+ else:
358
+ # Fall back to HTML if SVG export fails completely
359
+ html_filename = filename.replace('.svg', '.html')
360
+ abs_html_filename = os.path.abspath(html_filename)
361
+ if plot_type == "panel":
362
+ plot_obj.save(html_filename, embed=True) # type: ignore[attr-defined]
363
+ elif plot_type == "holoviews":
364
+ import panel
365
+ panel.panel(plot_obj).save(html_filename, embed=True) # type: ignore[attr-defined]
366
+ elif plot_type == "bokeh":
367
+ from bokeh.plotting import output_file
368
+ from bokeh.io import save
369
+ output_file(html_filename)
370
+ save(plot_obj)
371
+ self.logger.warning(f"SVG export not available, saved as HTML instead: {abs_html_filename}")
262
372
  elif filename.endswith(".pdf"):
263
373
  # Try to save as PDF, fall back to HTML if not available
264
374
  try:
@@ -444,101 +554,25 @@ def plot_chrom(
444
554
  self._handle_sample_plot_output(layout, filename, "panel")
445
555
 
446
556
 
447
- def plot_2d(
448
- self,
449
- filename=None,
450
- show_features=True,
451
- show_only_features_with_ms2=False,
452
- show_isotopes=False,
453
- show_ms2=False,
454
- show_in_browser=False,
455
- title=None,
456
- cmap='iridescent',
457
- marker="circle",
458
- markersize=5,
459
- size="static",
460
- raster_dynamic=True,
461
- raster_max_px=8,
462
- raster_threshold=0.8,
463
- height=600,
464
- width=750,
465
- mz_range=None,
466
- rt_range=None
467
- ):
468
- """
469
- Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
470
- of feature and MS2 scan information.
471
- This method creates a plot from the internal MS1 data loaded into self.ms1_df
472
- and optionally overlays various feature and MS2 information depending on the provided
473
- parameters. The visualization is built using HoloViews and Holoviews dynamic rasterization,
474
- together with Panel for layout and exporting.
475
- Parameters:
476
- filename (str, optional):
477
- Path to save the plot. If provided and ends with ".html", the plot is saved as an
478
- interactive HTML file; otherwise, it is saved as a PNG image.
479
- show_features (bool, default True):
480
- Whether to overlay detected features on the plot.
481
- show_only_features_with_ms2 (bool, default False):
482
- If True, only display features that have associated MS2 scans. When False,
483
- features without MS2 data are also shown.
484
- show_isotopes (bool, default False):
485
- Whether to overlay isotope information on top of the features.
486
- show_ms2 (bool, default False):
487
- Whether to overlay MS2 scan information on the plot.
488
- title (str, optional):
489
- Title of the plot.
490
- cmap (str, optional):
491
- Colormap to use for the background rasterized data. Defaults to "iridescent_r" unless
492
- modified (e.g., if set to "grey", it is changed to "Greys256").
493
- marker (str, default 'circle'):
494
- Marker type to use for feature and MS2 points.
495
- markersize (int, default 10):
496
- Base size of the markers used for plotting points.
497
- size (str, default 'dynamic'):
498
- Controls marker sizing behavior. Options: 'dynamic', 'static', or 'slider'.
499
- - 'dynamic': Uses coordinate-based sizing that scales with zoom level (markers get larger when zooming in)
500
- - 'static': Uses screen-based sizing that remains constant regardless of zoom level
501
- - 'slider': Provides an interactive slider to dynamically adjust marker size
502
- raster_dynamic (bool, default True):
503
- Whether to use dynamic rasterization for the background point cloud.
504
- raster_max_px (int, default 8):
505
- Maximum pixel size for dynamic rasterization when using dynspread.
506
- raster_threshold (float, default 0.8):
507
- Threshold used for the dynspread process in dynamic rasterization.
508
- Behavior:
509
- - Checks for a loaded mzML file by verifying that self.file_obj is not None.
510
- - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
511
- points (inty < 1).
512
- - Sets up the plot bounds for retention time (rt) and mass-to-charge ratio (mz) using a hook function.
513
- - Renders the MS1 data as a background rasterized image with a logarithmic intensity normalization.
514
- - Conditionally overlays feature points (with and without MS2 information), isotopes (if requested),
515
- and MS2 scan points based on internal DataFrame data.
516
- - Depending on the filename parameter, either displays the plot interactively using Panel or
517
- saves it as an HTML or PNG file.
518
- Returns:
519
- None
520
- Side Effects:
521
- - May print a warning if no mzML file is loaded.
522
- - Either shows the plot interactively or writes the output to a file.
523
- """
524
-
525
- if self.ms1_df is None:
526
- self.logger.error("No MS1 data available.")
527
- return
557
+ def _create_raster_plot(sample, mz_range=None, rt_range=None, raster_cmap='greys',
558
+ raster_log=True, raster_min=1, raster_dynamic=True, raster_threshold=0.8, raster_max_px=8,
559
+ width=750, height=600, filename=None):
560
+ """Create the raster plot layer from MS1 data."""
561
+ # Process colormap using the cmap package with proper error handling
562
+ raster_cmap_processed = _process_cmap(raster_cmap if raster_cmap is not None else 'greys', fallback="greys", logger=sample.logger)
528
563
 
529
- # Process colormap using the cmap package
530
- cmap_palette = _process_cmap(cmap, fallback="iridescent", logger=self.logger)
564
+ # get columns rt, mz, inty from sample.ms1_df, It's polars DataFrame
565
+ spectradf = sample.ms1_df.to_pandas()
531
566
 
532
- # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
533
- spectradf = self.ms1_df.select(["rt", "mz", "inty"])
534
- # remove any inty<1
535
- spectradf = spectradf.filter(pl.col("inty") >= 1)
567
+ # remove any inty<raster_min
568
+ spectradf = spectradf[spectradf["inty"] >= raster_min]
536
569
  # keep only rt, mz, and inty
537
- spectradf = spectradf.select(["rt", "mz", "inty"])
570
+ spectradf = spectradf[["rt", "mz", "inty"]]
538
571
  if mz_range is not None:
539
- spectradf = spectradf.filter((pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]))
572
+ spectradf = spectradf[(spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])]
540
573
  if rt_range is not None:
541
- spectradf = spectradf.filter((pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]))
574
+ spectradf = spectradf[(spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])]
575
+
542
576
  maxrt = spectradf["rt"].max()
543
577
  minrt = spectradf["rt"].min()
544
578
  maxmz = spectradf["mz"].max()
@@ -563,103 +597,31 @@ def plot_2d(
563
597
  tools=["hover"],
564
598
  )
565
599
 
566
- # Configure marker and size behavior based on size parameter
567
- use_dynamic_sizing = size.lower() in ["dyn", "dynamic"]
568
- use_slider_sizing = size.lower() == "slider"
569
-
570
- def dynamic_sizing_hook(plot, element):
571
- """Hook to convert size-based markers to radius-based for dynamic behavior"""
572
- try:
573
- if use_dynamic_sizing and hasattr(plot, "state") and hasattr(plot.state, "renderers"):
574
- from bokeh.models import Circle
575
-
576
- for renderer in plot.state.renderers:
577
- if hasattr(renderer, "glyph"):
578
- glyph = renderer.glyph
579
- # Check if it's a circle/scatter glyph that we can convert
580
- if hasattr(glyph, "size") and marker_type == "circle":
581
- # Create a new Circle glyph with radius instead of size
582
- new_glyph = Circle(
583
- x=glyph.x,
584
- y=glyph.y,
585
- radius=base_radius,
586
- fill_color=glyph.fill_color,
587
- line_color=glyph.line_color,
588
- fill_alpha=glyph.fill_alpha,
589
- line_alpha=glyph.line_alpha,
590
- )
591
- renderer.glyph = new_glyph
592
- except Exception:
593
- # Silently fail and use regular sizing if hook doesn't work
594
- pass
595
-
596
- if use_dynamic_sizing:
597
- # Dynamic sizing: use coordinate-based sizing that scales with zoom
598
- marker_type = "circle"
599
- # Calculate radius based on data range for coordinate-based sizing
600
- rtrange = maxrt - minrt
601
- mzrange = maxmz - minmz
602
- # Use a fraction of the smaller dimension for radius
603
- base_radius = min(rtrange, mzrange) * 0.0005 * markersize
604
- size_1 = markersize # Use regular size initially, hook will convert to radius
605
- size_2 = markersize
606
- hooks = [dynamic_sizing_hook]
607
- elif use_slider_sizing:
608
- # Slider sizing: create an interactive slider for marker size
609
- marker_type = marker # Use the original marker parameter
610
- size_1 = markersize # Use markersize initially, will be updated by slider
611
- size_2 = markersize
612
- base_radius = None # Not used in slider mode
613
- hooks = []
614
- else:
615
- # Static sizing: use pixel-based sizing that stays fixed
616
- marker_type = marker # Use the original marker parameter
617
- size_1 = markersize
618
- size_2 = markersize
619
- base_radius = None # Not used in static mode
620
- hooks = []
621
-
622
- color_1 = "forestgreen"
623
- color_2 = "darkorange"
624
600
  if filename is not None:
625
601
  dyn = False
626
602
  if not filename.endswith(".html"):
627
- if use_dynamic_sizing:
628
- # For exported files, use smaller coordinate-based size
629
- size_1 = 2
630
- size_2 = 2
631
- else:
632
- size_1 = 2
633
- size_2 = 2
634
- color_1 = "forestgreen"
635
- color_2 = "darkorange"
636
603
  raster_dynamic = False
637
604
 
638
- # For slider functionality, disable raster dynamic to avoid DynamicMap nesting
639
- if use_slider_sizing:
640
- raster_dynamic = False
641
-
642
605
  dyn = raster_dynamic
643
606
  raster = hd.rasterize(
644
607
  points,
645
608
  aggregator=ds.max("inty"),
646
609
  interpolation="bilinear",
647
- dynamic=dyn, # alpha=10, min_alpha=0,
610
+ dynamic=dyn,
648
611
  ).opts(
649
612
  active_tools=["box_zoom"],
650
- cmap=cmap_palette,
613
+ cmap=raster_cmap_processed,
651
614
  tools=["hover"],
652
615
  hooks=[new_bounds_hook],
653
616
  width=width,
654
617
  height=height,
655
- cnorm="log",
618
+ cnorm="log" if raster_log else "linear",
656
619
  xlabel="Retention time (s)",
657
620
  ylabel="m/z",
658
621
  colorbar=True,
659
622
  colorbar_position="right",
660
- axiswise=True,
623
+ axiswise=True
661
624
  )
662
-
663
625
  raster = hd.dynspread(
664
626
  raster,
665
627
  threshold=raster_threshold,
@@ -667,97 +629,861 @@ def plot_2d(
667
629
  shape="square",
668
630
  max_px=raster_max_px,
669
631
  )
670
- feature_points_1 = None
671
- feature_points_2 = None
672
- feature_points_3 = None
673
- feature_points_4 = None
674
- feature_points_iso = None
675
- # Plot features as red dots if features is True
676
- if self.features_df is not None and show_features:
677
- feats = self.features_df.clone()
678
- # Convert to pandas for operations that require pandas functionality
679
- if hasattr(feats, "to_pandas"):
680
- feats = feats.to_pandas()
681
- # if ms2_scans is not null, keep only the first element of the list
682
- feats["ms2_scans"] = feats["ms2_scans"].apply(
683
- lambda x: x[0] if isinstance(x, list) else x,
684
- )
685
- if mz_range is not None:
686
- feats = feats[(feats["mz"] >= mz_range[0]) & (feats["mz"] <= mz_range[1])]
687
- if rt_range is not None:
688
- feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
689
- # keep only iso==0, i.e. the main
690
- feats = feats[feats["iso"] == 0]
691
- # find features with ms2_scans not None and iso==0
692
- features_df = feats[feats["ms2_scans"].notnull()]
693
- # Create feature points with proper sizing method
694
- feature_hover_1 = HoverTool(
695
- tooltips=[
696
- ("rt", "@rt"),
697
- ("m/z", "@mz{0.0000}"),
698
- ("feature_uid", "@feature_uid"),
699
- ("inty", "@inty"),
700
- ("iso", "@iso"),
701
- ("adduct", "@adduct"),
702
- ("chrom_coherence", "@chrom_coherence"),
703
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
704
- ],
705
- )
706
- feature_points_1 = hv.Points(
707
- features_df,
708
- kdims=["rt", "mz"],
709
- vdims=[
710
- "feature_uid",
711
- "inty",
712
- "iso",
713
- "adduct",
714
- "ms2_scans",
715
- "chrom_coherence",
716
- "chrom_prominence_scaled",
717
- ],
718
- label="Features with MS2 data",
719
- ).options(
720
- color=color_1,
721
- marker=marker_type,
722
- size=size_1,
723
- tools=[feature_hover_1],
724
- hooks=hooks,
725
- )
726
- # find features without MS2 data
727
- features_df = feats[feats["ms2_scans"].isnull()]
728
- feature_hover_2 = HoverTool(
729
- tooltips=[
730
- ("rt", "@rt"),
731
- ("m/z", "@mz{0.0000}"),
732
- ("feature_uid", "@feature_uid"),
733
- ("inty", "@inty"),
734
- ("iso", "@iso"),
735
- ("adduct", "@adduct"),
736
- ("chrom_coherence", "@chrom_coherence"),
737
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
738
- ],
739
- )
740
- feature_points_2 = hv.Points(
741
- features_df,
742
- kdims=["rt", "mz"],
743
- vdims=[
744
- "feature_uid",
745
- "inty",
746
- "iso",
747
- "adduct",
748
- "chrom_coherence",
749
- "chrom_prominence_scaled",
750
- ],
751
- label="Features without MS2 data",
752
- ).options(
753
- color="red",
754
- marker=marker_type,
755
- size=size_2,
756
- tools=[feature_hover_2],
757
- hooks=hooks,
758
- )
632
+
633
+ return raster
759
634
 
760
- if show_isotopes:
635
+
636
def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
                                min_id_level, max_id_level, min_ms_level):
    """Load oracle data and merge with features.

    Reads ``<oracle_folder>/diag/summary_by_feature.csv``, assigns each oracle
    row a ``feature_uid`` and left-merges the annotations onto a pandas copy of
    ``sample.features_df``.

    Parameters:
        sample: Object providing ``features_df`` (with ``clone()``) and ``logger``
        oracle_folder: Folder containing ``diag/summary_by_feature.csv``, or None
        link_by_feature_uid: If True, take the uid from the ``uid:<digits>``
            token in the oracle ``title``; otherwise match each oracle row to
            the feature closest in rt within |rt| < 1 and |mz - precursor| < 0.005
        min_id_level: Keep rows with ``id_level`` >= this value (None disables)
        max_id_level: Keep rows with ``id_level`` <= this value (None disables)
        min_ms_level: Keep rows with ``mslevel`` >= this value (None disables)

    Returns:
        pandas.DataFrame of merged, filtered features, or None when features
        are unavailable, no folder was given, or the oracle file cannot be
        read or lacks required columns.
    """
    if sample.features_df is None:
        sample.logger.error("Cannot plot 2D oracle: features_df is not available")
        return None

    sample.logger.debug(f"Features data shape: {len(sample.features_df)} rows")

    # Bail out before cloning/converting the feature table when there is
    # nothing to merge.
    if oracle_folder is None:
        sample.logger.info("No oracle folder provided, plotting features only")
        return None

    feats = sample.features_df.clone()
    # Convert to pandas for oracle operations that require pandas functionality
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()

    oracle_file_path = os.path.join(oracle_folder, "diag", "summary_by_feature.csv")
    sample.logger.debug(f"Loading oracle data from: {oracle_file_path}")
    try:
        oracle_data = pd.read_csv(oracle_file_path)
        sample.logger.info(f"Oracle data loaded successfully with {len(oracle_data)} rows")
    except Exception as e:
        sample.logger.error(f"Could not read {oracle_file_path}: {e}")
        return None

    annotation_cols = [
        "title", "scan_idx", "mslevel", "hits", "id_level", "id_label",
        "id_ion", "id_class", "id_evidence", "score", "score2",
    ]
    cols_to_keep = annotation_cols if link_by_feature_uid else ["precursor", "rt", *annotation_cols]

    # A malformed summary file is reported the same way as an unreadable one
    # instead of surfacing as a bare KeyError.
    missing_cols = [col for col in cols_to_keep if col not in oracle_data.columns]
    if missing_cols:
        sample.logger.error(f"Oracle data in {oracle_file_path} is missing required columns: {missing_cols}")
        return None

    # Copy so the column assignments below do not write into a slice.
    oracle_data = oracle_data[cols_to_keep].copy()

    if link_by_feature_uid:
        # extract feature_uid from title. It begins with "uid:XYZ,"
        sample.logger.debug("Extracting feature UIDs from oracle titles using pattern 'uid:(\\d+)'")
        oracle_data["feature_uid"] = (
            oracle_data["title"].astype(str).str.extract(r"uid:(\d+)", expand=False)
        )

        # Rows whose title carries no uid cannot be linked; converting their
        # NaN to int would raise, so drop them first.
        unmatched = oracle_data["feature_uid"].isna()
        if unmatched.any():
            sample.logger.warning(
                f"Skipping {int(unmatched.sum())} oracle rows without a 'uid:' token in their title"
            )
            oracle_data = oracle_data[~unmatched].copy()
        oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)

        # sort by id_level, remove duplicate feature_uid, keep the first one
        sample.logger.debug("Sorting by ID level and removing duplicates")
        oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
        oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")
        sample.logger.debug(f"After deduplication: {len(oracle_data)} unique oracle annotations")
    else:
        # For every oracle row pick the feature inside the rt/mz window with
        # the smallest rt distance.
        matched_uids = []
        for rt, precursor in zip(oracle_data["rt"], oracle_data["precursor"]):
            delta_rt = (feats["rt"] - rt).abs()
            in_window = (delta_rt < 1) & ((feats["mz"] - precursor).abs() < 0.005)
            if in_window.any():
                best_pos = delta_rt[in_window].to_numpy().argmin()
                matched_uids.append(feats.loc[in_window, "feature_uid"].to_numpy()[best_pos])
            else:
                matched_uids.append(None)
        oracle_data["feature_uid"] = matched_uids

        # remove precursor and rt columns
        oracle_data = oracle_data.drop(columns=["precursor", "rt"])

        # Unmatched rows contribute nothing to a left merge; dropping them lets
        # the key be cast to the features' dtype so the merge keys agree.
        matched = oracle_data["feature_uid"].notna()
        if not matched.all():
            sample.logger.debug(f"{int((~matched).sum())} oracle rows matched no feature and were skipped")
        oracle_data = oracle_data[matched].copy()
        oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(feats["feature_uid"].dtype)

    # Merge features with oracle data
    sample.logger.debug(f"Merging {len(feats)} features with oracle data")
    feats = feats.merge(oracle_data, how="left", on="feature_uid")
    sample.logger.debug(f"After merge: {len(feats)} total features")

    # filter feats by id_level
    initial_count = len(feats)
    if min_id_level is not None:
        feats = feats[(feats["id_level"] >= min_id_level)]
        sample.logger.debug(f"After min_id_level filter ({min_id_level}): {len(feats)} features")
    if max_id_level is not None:
        feats = feats[(feats["id_level"] <= max_id_level)]
        sample.logger.debug(f"After max_id_level filter ({max_id_level}): {len(feats)} features")
    if min_ms_level is not None:
        feats = feats[(feats["mslevel"] >= min_ms_level)]
        sample.logger.debug(f"After min_ms_level filter ({min_ms_level}): {len(feats)} features")

    sample.logger.info(f"Feature filtering complete: {initial_count} → {len(feats)} features remaining")
    return feats
722
+
723
+
724
def _setup_color_mapping(sample, feats, colorby, cmap, legend_groups=None):
    """Set up categorical color mapping for features.

    Mutates ``feats`` in place: a ``color`` column is (re)written, and when
    ``colorby`` selects a categorical column its missing values are replaced
    with ``'mix'``.

    Parameters:
        sample: Object providing ``logger``
        feats: pandas DataFrame of features (modified in place)
        colorby: One of "class"/"hg"/"id_class"/"id_hg", "ion"/"id_ion",
            "evidence"/"id_evidence", "level"/"id_level"; any other value
            disables categorical coloring
        cmap: Colormap name passed to ``Colormap`` (or ``_process_cmap`` as fallback)
        legend_groups: Optional sequence of category names fixing legend
            order; values outside it are relabelled ``'mix'``

    Returns:
        tuple: ``(cvalues, color_column, colors)`` - the ordered categories
        (None without categorical coloring), the column to color by
        ("color" without categorical coloring) and one color per category.
    """
    feats["color"] = "black"  # Default fallback color

    # Determine which column to use for categorical coloring
    column_aliases = {
        "id_class": ("class", "hg", "id_class", "id_hg"),
        "id_ion": ("ion", "id_ion"),
        "id_evidence": ("evidence", "id_evidence"),
        "id_level": ("level", "id_level"),
    }
    categorical_column = next(
        (column for column, aliases in column_aliases.items() if colorby in aliases),
        None,
    )
    if categorical_column is None:
        return None, "color", []

    # Imported only on the path that actually converts colors.
    import matplotlib.colors as mcolors

    # replace nans with 'mix'
    # NOTE(review): for "id_level" this puts the string 'mix' into a column
    # that otherwise looks numeric - confirm downstream consumers cope.
    feats[categorical_column] = feats[categorical_column].fillna("mix")

    if legend_groups is not None:
        # Use all specified groups to ensure consistent legend/coloring
        cvalues = list(legend_groups)
        # Ensure 'mix' is always present as the last group if not already included
        if "mix" not in cvalues:
            cvalues.append("mix")
        sample.logger.info(f"Using provided legend_groups for legend: {cvalues}")

        # Check which provided groups actually have data
        present_groups = set(feats[categorical_column].unique())
        missing_groups = [grp for grp in cvalues if grp not in present_groups]
        if missing_groups:
            sample.logger.warning(f"Provided legend_groups not found in data: {missing_groups}")
            sample.logger.info(f"Groups present in data: {sorted(present_groups, key=str)}")

        # Assign any points not in legend_groups to 'mix'. Testing against the
        # full list (which always contains 'mix') keeps the last real group
        # intact even when the caller listed 'mix' themselves.
        feats.loc[~feats[categorical_column].isin(cvalues), categorical_column] = "mix"
    else:
        # Use only groups present in data, ordered by their reversed text;
        # str() keeps this working for non-string category values.
        cvalues = sorted(feats[categorical_column].unique(), key=lambda value: str(value)[::-1])
        sample.logger.info(f"Using groups derived from data: {cvalues}")

    num_colors = len(cvalues)

    # Use colormap for categorical data
    try:
        colormap = Colormap(cmap)
        colors = []
        for i in range(num_colors):
            # Generate evenly spaced colors across the colormap
            t = i / (num_colors - 1) if num_colors > 1 else 0.5
            color = colormap(t)
            # Convert to hex - handle different color formats
            if hasattr(color, "__len__") and len(color) >= 3:
                # It's an array-like color (RGB or RGBA)
                colors.append(mcolors.to_hex(color[:3]))
            else:
                # It's a single value, convert to RGB
                colors.append(mcolors.to_hex([color, color, color]))
    except (AttributeError, ValueError, TypeError):
        # Fallback to using _process_cmap if direct Colormap fails
        cmap_palette = _process_cmap(cmap, fallback="viridis", logger=sample.logger)
        # Sample colors from the palette
        last = len(cmap_palette) - 1
        colors = [
            cmap_palette[int(i * last / (num_colors - 1)) if num_colors > 1 else len(cmap_palette) // 2]
            for i in range(num_colors)
        ]

    # Each category gets the color at its position in cvalues. Mapping through
    # the resolved column covers every colorby alias, including "evidence" and
    # "level", which the per-alias branches used to miss.
    class_to_color = dict(zip(cvalues, colors))
    feats["color"] = feats[categorical_column].map(class_to_color).fillna("black")

    return cvalues, categorical_column, colors
827
+
828
+
829
+ def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors,
830
+ markersize, title, legend):
831
+ """Create feature overlay with identified and unidentified features."""
832
+ # replace NaN with 0 in id_level
833
+ feats["id_level"] = feats["id_level"].fillna(0)
834
+
835
+ # Create unified visualization with all features in single layer
836
+ # This avoids the multiple layer legend conflicts that cause dark colors and shared toggling
837
+ sample.logger.debug("Creating unified feature visualization with categorical coloring")
838
+
839
+ # Prepare categorical coloring for identified features only (id_level >= 1)
840
+ identified_feats = feats[feats["id_level"] >= 1].copy() if len(feats[feats["id_level"] >= 1]) > 0 else pd.DataFrame()
841
+ unidentified_feats = feats[feats["id_level"] < 1].copy() if len(feats[feats["id_level"] < 1]) > 0 else pd.DataFrame()
842
+
843
+ overlay = raster
844
+
845
+ # Single layer for identified features with categorical coloring
846
+ if len(identified_feats) > 0 and cvalues is not None:
847
+ # Create proper confidence-based marker styling
848
+ identified_feats["marker_style"] = identified_feats["id_level"].apply(
849
+ lambda x: "circle" if x >= 2 else "circle_cross"
850
+ )
851
+ identified_feats["fill_alpha"] = identified_feats["id_level"].apply(
852
+ lambda x: 1.0 if x >= 2 else 0.3 # Full opacity for high conf, transparent for medium
853
+ )
854
+
855
+ oracle_hover_identified = HoverTool(
856
+ tooltips=[
857
+ ("rt", "@rt"),
858
+ ("m/z", "@mz{0.0000}"),
859
+ ("feature_uid", "@feature_uid"),
860
+ ("id_level", "@id_level"),
861
+ ("id_class", "@id_class"),
862
+ ("id_label", "@id_label"),
863
+ ("id_ion", "@id_ion"),
864
+ ("id_evidence", "@id_evidence"),
865
+ ("score", "@score"),
866
+ ("score2", "@score2"),
867
+ ],
868
+ )
869
+
870
+ # Create completely separate overlay elements for each category
871
+ overlays_to_combine = [raster] # Start with raster base
872
+
873
+ for i, category in enumerate(cvalues):
874
+ category_data = identified_feats[identified_feats[color_column] == category].copy()
875
+ if len(category_data) > 0:
876
+ # Create a completely separate Points element for this category
877
+ category_points = hv.Points(
878
+ category_data,
879
+ kdims=["rt", "mz"],
880
+ vdims=[
881
+ "inty", "feature_uid", "id_level", "id_class", "id_label",
882
+ "id_ion", "id_evidence", "score", "score2", "fill_alpha"
883
+ ],
884
+ label=str(category) # This becomes the legend label
885
+ ).options(
886
+ color=colors[i], # Use pre-computed hex color for this category
887
+ marker="circle",
888
+ size=markersize,
889
+ alpha="fill_alpha",
890
+ tools=[oracle_hover_identified],
891
+ show_legend=True,
892
+ )
893
+ overlays_to_combine.append(category_points)
894
+ else:
895
+ # Create empty Points element for categories with no data to ensure they appear in legend
896
+ empty_data = pd.DataFrame(columns=['rt', 'mz', 'inty', 'feature_uid', 'id_level',
897
+ 'id_class', 'id_label', 'id_ion', 'id_evidence',
898
+ 'score', 'score2', 'fill_alpha'])
899
+ category_points = hv.Points(
900
+ empty_data,
901
+ kdims=["rt", "mz"],
902
+ vdims=[
903
+ "inty", "feature_uid", "id_level", "id_class", "id_label",
904
+ "id_ion", "id_evidence", "score", "score2", "fill_alpha"
905
+ ],
906
+ label=str(category) # This becomes the legend label
907
+ ).options(
908
+ color=colors[i], # Use pre-computed hex color for this category
909
+ marker="circle",
910
+ size=markersize,
911
+ alpha=1.0,
912
+ tools=[oracle_hover_identified],
913
+ show_legend=True,
914
+ )
915
+ overlays_to_combine.append(category_points)
916
+
917
+ # Combine all overlays
918
+ overlay = overlays_to_combine[0] # Start with raster
919
+ for layer in overlays_to_combine[1:]:
920
+ overlay = overlay * layer
921
+
922
+ else:
923
+ # No categorical data - just set overlay to raster
924
+ overlay = raster
925
+
926
+ # Separate layer for unidentified features (always black crosses)
927
+ if len(unidentified_feats) > 0:
928
+ oracle_hover_no_id = HoverTool(
929
+ tooltips=[
930
+ ("rt", "@rt"),
931
+ ("m/z", "@mz{0.0000}"),
932
+ ("feature_uid", "@feature_uid"),
933
+ ("id_level", "@id_level"),
934
+ ],
935
+ )
936
+
937
+ feature_points_no_id = hv.Points(
938
+ unidentified_feats,
939
+ kdims=["rt", "mz"],
940
+ vdims=["inty", "feature_uid", "id_level"],
941
+ ).options(
942
+ color="black",
943
+ marker="x",
944
+ size=markersize,
945
+ alpha=1.0,
946
+ tools=[oracle_hover_no_id],
947
+ show_legend=False,
948
+ )
949
+
950
+ overlay = overlay * feature_points_no_id
951
+
952
+ if title is not None:
953
+ sample.logger.debug(f"Setting plot title: {title}")
954
+ overlay = overlay.opts(title=title)
955
+
956
+ # Configure legend if requested and categorical coloring is available
957
+ if legend is not None and cvalues is not None and len(cvalues) > 1:
958
+ sample.logger.debug(f"Configuring integrated legend at '{legend}' position with {len(cvalues)} categories: {cvalues}")
959
+
960
+ # Map legend position parameter to HoloViews legend position
961
+ legend_position_map = {
962
+ "top_right": "top_right",
963
+ "top_left": "top_left",
964
+ "bottom_right": "bottom_right",
965
+ "bottom_left": "bottom_left",
966
+ "right": "right",
967
+ "left": "left",
968
+ "top": "top",
969
+ "bottom": "bottom"
970
+ }
971
+
972
+ hv_legend_pos = legend_position_map.get(legend, "bottom_right")
973
+
974
+ # Apply legend configuration to the overlay
975
+ overlay = overlay.opts(
976
+ legend_position=hv_legend_pos,
977
+ legend_opts={'title': '', 'padding': 2, 'spacing': 2}
978
+ )
979
+
980
+ sample.logger.debug(f"Applied integrated legend at position '{hv_legend_pos}'")
981
+ elif legend is None:
982
+ # Explicitly hide legend when legend=None
983
+ overlay = overlay.opts(show_legend=False)
984
+ sample.logger.debug("Legend hidden (legend=None)")
985
+
986
+ return overlay
987
+
988
+
989
def _handle_output(sample, overlay, filename):
    """Export the finished overlay to a file, or wrap it for display.

    Parameters:
        sample: Object whose ``logger`` is used to report export results.
        overlay: HoloViews object to save or to wrap in a Panel layout.
        filename: Output path, or None when nothing should be written.

    Returns:
        A ``panel.Column`` containing the overlay when ``filename`` is None;
        None in every other case (the plot is written to disk instead).
    """
    # Nothing to export: return a Panel layout (consistent with plot_2d behavior)
    if filename is None:
        return panel.Column(overlay)

    # HTML goes through Panel so the saved document embeds the plot state
    if filename.endswith(".html"):
        panel.Column(overlay).save(filename, embed=True)
        return None

    if filename.endswith(".svg"):
        exported = _export_with_webdriver_manager(overlay, filename, "svg", sample.logger)
        if not exported:
            sample.logger.warning(f"SVG export failed: {os.path.abspath(filename)}")
        else:
            sample.logger.success(f"SVG exported: {os.path.abspath(filename)}")
        return None

    # Every remaining name is exported as PNG; names that do not already end
    # with ".png" get the extension appended.
    target = filename if filename.endswith(".png") else filename + ".png"
    exported = _export_with_webdriver_manager(overlay, target, "png", sample.logger)
    if not exported:
        sample.logger.warning(f"PNG export failed: {os.path.abspath(target)}")
    else:
        sample.logger.success(f"PNG exported: {os.path.abspath(target)}")
    return None
1021
+
1022
+
1023
+ def plot_2d(
1024
+ self,
1025
+ filename=None,
1026
+ show_features=True,
1027
+ show_only_features_with_ms2=False,
1028
+ show_isotopes=False,
1029
+ show_ms2=False,
1030
+ show_in_browser=False,
1031
+ title=None,
1032
+ cmap='iridescent',
1033
+ marker="circle",
1034
+ markersize=5,
1035
+ size="static",
1036
+ raster_log=True,
1037
+ raster_min=1,
1038
+ raster_dynamic=True,
1039
+ raster_max_px=8,
1040
+ raster_threshold=0.8,
1041
+ height=600,
1042
+ width=750,
1043
+ mz_range=None,
1044
+ rt_range=None,
1045
+ legend=None,
1046
+ colorby=None
1047
+ ):
1048
+ """
1049
+ Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
1050
+ of feature and MS2 scan information.
1051
+ This method creates a plot from the internal MS1 data loaded into self.ms1_df
1052
+ and optionally overlays various feature and MS2 information depending on the provided
1053
+ parameters. The visualization is built using HoloViews and Holoviews dynamic rasterization,
1054
+ together with Panel for layout and exporting.
1055
+ Parameters:
1056
+ filename (str, optional):
1057
+ Path to save the plot. If provided and ends with ".html", the plot is saved as an
1058
+ interactive HTML file; otherwise, it is saved as a PNG image.
1059
+ show_features (bool, default True):
1060
+ Whether to overlay detected features on the plot.
1061
+ show_only_features_with_ms2 (bool, default False):
1062
+ If True, only display features that have associated MS2 scans. When False,
1063
+ features without MS2 data are also shown.
1064
+ show_isotopes (bool, default False):
1065
+ Whether to overlay isotope information on top of the features.
1066
+ show_ms2 (bool, default False):
1067
+ Whether to overlay MS2 scan information on the plot.
1068
+ title (str, optional):
1069
+ Title of the plot.
1070
+ cmap (str, optional):
1071
+ Colormap to use for the background rasterized data. Defaults to "iridescent";
1072
+ the alias "grey" is mapped to "greys" before the colormap is resolved.
1073
+ marker (str, default 'circle'):
1074
+ Marker type to use for feature and MS2 points.
1075
+ markersize (int, default 5):
1076
+ Base size of the markers used for plotting points.
1077
+ size (str, default 'static'):
1078
+ Controls marker sizing behavior. Options: 'dynamic', 'static', or 'slider'.
1079
+ - 'dynamic': Uses coordinate-based sizing that scales with zoom level (markers get larger when zooming in)
1080
+ - 'static': Uses screen-based sizing that remains constant regardless of zoom level
1081
+ - 'slider': Provides an interactive slider to dynamically adjust marker size
1082
+ raster_log (bool, default True):
1083
+ Use logarithmic scaling for raster intensity (True) or linear scaling (False).
1084
+ raster_min (float, default 1):
1085
+ Minimum intensity threshold for raster data filtering.
1086
+ raster_dynamic (bool, default True):
1087
+ Whether to use dynamic rasterization for the background point cloud.
1088
+ raster_max_px (int, default 8):
1089
+ Maximum pixel size for dynamic rasterization when using dynspread.
1090
+ raster_threshold (float, default 0.8):
1091
+ Threshold used for the dynspread process in dynamic rasterization.
1092
+ legend (str, optional):
1093
+ Legend position for categorical feature coloring ("top_right", "bottom_left", etc.) or None.
1094
+ Only applies when colorby is not None and contains categorical data.
1095
+ colorby (str, optional):
1096
+ Feature property to use for coloring. If None (default), uses current green/red scheme
1097
+ for features with/without MS2 data. If specified and contains categorical data, applies
1098
+ categorical coloring with legend support (similar to plot_2d_oracle).
1099
+ Behavior:
1100
+ - Checks that MS1 data is loaded by verifying that self.ms1_df is not None.
1101
+ - Selects the rt, mz and inty columns from the internal MS1 data (a Polars DataFrame) and filters out
1102
+ low-intensity points (inty < raster_min), then applies mz_range and rt_range if given.
1103
+ - Sets up the plot bounds for retention time (rt) and mass-to-charge ratio (mz) using a hook function.
1104
+ - Renders the MS1 data as a background rasterized image with a logarithmic intensity normalization.
1105
+ - Conditionally overlays feature points (with and without MS2 information), isotopes (if requested),
1106
+ and MS2 scan points based on internal DataFrame data.
1107
+ - Depending on the filename parameter, either displays the plot interactively using Panel or
1108
+ saves it as an HTML or PNG file.
1109
+ Returns:
1110
+ None
1111
+ Side Effects:
1112
+ - Logs an error and returns early if no MS1 data is available.
1113
+ - Either shows the plot interactively or writes the output to a file.
1114
+ """
1115
+
1116
+ if self.ms1_df is None:
1117
+ self.logger.error("No MS1 data available.")
1118
+ return
1119
+
1120
+ # Process colormap using the cmap package
1121
+ cmap_palette = _process_cmap(cmap, fallback="iridescent", logger=self.logger)
1122
+
1123
+ # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
1124
+ spectradf = self.ms1_df.select(["rt", "mz", "inty"])
1125
+ # remove any inty<raster_min
1126
+ spectradf = spectradf.filter(pl.col("inty") >= raster_min)
1127
+ # keep only rt, mz, and inty
1128
+ spectradf = spectradf.select(["rt", "mz", "inty"])
1129
+ if mz_range is not None:
1130
+ spectradf = spectradf.filter((pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]))
1131
+ if rt_range is not None:
1132
+ spectradf = spectradf.filter((pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]))
1133
+ maxrt = spectradf["rt"].max()
1134
+ minrt = spectradf["rt"].min()
1135
+ maxmz = spectradf["mz"].max()
1136
+ minmz = spectradf["mz"].min()
1137
+
1138
+ def new_bounds_hook(plot, elem):
1139
+ x_range = plot.state.x_range
1140
+ y_range = plot.state.y_range
1141
+ x_range.bounds = minrt, maxrt
1142
+ y_range.bounds = minmz, maxmz
1143
+
1144
+ points = hv.Points(
1145
+ spectradf,
1146
+ kdims=["rt", "mz"],
1147
+ vdims=["inty"],
1148
+ label="MS1 survey scans",
1149
+ ).opts(
1150
+ fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
1151
+ color=np.log(dim("inty")),
1152
+ colorbar=True,
1153
+ cmap="Magma",
1154
+ tools=["hover"],
1155
+ )
1156
+
1157
+ # Configure marker and size behavior based on size parameter
1158
+ use_dynamic_sizing = size.lower() in ["dyn", "dynamic"]
1159
+ use_slider_sizing = size.lower() == "slider"
1160
+
1161
+ def dynamic_sizing_hook(plot, element):
1162
+ """Hook to convert size-based markers to radius-based for dynamic behavior"""
1163
+ try:
1164
+ if use_dynamic_sizing and hasattr(plot, "state") and hasattr(plot.state, "renderers"):
1165
+ from bokeh.models import Circle
1166
+
1167
+ for renderer in plot.state.renderers:
1168
+ if hasattr(renderer, "glyph"):
1169
+ glyph = renderer.glyph
1170
+ # Check if it's a circle/scatter glyph that we can convert
1171
+ if hasattr(glyph, "size") and marker_type == "circle":
1172
+ # Create a new Circle glyph with radius instead of size
1173
+ new_glyph = Circle(
1174
+ x=glyph.x,
1175
+ y=glyph.y,
1176
+ radius=base_radius,
1177
+ fill_color=glyph.fill_color,
1178
+ line_color=glyph.line_color,
1179
+ fill_alpha=glyph.fill_alpha,
1180
+ line_alpha=glyph.line_alpha,
1181
+ )
1182
+ renderer.glyph = new_glyph
1183
+ except Exception:
1184
+ # Silently fail and use regular sizing if hook doesn't work
1185
+ pass
1186
+
1187
+ if use_dynamic_sizing:
1188
+ # Dynamic sizing: use coordinate-based sizing that scales with zoom
1189
+ marker_type = "circle"
1190
+ # Calculate radius based on data range for coordinate-based sizing
1191
+ rtrange = maxrt - minrt
1192
+ mzrange = maxmz - minmz
1193
+ # Use a fraction of the smaller dimension for radius
1194
+ base_radius = min(rtrange, mzrange) * 0.0005 * markersize
1195
+ size_1 = markersize # Use regular size initially, hook will convert to radius
1196
+ size_2 = markersize
1197
+ hooks = [dynamic_sizing_hook]
1198
+ elif use_slider_sizing:
1199
+ # Slider sizing: create an interactive slider for marker size
1200
+ marker_type = marker # Use the original marker parameter
1201
+ size_1 = markersize # Use markersize initially, will be updated by slider
1202
+ size_2 = markersize
1203
+ base_radius = None # Not used in slider mode
1204
+ hooks = []
1205
+ else:
1206
+ # Static sizing: use pixel-based sizing that stays fixed
1207
+ marker_type = marker # Use the original marker parameter
1208
+ size_1 = markersize
1209
+ size_2 = markersize
1210
+ base_radius = None # Not used in static mode
1211
+ hooks = []
1212
+
1213
+ color_1 = "forestgreen"
1214
+ color_2 = "darkorange"
1215
+
1216
+ # Handle colorby parameter for feature coloring
1217
+ use_categorical_coloring = False
1218
+ feature_colors = {}
1219
+ categorical_groups = []
1220
+
1221
+ if filename is not None:
1222
+ dyn = False
1223
+ if not filename.endswith(".html"):
1224
+ if use_dynamic_sizing:
1225
+ # For exported files, use smaller coordinate-based size
1226
+ size_1 = 2
1227
+ size_2 = 2
1228
+ else:
1229
+ size_1 = 2
1230
+ size_2 = 2
1231
+ color_1 = "forestgreen"
1232
+ color_2 = "darkorange"
1233
+ raster_dynamic = False
1234
+
1235
+ # For slider functionality, disable raster dynamic to avoid DynamicMap nesting
1236
+ if use_slider_sizing:
1237
+ raster_dynamic = False
1238
+
1239
+ dyn = raster_dynamic
1240
+ raster = hd.rasterize(
1241
+ points,
1242
+ aggregator=ds.max("inty"),
1243
+ interpolation="bilinear",
1244
+ dynamic=dyn, # alpha=10, min_alpha=0,
1245
+ ).opts(
1246
+ active_tools=["box_zoom"],
1247
+ cmap=cmap_palette,
1248
+ tools=["hover"],
1249
+ hooks=[new_bounds_hook],
1250
+ width=width,
1251
+ height=height,
1252
+ cnorm="log" if raster_log else "linear",
1253
+ xlabel="Retention time (s)",
1254
+ ylabel="m/z",
1255
+ colorbar=True,
1256
+ colorbar_position="right",
1257
+ axiswise=True,
1258
+ )
1259
+
1260
+ raster = hd.dynspread(
1261
+ raster,
1262
+ threshold=raster_threshold,
1263
+ how="add",
1264
+ shape="square",
1265
+ max_px=raster_max_px,
1266
+ )
1267
+ feature_points_1 = None
1268
+ feature_points_2 = None
1269
+ feature_points_3 = None
1270
+ feature_points_4 = None
1271
+ feature_points_iso = None
1272
+ # Plot features as red dots if features is True
1273
+ if self.features_df is not None and show_features:
1274
+ feats = self.features_df.clone()
1275
+ # Convert to pandas for operations that require pandas functionality
1276
+ if hasattr(feats, "to_pandas"):
1277
+ feats = feats.to_pandas()
1278
+ # if ms2_scans is not null, keep only the first element of the list
1279
+ feats["ms2_scans"] = feats["ms2_scans"].apply(
1280
+ lambda x: x[0] if isinstance(x, list) else x,
1281
+ )
1282
+ if mz_range is not None:
1283
+ feats = feats[(feats["mz"] >= mz_range[0]) & (feats["mz"] <= mz_range[1])]
1284
+ if rt_range is not None:
1285
+ feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
1286
+ # keep only iso==0, i.e. the main
1287
+ feats = feats[feats["iso"] == 0]
1288
+
1289
+ # Handle colorby parameter
1290
+ if colorby is not None and colorby in feats.columns:
1291
+ # Check if colorby data is categorical (string-like)
1292
+ colorby_values = feats[colorby].dropna()
1293
+ is_categorical = (
1294
+ feats[colorby].dtype in ["object", "string", "category"] or
1295
+ (len(colorby_values) > 0 and isinstance(colorby_values.iloc[0], str))
1296
+ )
1297
+
1298
+ if is_categorical:
1299
+ use_categorical_coloring = True
1300
+ # Get unique categories, sorted
1301
+ categorical_groups = sorted(feats[colorby].dropna().unique())
1302
+
1303
+ # Set up colors for categorical data using matplotlib colormap
1304
+ from matplotlib.colors import to_hex
1305
+ try:
1306
+ from matplotlib.cm import get_cmap
1307
+ colormap_func = get_cmap(cmap if cmap != 'iridescent' else 'tab20')
1308
+ feature_colors = {}
1309
+ for i, group in enumerate(categorical_groups):
1310
+ if len(categorical_groups) <= 20:
1311
+ # Use qualitative colors for small number of categories
1312
+ color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
1313
+ else:
1314
+ # Use continuous colormap for many categories
1315
+ color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
1316
+ feature_colors[group] = to_hex(color_val)
1317
+ except Exception as e:
1318
+ self.logger.warning(f"Could not set up categorical coloring: {e}, using default colors")
1319
+ use_categorical_coloring = False
1320
+
1321
+ if use_categorical_coloring and colorby is not None:
1322
+ # Create separate feature points for each category
1323
+ for i, group in enumerate(categorical_groups):
1324
+ group_features = feats[feats[colorby] == group]
1325
+ if len(group_features) == 0:
1326
+ continue
1327
+
1328
+ # Split by MS2 status
1329
+ group_with_ms2 = group_features[group_features["ms2_scans"].notnull()]
1330
+ group_without_ms2 = group_features[group_features["ms2_scans"].isnull()]
1331
+
1332
+ group_color = feature_colors.get(group, color_1)
1333
+
1334
+ if len(group_with_ms2) > 0:
1335
+ feature_hover = HoverTool(
1336
+ tooltips=[
1337
+ ("rt", "@rt"),
1338
+ ("m/z", "@mz{0.0000}"),
1339
+ ("feature_uid", "@feature_uid"),
1340
+ ("inty", "@inty"),
1341
+ ("iso", "@iso"),
1342
+ ("adduct", "@adduct"),
1343
+ ("chrom_coherence", "@chrom_coherence"),
1344
+ ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1345
+ (colorby, f"@{colorby}"),
1346
+ ],
1347
+ )
1348
+ group_points_ms2 = hv.Points(
1349
+ group_with_ms2,
1350
+ kdims=["rt", "mz"],
1351
+ vdims=[
1352
+ "feature_uid",
1353
+ "inty",
1354
+ "iso",
1355
+ "adduct",
1356
+ "ms2_scans",
1357
+ "chrom_coherence",
1358
+ "chrom_prominence_scaled",
1359
+ colorby,
1360
+ ],
1361
+ label=f"{group} (MS2)",
1362
+ ).options(
1363
+ color=group_color,
1364
+ marker=marker_type,
1365
+ size=size_1,
1366
+ tools=[feature_hover],
1367
+ hooks=hooks,
1368
+ )
1369
+ if feature_points_1 is None:
1370
+ feature_points_1 = group_points_ms2
1371
+ else:
1372
+ feature_points_1 = feature_points_1 * group_points_ms2
1373
+
1374
+ if len(group_without_ms2) > 0:
1375
+ feature_hover = HoverTool(
1376
+ tooltips=[
1377
+ ("rt", "@rt"),
1378
+ ("m/z", "@mz{0.0000}"),
1379
+ ("feature_uid", "@feature_uid"),
1380
+ ("inty", "@inty"),
1381
+ ("iso", "@iso"),
1382
+ ("adduct", "@adduct"),
1383
+ ("chrom_coherence", "@chrom_coherence"),
1384
+ ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1385
+ (colorby, f"@{colorby}"),
1386
+ ],
1387
+ )
1388
+ group_points_no_ms2 = hv.Points(
1389
+ group_without_ms2,
1390
+ kdims=["rt", "mz"],
1391
+ vdims=[
1392
+ "feature_uid",
1393
+ "inty",
1394
+ "iso",
1395
+ "adduct",
1396
+ "chrom_coherence",
1397
+ "chrom_prominence_scaled",
1398
+ colorby,
1399
+ ],
1400
+ label=f"{group} (no MS2)",
1401
+ ).options(
1402
+ color=group_color,
1403
+ marker=marker_type,
1404
+ size=size_2,
1405
+ tools=[feature_hover],
1406
+ hooks=hooks,
1407
+ )
1408
+ if feature_points_2 is None:
1409
+ feature_points_2 = group_points_no_ms2
1410
+ else:
1411
+ feature_points_2 = feature_points_2 * group_points_no_ms2
1412
+ else:
1413
+ # Use original green/red coloring scheme for MS2 presence
1414
+ # find features with ms2_scans not None and iso==0
1415
+ features_df = feats[feats["ms2_scans"].notnull()]
1416
+ # Create feature points with proper sizing method
1417
+ feature_hover_1 = HoverTool(
1418
+ tooltips=[
1419
+ ("rt", "@rt"),
1420
+ ("m/z", "@mz{0.0000}"),
1421
+ ("feature_uid", "@feature_uid"),
1422
+ ("inty", "@inty"),
1423
+ ("iso", "@iso"),
1424
+ ("adduct", "@adduct"),
1425
+ ("chrom_coherence", "@chrom_coherence"),
1426
+ ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1427
+ ],
1428
+ )
1429
+ if len(features_df) > 0:
1430
+ feature_points_1 = hv.Points(
1431
+ features_df,
1432
+ kdims=["rt", "mz"],
1433
+ vdims=[
1434
+ "feature_uid",
1435
+ "inty",
1436
+ "iso",
1437
+ "adduct",
1438
+ "ms2_scans",
1439
+ "chrom_coherence",
1440
+ "chrom_prominence_scaled",
1441
+ ],
1442
+ label="Features with MS2 data",
1443
+ ).options(
1444
+ color=color_1,
1445
+ marker=marker_type,
1446
+ size=size_1,
1447
+ tools=[feature_hover_1],
1448
+ hooks=hooks,
1449
+ )
1450
+
1451
+ # find features without MS2 data
1452
+ features_df = feats[feats["ms2_scans"].isnull()]
1453
+ feature_hover_2 = HoverTool(
1454
+ tooltips=[
1455
+ ("rt", "@rt"),
1456
+ ("m/z", "@mz{0.0000}"),
1457
+ ("feature_uid", "@feature_uid"),
1458
+ ("inty", "@inty"),
1459
+ ("iso", "@iso"),
1460
+ ("adduct", "@adduct"),
1461
+ ("chrom_coherence", "@chrom_coherence"),
1462
+ ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1463
+ ],
1464
+ )
1465
+ if len(features_df) > 0:
1466
+ feature_points_2 = hv.Points(
1467
+ features_df,
1468
+ kdims=["rt", "mz"],
1469
+ vdims=[
1470
+ "feature_uid",
1471
+ "inty",
1472
+ "iso",
1473
+ "adduct",
1474
+ "chrom_coherence",
1475
+ "chrom_prominence_scaled",
1476
+ ],
1477
+ label="Features without MS2 data",
1478
+ ).options(
1479
+ color="red",
1480
+ marker=marker_type,
1481
+ size=size_2,
1482
+ tools=[feature_hover_2],
1483
+ hooks=hooks,
1484
+ )
1485
+
1486
+ if show_isotopes:
761
1487
  # Use proper Polars filter syntax to avoid boolean indexing issues
762
1488
  features_df = self.features_df.filter(pl.col("iso") > 0)
763
1489
  # Convert to pandas for plotting compatibility
@@ -869,6 +1595,31 @@ def plot_2d(
869
1595
  if title is not None:
870
1596
  overlay = overlay.opts(title=title)
871
1597
 
1598
+ # Handle legend positioning for categorical coloring
1599
+ if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
1600
+ # Map legend position parameter to HoloViews legend position
1601
+ legend_position_map = {
1602
+ "top_right": "top_right",
1603
+ "top_left": "top_left",
1604
+ "bottom_right": "bottom_right",
1605
+ "bottom_left": "bottom_left",
1606
+ "right": "right",
1607
+ "left": "left",
1608
+ "top": "top",
1609
+ "bottom": "bottom"
1610
+ }
1611
+
1612
+ hv_legend_pos = legend_position_map.get(legend, "bottom_right")
1613
+
1614
+ # Apply legend configuration to the overlay
1615
+ overlay = overlay.opts(
1616
+ legend_position=hv_legend_pos,
1617
+ legend_opts={'title': '', 'padding': 2, 'spacing': 2}
1618
+ )
1619
+ elif legend is None and use_categorical_coloring:
1620
+ # Explicitly hide legend when legend=None but categorical coloring is used
1621
+ overlay = overlay.opts(show_legend=False)
1622
+
872
1623
  # Handle slider functionality
873
1624
  if use_slider_sizing:
874
1625
  # For slider functionality, we need to work with the feature points directly
@@ -931,569 +1682,205 @@ def plot_2d(
931
1682
  on.pane.HTML("<b>Marker Size Control:</b>", width=150, height=40, margin=(5, 10)),
932
1683
  size_slider,
933
1684
  height=60,
934
- margin=10,
935
- )
936
-
937
- # Create slider widget
938
- size_slider = on.widgets.FloatSlider(
939
- name="Marker Size",
940
- start=1.0,
941
- end=20.0,
942
- step=0.5,
943
- value=markersize,
944
- width=300,
945
- height=40,
946
- margin=(5, 5),
947
- show_value=True,
948
- )
949
-
950
- slider_widget = on.Row(
951
- on.pane.HTML("<b>Marker Size:</b>", width=100, height=40, margin=(5, 10)),
952
- size_slider,
953
- height=60,
954
- margin=10,
955
- )
956
-
957
- # Simple reactive plot - slider mode doesn't use dynamic rasterization
958
- @on.depends(size_slider.param.value)
959
- def reactive_plot(size_val):
960
- overlay = create_feature_overlay(float(size_val))
961
- # Apply static rasterization for slider mode
962
- if raster_dynamic:
963
- return hd.rasterize(
964
- overlay,
965
- aggregator=ds.count(),
966
- width=raster_max_px,
967
- height=raster_max_px,
968
- dynamic=False, # Static raster for slider mode
969
- ).opts(
970
- cnorm="eq_hist",
971
- tools=["hover"],
972
- width=width,
973
- height=height,
974
- )
975
- else:
976
- return overlay
977
-
978
- # Create layout
979
- layout = on.Column(slider_widget, reactive_plot, sizing_mode="stretch_width")
980
-
981
- # Handle filename saving for slider mode
982
- if filename is not None:
983
- if filename.endswith(".html"):
984
- layout.save(filename, embed=True)
985
- else:
986
- # For slider plots, save the current state
987
- hv.save(create_feature_overlay(markersize), filename, fmt="png")
988
- else:
989
- # Use show() for display in notebook
990
- layout.show()
991
- else:
992
- # Create a panel layout without slider
993
- layout = panel.Column(overlay)
994
-
995
- # Handle display logic based on show_in_browser and raster_dynamic
996
- if filename is not None:
997
- # Use consistent save/display behavior
998
- self._handle_sample_plot_output(layout, filename, "panel")
999
- else:
1000
- # Show in browser if both show_in_browser and raster_dynamic are True
1001
- if show_in_browser and raster_dynamic:
1002
- layout.show()
1003
- else:
1004
- # Return to notebook for inline display
1005
- return layout
1006
-
1007
-
1008
- def plot_2d_oracle(
1009
- self,
1010
- oracle_folder=None,
1011
- link_by_feature_uid=None,
1012
- colorby="hg",
1013
- filename=None,
1014
- min_id_level=None,
1015
- max_id_level=None,
1016
- min_ms_level=None,
1017
- title=None,
1018
- cmap=None,
1019
- markersize=10,
1020
- raster_dynamic=True,
1021
- raster_max_px=8,
1022
- raster_threshold=0.8,
1023
- mz_range=None,
1024
- rt_range=None,
1025
- ):
1026
- """
1027
- Plot a 2D overlay visualization of MS1 survey scans and feature annotations, including oracle annotation data if provided.
1028
-
1029
- This function reads the primary mass spectrometry data, applies filtering, processes oracle annotation data (if provided),
1030
- and produces an interactive plot combining various data layers. The visualization includes rasterized MS1 data and feature
1031
- points colored by annotation.
1032
-
1033
- Parameters:
1034
- self: The object instance containing MS1 and feature data.
1035
- oracle_folder (str, optional): Path to the oracle folder containing the annotation file
1036
- (expected at "<oracle_folder>/diag/summary_by_feature.csv"). If None, oracle data is not used.
1037
- link_by_feature_uid (bool, optional): Whether to link features by their IDs in the overlay.
1038
- colorby (str, optional): Parameter that determines the color assignment for annotated features.
1039
- Expected values include 'hg', 'class', 'id_class', or 'id_hg'. Default is 'hg'.
1040
- filename (str, optional): Name of the file where the plot should be saved. If provided and ends with
1041
- ".html", the panel layout is saved as an interactive HTML file; otherwise, the output is saved as a PNG.
1042
- min_id_level (int, optional): Minimum identification level for oracle annotations to include.
1043
- max_id_level (int, optional): Maximum identification level for oracle annotations to include.
1044
- min_ms_level (int, optional): Minimum MS level for features to include.
1045
- title (str, optional): Title to be displayed on the resulting plot. Default is None.
1046
- cmap (str, optional): Colormap to be used for the rasterized plot. Acceptable values include None, "grey",
1047
- "iridescent", or other valid colormap names. Default is None. When None, 'Greys256' is used.
1048
- markersize (int, optional): Marker size for feature points in the overlay. Default is 10.
1049
- raster_dynamic (bool, optional): If True, enables dynamic rasterization of the overlay. If filename is provided
1050
- and does not end with ".html", raster_dynamic is set to False. Default is True.
1051
- raster_max_px (int, optional): Maximum pixel size for dynamic rasterization. Default is 8.
1052
- raster_threshold (float, optional): Threshold for dynamic raster spread. Default is 0.8.
1053
- mz_range (tuple, optional): m/z range for filtering MS1 data.
1054
- rt_range (tuple, optional): Retention time range for filtering MS1 data.
1055
-
1056
- Returns:
1057
- None
1058
-
1059
- The function either displays the interactive panel layout or saves the visualization to a file based on
1060
- the provided filename. If the primary file object or feature data is missing, the function prints an
1061
- informative message and returns without plotting.
1062
- """
1063
-
1064
- if self.file_obj is None:
1065
- print("Please load a file first.")
1066
- return
1067
-
1068
- # Process colormap using the cmap package
1069
- cmap_palette = _process_cmap(cmap, fallback="Greys256", logger=self.logger)
1070
-
1071
- # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
1072
- spectradf = self.ms1_df.to_pandas()
1073
-
1074
- # remove any inty<1
1075
- spectradf = spectradf[spectradf["inty"] >= 1]
1076
- # keep only rt, mz, and inty
1077
- spectradf = spectradf[["rt", "mz", "inty"]]
1078
- if mz_range is not None:
1079
- spectradf = spectradf[(spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])]
1080
- if rt_range is not None:
1081
- spectradf = spectradf[(spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])]
1082
-
1083
- maxrt = spectradf["rt"].max()
1084
- minrt = spectradf["rt"].min()
1085
- maxmz = spectradf["mz"].max()
1086
- minmz = spectradf["mz"].min()
1087
-
1088
- def new_bounds_hook(plot, elem):
1089
- x_range = plot.state.x_range
1090
- y_range = plot.state.y_range
1091
- x_range.bounds = minrt, maxrt
1092
- y_range.bounds = minmz, maxmz
1093
-
1094
- points = hv.Points(
1095
- spectradf,
1096
- kdims=["rt", "mz"],
1097
- vdims=["inty"],
1098
- label="MS1 survey scans",
1099
- ).opts(
1100
- fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
1101
- color=np.log(dim("inty")),
1102
- colorbar=True,
1103
- cmap="Magma",
1104
- tools=["hover"],
1105
- )
1106
-
1107
- if filename is not None:
1108
- dyn = False
1109
- if not filename.endswith(".html"):
1110
- raster_dynamic = False
1111
-
1112
- dyn = raster_dynamic
1113
- raster = hd.rasterize(
1114
- points,
1115
- aggregator=ds.max("inty"),
1116
- interpolation="bilinear",
1117
- dynamic=dyn, # alpha=10, min_alpha=0,
1118
- ).opts(
1119
- active_tools=["box_zoom"],
1120
- cmap=cmap_palette,
1121
- tools=["hover"],
1122
- hooks=[new_bounds_hook],
1123
- width=1000,
1124
- height=1000,
1125
- cnorm="log",
1126
- xlabel="Retention time (s)",
1127
- ylabel="m/z",
1128
- colorbar=True,
1129
- colorbar_position="right",
1130
- axiswise=True,
1131
- )
1132
- raster = hd.dynspread(
1133
- raster,
1134
- threshold=raster_threshold,
1135
- how="add",
1136
- shape="square",
1137
- max_px=raster_max_px,
1138
- )
1139
-
1140
- if self.features_df is None:
1141
- return
1142
- feats = self.features_df.clone()
1143
-
1144
- # Convert to pandas for oracle operations that require pandas functionality
1145
- if hasattr(feats, "to_pandas"):
1146
- feats = feats.to_pandas()
1147
-
1148
- # check if annotationfile is not None
1149
- if oracle_folder is None:
1150
- return
1151
- # try to read the annotationfile as a csv file and add it to feats
1152
- try:
1153
- oracle_data = pd.read_csv(
1154
- os.path.join(oracle_folder, "diag", "summary_by_feature.csv"),
1155
- )
1156
- except Exception:
1157
- print(f"Could not read {oracle_folder}/diag/summary_by_feature.csv")
1158
- return
1159
-
1160
- if link_by_feature_uid:
1161
- # scan_idx slaw_id slaw_ms2_id mz rt level formula ion species name rarity lib_id hg mod lib score score2 score_db score_db_data ms2_tic ms2_evidence ms2_matched_n ms2_missed_n ms2_matched ms2_missed ms2_top1
1162
- cols_to_keep = [
1163
- "title",
1164
- "scan_idx",
1165
- "mslevel",
1166
- "hits",
1167
- "id_level",
1168
- "id_label",
1169
- "id_ion",
1170
- "id_class",
1171
- "id_evidence",
1172
- "score",
1173
- "score2",
1174
- ]
1175
- oracle_data = oracle_data[cols_to_keep]
1176
- # extract feature_uid from title. It begins with "fid:XYZ;"
1177
- oracle_data["feature_uid"] = oracle_data["title"].str.extract(r"fid:(\d+)")
1178
- oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
1179
- # sort by id_level, remove duplicate feature_uid, keep the first one
1180
- oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
1181
- oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")
1182
- else:
1183
- cols_to_keep = [
1184
- "precursor",
1185
- "rt",
1186
- "title",
1187
- "scan_idx",
1188
- "mslevel",
1189
- "hits",
1190
- "id_level",
1191
- "id_label",
1192
- "id_ion",
1193
- "id_class",
1194
- "id_evidence",
1195
- "score",
1196
- "score2",
1197
- ]
1198
- # link
1199
- oracle_data = oracle_data[cols_to_keep]
1200
- oracle_data["feature_uid"] = None
1201
- # iterate over the rows and find the feature_uid in feats by looking at the closest rt and mz
1202
- for i, row in oracle_data.iterrows():
1203
- candidates = feats[
1204
- (abs(feats["rt"] - row["rt"]) < 1) & (abs(feats["mz"] - row["precursor"]) < 0.005)
1205
- ].copy()
1206
- if len(candidates) > 0:
1207
- # sort by delta rt
1208
- candidates["delta_rt"] = abs(candidates["rt"] - row["rt"])
1209
- candidates = candidates.sort_values(by=["delta_rt"])
1210
- oracle_data.at[i, "feature_uid"] = candidates["feature_uid"].values[0]
1211
- # remove precursor and rt columns
1212
- oracle_data = oracle_data.drop(columns=["precursor", "rt"])
1685
+ margin=10,
1686
+ )
1213
1687
 
1214
- feats = feats.merge(oracle_data, how="left", on="feature_uid")
1688
+ # Create slider widget
1689
+ size_slider = on.widgets.FloatSlider(
1690
+ name="Marker Size",
1691
+ start=1.0,
1692
+ end=20.0,
1693
+ step=0.5,
1694
+ value=markersize,
1695
+ width=300,
1696
+ height=40,
1697
+ margin=(5, 5),
1698
+ show_value=True,
1699
+ )
1215
1700
 
1216
- # filter feats by id_level
1217
- if min_id_level is not None:
1218
- feats = feats[(feats["id_level"] >= min_id_level)]
1219
- if max_id_level is not None:
1220
- feats = feats[(feats["id_level"] <= max_id_level)]
1221
- if min_ms_level is not None:
1222
- feats = feats[(feats["mslevel"] >= min_ms_level)]
1701
+ slider_widget = on.Row(
1702
+ on.pane.HTML("<b>Marker Size:</b>", width=100, height=40, margin=(5, 10)),
1703
+ size_slider,
1704
+ height=60,
1705
+ margin=10,
1706
+ )
1223
1707
 
1224
- feats["color"] = "black"
1708
+ # Simple reactive plot - slider mode doesn't use dynamic rasterization
1709
+ @on.depends(size_slider.param.value)
1710
+ def reactive_plot(size_val):
1711
+ overlay = create_feature_overlay(float(size_val))
1712
+ # Apply static rasterization for slider mode
1713
+ if raster_dynamic:
1714
+ return hd.rasterize(
1715
+ overlay,
1716
+ aggregator=ds.count(),
1717
+ width=raster_max_px,
1718
+ height=raster_max_px,
1719
+ dynamic=False, # Static raster for slider mode
1720
+ ).opts(
1721
+ cnorm="eq_hist",
1722
+ tools=["hover"],
1723
+ width=width,
1724
+ height=height,
1725
+ )
1726
+ else:
1727
+ return overlay
1225
1728
 
1226
- cvalues = None
1227
- if colorby in ["class", "hg", "id_class", "id_hg"]:
1228
- # replace nans in feats['id_class'] with 'mix'
1229
- feats["id_class"] = feats["id_class"].fillna("mix")
1230
- cvalues = feats["id_class"].unique()
1231
- # sort alphabetically
1232
- cvalues = sorted(cvalues)
1233
- # flip the strings left to right
1234
- fcvalues = [cvalues[i][::-1] for i in range(len(cvalues))]
1235
- # sort in alphabetical order the flipped strings and return the index
1236
- idx = np.argsort(fcvalues)
1237
- # apply to cvalues
1238
- cvalues = [cvalues[i] for i in idx]
1239
- elif colorby in ["ion", "id_ion"]:
1240
- cvalues = feats["id_ion"].unique()
1241
- elif colorby in ["id_evidence", "ms2_evidence"]:
1242
- cvalues = feats["id_evidence"].unique()
1729
+ # Create layout
1730
+ layout = on.Column(slider_widget, reactive_plot, sizing_mode="stretch_width")
1243
1731
 
1244
- if cvalues is not None:
1245
- num_colors = len(cvalues)
1246
-
1247
- # Use cmap package for categorical colormap
1248
- try:
1249
- if Colormap is not None:
1250
- # Use rainbow colormap for categorical data
1251
- colormap = Colormap("rainbow")
1252
- colors = []
1253
- for i in range(num_colors):
1254
- # Generate evenly spaced colors across the colormap
1255
- t = i / (num_colors - 1) if num_colors > 1 else 0.5
1256
- color = colormap(t)
1257
- # Convert to hex
1258
- import matplotlib.colors as mcolors
1259
- # Convert color to hex - handle different color formats
1260
- if hasattr(color, '__len__') and len(color) >= 3:
1261
- # It's an array-like color (RGB or RGBA)
1262
- colors.append(mcolors.rgb2hex(color[:3]))
1263
- else:
1264
- # It's a single value, convert to RGB
1265
- colors.append(mcolors.rgb2hex([color, color, color]))
1732
+ # Handle filename saving for slider mode
1733
+ if filename is not None:
1734
+ if filename.endswith(".html"):
1735
+ layout.save(filename, embed=True)
1266
1736
  else:
1267
- # Fallback to original method
1268
- cmap = "rainbow"
1269
- cmap_provider = "colorcet"
1270
- cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
1271
- colors = [
1272
- rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
1273
- for i in range(num_colors)
1274
- ]
1275
- except Exception:
1276
- # Final fallback to original method
1277
- cmap = "rainbow"
1278
- cmap_provider = "colorcet"
1279
- cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
1280
- colors = [
1281
- rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
1282
- for i in range(num_colors)
1283
- ]
1284
-
1285
- # assign color to each row based on id_class. If id_class is null, assign 'black'
1286
- feats["color"] = "black"
1737
+ # For slider plots, save the current state
1738
+ hv.save(create_feature_overlay(markersize), filename, fmt="png")
1739
+ else:
1740
+ # Use show() for display in notebook
1741
+ layout.show()
1742
+ else:
1743
+ # Create a panel layout without slider
1744
+ layout = panel.Column(overlay)
1287
1745
 
1288
- for i, c in enumerate(cvalues):
1289
- if colorby in ["class", "hg", "id_class", "id_hg"]:
1290
- feats.loc[feats["id_class"] == c, "color"] = colors[i]
1291
- elif colorby in ["ion", "id_ion"]:
1292
- feats.loc[feats["id_ion"] == c, "color"] = colors[i]
1293
- elif colorby in ["id_evidence", "ms2_evidence"]:
1294
- feats.loc[feats["id_evidence"] == c, "color"] = colors[i]
1746
+ # Handle display logic based on show_in_browser and raster_dynamic
1747
+ if filename is not None:
1748
+ # Use consistent save/display behavior
1749
+ self._handle_sample_plot_output(layout, filename, "panel")
1750
+ else:
1751
+ # Show in browser if both show_in_browser and raster_dynamic are True
1752
+ if show_in_browser and raster_dynamic:
1753
+ layout.show()
1754
+ else:
1755
+ # Return to notebook for inline display
1756
+ return layout
1295
1757
 
1296
- # replace NaN with 0 in id_level
1297
- feats["id_level"] = feats["id_level"].fillna(0)
1298
- # feature_points_1 are all features with column ms2_scans not null
1299
- feature_points_1 = None
1300
- feat_df = feats.copy()
1301
- feat_df = feat_df[feat_df["id_level"] == 2]
1302
-
1303
- oracle_hover_1 = HoverTool(
1304
- tooltips=[
1305
- ("rt", "@rt"),
1306
- ("m/z", "@mz{0.0000}"),
1307
- ("feature_uid", "@feature_uid"),
1308
- ("id_level", "@id_level"),
1309
- ("id_class", "@id_class"),
1310
- ("id_label", "@id_label"),
1311
- ("id_ion", "@id_ion"),
1312
- ("id_evidence", "@id_evidence"),
1313
- ("score", "@score"),
1314
- ("score2", "@score2"),
1315
- ],
1316
- )
1317
- feature_points_1 = hv.Points(
1318
- feat_df,
1319
- kdims=["rt", "mz"],
1320
- vdims=[
1321
- "inty",
1322
- "feature_uid",
1323
- "id_level",
1324
- "id_class",
1325
- "id_label",
1326
- "id_ion",
1327
- "id_evidence",
1328
- "score",
1329
- "score2",
1330
- "color",
1331
- ],
1332
- label="ID by MS2",
1333
- ).options(
1334
- color="color",
1335
- marker="circle",
1336
- size=markersize,
1337
- fill_alpha=1.0,
1338
- tools=[oracle_hover_1],
1339
- )
1340
1758
 
1341
- # feature_points_2 are all features that have ms2_scans not null and id_level ==1
1342
- feature_points_2 = None
1343
- feat_df = feats.copy()
1344
- feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] == 1)]
1345
- if len(feat_df) > 0:
1346
- oracle_hover_2 = HoverTool(
1347
- tooltips=[
1348
- ("rt", "@rt"),
1349
- ("m/z", "@mz{0.0000}"),
1350
- ("feature_uid", "@feature_uid"),
1351
- ("id_level", "@id_level"),
1352
- ("id_label", "@id_label"),
1353
- ("id_ion", "@id_ion"),
1354
- ("id_class", "@id_class"),
1355
- ],
1356
- )
1357
- feature_points_2 = hv.Points(
1358
- feat_df,
1359
- kdims=["rt", "mz"],
1360
- vdims=[
1361
- "inty",
1362
- "feature_uid",
1363
- "id_level",
1364
- "id_label",
1365
- "id_ion",
1366
- "id_class",
1367
- "color",
1368
- ],
1369
- label="ID by MS1, with MS2",
1370
- ).options(
1371
- color="color",
1372
- marker="circle",
1373
- size=markersize,
1374
- fill_alpha=0.0,
1375
- tools=[oracle_hover_2],
1376
- )
1759
+ def plot_2d_oracle(
1760
+ self,
1761
+ oracle_folder=None,
1762
+ link_by_feature_uid=True,
1763
+ min_id_level=1,
1764
+ max_id_level=4,
1765
+ min_ms_level=2,
1766
+ colorby="hg",
1767
+ legend_groups=None,
1768
+ markersize=5,
1769
+ cmap='Turbo',
1770
+ raster_cmap='grey',
1771
+ raster_log=True,
1772
+ raster_min=1,
1773
+ raster_dynamic=True,
1774
+ raster_max_px=8,
1775
+ raster_threshold=0.8,
1776
+ mz_range=None,
1777
+ rt_range=None,
1778
+ width=750,
1779
+ height=600,
1780
+ filename=None,
1781
+ title=None,
1782
+ legend="bottom_right",
1783
+ ):
1784
+ """
1785
+ Plot a 2D visualization combining MS1 raster data and oracle-annotated features.
1377
1786
 
1378
- # feature_points_3 are all features that have ms2_scans null and id_level ==1
1379
- feature_points_3 = None
1380
- feat_df = feats.copy()
1381
- feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] == 1)]
1382
- if len(feat_df) > 0:
1383
- oracle_hover_3 = HoverTool(
1384
- tooltips=[
1385
- ("rt", "@rt"),
1386
- ("m/z", "@mz{0.0000}"),
1387
- ("feature_uid", "@feature_uid"),
1388
- ("id_level", "@id_level"),
1389
- ("id_label", "@id_label"),
1390
- ("id_ion", "@id_ion"),
1391
- ("id_class", "@id_class"),
1392
- ],
1393
- )
1394
- feature_points_3 = hv.Points(
1395
- feat_df,
1396
- kdims=["rt", "mz"],
1397
- vdims=[
1398
- "inty",
1399
- "feature_uid",
1400
- "id_level",
1401
- "id_label",
1402
- "id_ion",
1403
- "id_class",
1404
- "color",
1405
- ],
1406
- label="ID by MS1, no MS2",
1407
- ).options(
1408
- color="color",
1409
- marker="diamond",
1410
- size=markersize,
1411
- fill_alpha=0.0,
1412
- tools=[oracle_hover_3],
1413
- )
1787
+ Creates an interactive plot overlaying MS1 survey scan data with feature annotations
1788
+ from oracle files. Features are colored categorically based on identification class,
1789
+ ion type, or evidence level.
1414
1790
 
1415
- # feature_points_4 are all features that have ms2_scans not null and id_level < 1
1416
- feature_points_4 = None
1417
- feat_df = feats.copy()
1418
- feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] < 1)]
1419
- if len(feat_df) > 0:
1420
- oracle_hover_4 = HoverTool(
1421
- tooltips=[
1422
- ("rt", "@rt"),
1423
- ("m/z", "@mz{0.0000}"),
1424
- ("feature_uid", "@feature_uid"),
1425
- ("inty", "@inty"),
1426
- ],
1427
- )
1428
- feature_points_4 = hv.Points(
1429
- feat_df,
1430
- kdims=["rt", "mz"],
1431
- vdims=["inty", "feature_uid"],
1432
- label="No ID, with MS2",
1433
- ).options(
1434
- color="gray",
1435
- marker="circle",
1436
- size=markersize,
1437
- fill_alpha=0.0,
1438
- tools=[oracle_hover_4],
1439
- )
1791
+ Parameters:
1792
+ oracle_folder (str, optional): Path to oracle folder containing
1793
+ "diag/summary_by_feature.csv". Required: if None, the method returns None without plotting.
1794
+ link_by_feature_uid (bool): Whether to link features by UID (True) or by m/z/RT proximity.
1795
+ min_id_level (int): Minimum identification confidence level to include.
1796
+ max_id_level (int): Maximum identification confidence level to include.
1797
+ min_ms_level (int): Minimum MS level for features to include.
1798
+ colorby (str): Feature coloring scheme - "id_class", "id_ion", "id_evidence", etc.
1799
+ legend_groups (list, optional): List of groups to include in legend and coloring scheme.
1800
+ If provided, legend will show exactly these groups. 'mix' is automatically added
1801
+ as the last group to contain points not matching other groups. Works for all
1802
+ categorical coloring types (id_class, id_ion, id_evidence, etc.).
1803
+ If None (default), all groups present in the data will be shown without filtering.
1804
+ All specified classes will appear in the legend even if no features are present.
1805
+ markersize (int): Size of feature markers.
1806
+ cmap (str): Colormap name for categorical coloring.
1807
+ raster_cmap (str): Colormap for MS1 raster background.
1808
+ raster_log (bool): Use logarithmic scaling for raster intensity (True) or linear scaling (False).
1809
+ raster_min (float): Minimum intensity threshold for raster data filtering.
1810
+ raster_dynamic (bool): Enable dynamic rasterization.
1811
+ raster_threshold (float): Dynamic raster spread threshold.
1812
+ raster_max_px (int): Maximum pixel size for rasterization.
1813
+ mz_range (tuple, optional): m/z range filter (min, max).
1814
+ rt_range (tuple, optional): Retention time range filter (min, max).
1815
+ width/height (int): Plot dimensions in pixels.
1816
+ filename (str, optional): Export filename (.html/.svg/.png). If None, displays inline.
1817
+ title (str, optional): Plot title.
1818
+ legend (str, optional): Legend position ("top_right", "bottom_left", etc.) or None.
1440
1819
 
1441
- # feature_points_5 are all features that have ms2_scans null and id_level ==0
1442
- feature_points_5 = None
1443
- feat_df = feats.copy()
1444
- feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] < 1)]
1445
- if len(feat_df) > 0:
1446
- oracle_hover_5 = HoverTool(
1447
- tooltips=[
1448
- ("rt", "@rt"),
1449
- ("m/z", "@mz{0.0000}"),
1450
- ("feature_uid", "@feature_uid"),
1451
- ("inty", "@inty"),
1452
- ],
1453
- )
1454
- feature_points_5 = hv.Points(
1455
- feat_df,
1456
- kdims=["rt", "mz"],
1457
- vdims=["inty", "feature_uid"],
1458
- label="No ID, no MS2",
1459
- ).options(
1460
- color="gray",
1461
- marker="diamond",
1462
- fill_alpha=0.0,
1463
- size=markersize,
1464
- tools=[oracle_hover_5],
1465
- )
1820
+ Returns:
1821
+ HoloViews layout for display (if filename is None), otherwise None.
1822
+ """
1466
1823
 
1467
- overlay = raster
1824
+ self.logger.info(f"Starting plot_2d_oracle with oracle_folder: {oracle_folder}")
1825
+ self.logger.debug(f"Parameters - link_by_feature_uid: {link_by_feature_uid}, min_id_level: {min_id_level}, max_id_level: {max_id_level}")
1826
+ self.logger.debug(f"Plot parameters - colorby: {colorby}, markersize: {markersize}, filename: {filename}")
1468
1827
 
1469
- if feature_points_1 is not None:
1470
- overlay = overlay * feature_points_1
1471
- if feature_points_2 is not None:
1472
- overlay = overlay * feature_points_2
1473
- if feature_points_3 is not None:
1474
- overlay = overlay * feature_points_3
1475
- if feature_points_4 is not None:
1476
- overlay = overlay * feature_points_4
1477
- # if not show_only_features_with_ms2:
1478
- if feature_points_5 is not None:
1479
- overlay = overlay * feature_points_5
1828
+ # Early validation
1829
+ if self.features_df is None:
1830
+ self.logger.error("Cannot plot 2D oracle: features_df is not available")
1831
+ return
1832
+
1833
+ if oracle_folder is None:
1834
+ self.logger.info("No oracle folder provided, plotting features only")
1835
+ return
1480
1836
 
1481
- if title is not None:
1482
- overlay = overlay.opts(title=title)
1837
+ # Create raster plot layer
1838
+ raster = _create_raster_plot(
1839
+ self,
1840
+ mz_range=mz_range,
1841
+ rt_range=rt_range,
1842
+ raster_cmap=raster_cmap,
1843
+ raster_log=raster_log,
1844
+ raster_min=raster_min,
1845
+ raster_dynamic=raster_dynamic,
1846
+ raster_threshold=raster_threshold,
1847
+ raster_max_px=raster_max_px,
1848
+ width=width,
1849
+ height=height,
1850
+ filename=filename
1851
+ )
1483
1852
 
1484
- # Create a panel layout
1485
- layout = panel.Column(overlay)
1853
+ # Load and process oracle data
1854
+ feats = _load_and_merge_oracle_data(
1855
+ self,
1856
+ oracle_folder=oracle_folder,
1857
+ link_by_feature_uid=link_by_feature_uid,
1858
+ min_id_level=min_id_level,
1859
+ max_id_level=max_id_level,
1860
+ min_ms_level=min_ms_level
1861
+ )
1862
+
1863
+ if feats is None:
1864
+ return
1486
1865
 
1487
- if filename is not None:
1488
- # if filename includes .html, save the panel layout to an HTML file
1489
- if filename.endswith(".html"):
1490
- layout.save(filename, embed=True)
1491
- else:
1492
- # save the panel layout as a png
1493
- hv.save(overlay, filename, fmt="png")
1494
- else:
1495
- # Check if we're in a notebook environment and display appropriately
1496
- return _display_plot(overlay, layout)
1866
+ # Set up color scheme and categorical mapping
1867
+ cvalues, color_column, colors = _setup_color_mapping(self, feats, colorby, cmap, legend_groups)
1868
+
1869
+ # Create feature overlay with all visualization elements
1870
+ overlay = _create_feature_overlay(
1871
+ self,
1872
+ raster=raster,
1873
+ feats=feats,
1874
+ cvalues=cvalues,
1875
+ color_column=color_column,
1876
+ colors=colors,
1877
+ markersize=markersize,
1878
+ title=title,
1879
+ legend=legend
1880
+ )
1881
+
1882
+ # Handle output: export or display
1883
+ return _handle_output(self, overlay, filename)
1497
1884
 
1498
1885
 
1499
1886
  def plot_ms2_eic(
@@ -1756,96 +2143,6 @@ def plot_ms2_cycle(
1756
2143
  max_px=raster_max_px,
1757
2144
  )
1758
2145
 
1759
- """
1760
- feature_points_1 = None
1761
- feature_points_2 = None
1762
- feature_points_3 = None
1763
- feature_points_4 = None
1764
- feature_points_iso = None
1765
- # Plot features as red dots if features is True
1766
- if self.features_df is not None and show_features:
1767
- feats = self.features_df.clone()
1768
- # Convert to pandas for operations that require pandas functionality
1769
- if hasattr(feats, 'to_pandas'):
1770
- feats = feats.to_pandas()
1771
- # if ms2_scans is not null, keep only the first element of the list
1772
- feats['ms2_scans'] = feats['ms2_scans'].apply(lambda x: x[0] if type(x) == list else x)
1773
- # keep only iso==0, i.e. the main
1774
- feats = feats[feats['iso']==0]
1775
- # find features with ms2_scans not None and iso==0
1776
- features_df = feats[feats['ms2_scans'].notnull()]
1777
- feature_points_1 = hv.Points(
1778
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta", "ms2_scans"], label="Features with MS2 data"
1779
- ).options(
1780
- color=color_1,
1781
- marker=marker,
1782
- size=size_1,
1783
- tools=["hover"],
1784
- )
1785
- # find features without MS2 data
1786
- features_df = feats[feats['ms2_scans'].isnull()]
1787
- feature_points_2 = hv.Points(
1788
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta"], label="Features without MS2 data"
1789
- ).options(
1790
- color='red',
1791
- size=size_2,
1792
- marker=marker,
1793
- tools=["hover"],
1794
- )
1795
-
1796
- if show_isotopes:
1797
- feats = self.features_df
1798
- features_df = feats[feats['iso']>0]
1799
- feature_points_iso = hv.Points(
1800
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta", "iso", "iso_of"], label="Isotopes"
1801
- ).options(
1802
- color='violet',
1803
- marker=marker,
1804
- size=size_1,
1805
- tools=["hover"],
1806
- )
1807
- if show_ms2:
1808
- # find all self.scans_df with mslevel 2 that are not linked to a feature
1809
- ms2_orphan = self.scans_df.filter(pl.col('ms_level')==2).filter(pl.col("feature_uid")<0)
1810
-
1811
- if len(ms2_orphan) > 0:
1812
- # pandalize
1813
- ms2 = ms2_orphan.to_pandas()
1814
- feature_points_3 = hv.Points(
1815
- ms2, kdims=["rt", "prec_mz"], vdims=["index", "inty_tot", "bl"], label="Orphan MS2 scans"
1816
- ).options(
1817
- color=color_2,
1818
- marker='x',
1819
- size=size_2,
1820
- tools=["hover"],
1821
- )
1822
-
1823
- ms2_linked = self.scans_df.filter(pl.col('ms_level')==2).filter(pl.col("feature_uid")>=0)
1824
- if len(ms2_linked) > 0:
1825
- # pandalize
1826
- ms2 = ms2_linked.to_pandas()
1827
- feature_points_4 = hv.Points(
1828
- ms2, kdims=["rt", "prec_mz"], vdims=["index", "inty_tot", "bl"], label="Linked MS2 scans"
1829
- ).options(
1830
- color=color_1,
1831
- marker='x',
1832
- size=size_2,
1833
- tools=["hover"],
1834
- )
1835
-
1836
-
1837
- if feature_points_4 is not None:
1838
- overlay = overlay * feature_points_4
1839
- if feature_points_3 is not None:
1840
- overlay = overlay * feature_points_3
1841
- if feature_points_1 is not None:
1842
- overlay = overlay * feature_points_1
1843
- if not show_only_features_with_ms2:
1844
- if feature_points_2 is not None:
1845
- overlay = overlay * feature_points_2
1846
- if feature_points_iso is not None:
1847
- overlay = overlay * feature_points_iso
1848
- """
1849
2146
  if title is not None:
1850
2147
  overlay = overlay.opts(title=title)
1851
2148