masster 0.5.9__py3-none-any.whl → 0.5.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/plot.py CHANGED
@@ -43,6 +43,7 @@ See Also:
43
43
  """
44
44
 
45
45
  import os
46
+ import warnings
46
47
 
47
48
  import datashader as ds
48
49
  import holoviews as hv
@@ -55,18 +56,11 @@ import polars as pl
55
56
  from bokeh.models import HoverTool
56
57
  from holoviews import dim
57
58
  from holoviews.plotting.util import process_cmap
58
- from matplotlib.colors import rgb2hex
59
59
 
60
- # Import cmap for colormap handling
61
- try:
62
- from cmap import Colormap
63
- except ImportError:
64
- Colormap = None
60
+ from cmap import Colormap
65
61
 
66
62
  # Parameters removed - using hardcoded defaults
67
-
68
-
69
- hv.extension("bokeh")
63
+ # hv.extension("bokeh")
70
64
 
71
65
 
72
66
  def _process_cmap(cmap, fallback="viridis", logger=None):
@@ -85,8 +79,8 @@ def _process_cmap(cmap, fallback="viridis", logger=None):
85
79
  if cmap is None:
86
80
  cmap = "viridis"
87
81
  elif cmap == "grey":
88
- cmap = "Greys256"
89
-
82
+ cmap = "greys"
83
+
90
84
  # If cmap package is not available, fall back to process_cmap
91
85
  if Colormap is None:
92
86
  if logger:
@@ -205,6 +199,108 @@ def _display_plot(plot_object, layout=None):
205
199
  return None
206
200
 
207
201
 
202
+ def _export_with_webdriver_manager(plot_obj, filename, format_type, logger=None):
203
+ """
204
+ Export plot to PNG or SVG using webdriver-manager for automatic driver management.
205
+
206
+ Parameters:
207
+ plot_obj: Bokeh plot object or holoviews object to export
208
+ filename: Output filename
209
+ format_type: Either "png" or "svg"
210
+ logger: Logger for error reporting (optional)
211
+
212
+ Returns:
213
+ bool: True if export successful, False otherwise
214
+ """
215
+ try:
216
+ # Convert holoviews to bokeh if needed
217
+ if hasattr(plot_obj, 'opts'): # Likely a holoviews object
218
+ import holoviews as hv
219
+ bokeh_plot = hv.render(plot_obj)
220
+ else:
221
+ bokeh_plot = plot_obj
222
+
223
+ # Try webdriver-manager export first
224
+ try:
225
+ from webdriver_manager.chrome import ChromeDriverManager
226
+ from selenium import webdriver
227
+ from selenium.webdriver.chrome.service import Service
228
+ from selenium.webdriver.chrome.options import Options
229
+
230
+ # Set up Chrome options for headless operation
231
+ chrome_options = Options()
232
+ chrome_options.add_argument("--headless")
233
+ chrome_options.add_argument("--no-sandbox")
234
+ chrome_options.add_argument("--disable-dev-shm-usage")
235
+ chrome_options.add_argument("--disable-gpu")
236
+
237
+ # Use webdriver-manager to automatically get the correct ChromeDriver
238
+ service = Service(ChromeDriverManager().install())
239
+ driver = webdriver.Chrome(service=service, options=chrome_options)
240
+
241
+ # Export with managed webdriver
242
+ with warnings.catch_warnings():
243
+ warnings.simplefilter("ignore", category=UserWarning)
244
+ # Filter out bokeh.io.export warnings specifically
245
+ warnings.filterwarnings("ignore", module="bokeh.io.export")
246
+
247
+ if format_type == "png":
248
+ from bokeh.io import export_png
249
+ export_png(bokeh_plot, filename=filename, webdriver=driver)
250
+ elif format_type == "svg":
251
+ from bokeh.io import export_svg
252
+ export_svg(bokeh_plot, filename=filename, webdriver=driver)
253
+ else:
254
+ raise ValueError(f"Unsupported format: {format_type}")
255
+
256
+ driver.quit()
257
+ return True
258
+
259
+ except ImportError:
260
+ if logger:
261
+ logger.debug(f"webdriver-manager not available, using default {format_type.upper()} export")
262
+ # Fall back to default export
263
+ with warnings.catch_warnings():
264
+ warnings.simplefilter("ignore", category=UserWarning)
265
+ # Filter out bokeh.io.export warnings specifically
266
+ warnings.filterwarnings("ignore", module="bokeh.io.export")
267
+
268
+ if format_type == "png":
269
+ from bokeh.io import export_png
270
+ export_png(bokeh_plot, filename=filename)
271
+ elif format_type == "svg":
272
+ from bokeh.io import export_svg
273
+ export_svg(bokeh_plot, filename=filename)
274
+ return True
275
+
276
+ except Exception as e:
277
+ if logger:
278
+ logger.debug(f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export")
279
+ try:
280
+ # Final fallback to default export
281
+ with warnings.catch_warnings():
282
+ warnings.simplefilter("ignore", category=UserWarning)
283
+ # Filter out bokeh.io.export warnings specifically
284
+ warnings.filterwarnings("ignore", module="bokeh.io.export")
285
+
286
+ if format_type == "png":
287
+ from bokeh.io import export_png
288
+ export_png(bokeh_plot, filename=filename)
289
+ elif format_type == "svg":
290
+ from bokeh.io import export_svg
291
+ export_svg(bokeh_plot, filename=filename)
292
+ return True
293
+ except Exception as e2:
294
+ if logger:
295
+ logger.error(f"{format_type.upper()} export failed: {e2}")
296
+ return False
297
+
298
+ except Exception as e:
299
+ if logger:
300
+ logger.error(f"Export preparation failed: {e}")
301
+ return False
302
+
303
+
208
304
  def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
209
305
  """
210
306
  Helper function to handle consistent save/display behavior for sample plots.
@@ -236,16 +332,11 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
236
332
  save(plot_obj)
237
333
  self.logger.success(f"Plot saved to: {abs_filename}")
238
334
  elif filename.endswith(".png"):
239
- try:
240
- if plot_type == "bokeh":
241
- from bokeh.io.export import export_png
242
- export_png(plot_obj, filename=filename)
243
- elif plot_type in ["panel", "holoviews"]:
244
- import holoviews as hv
245
- hv.save(plot_obj, filename, fmt="png")
335
+ success = _export_with_webdriver_manager(plot_obj, filename, "png", self.logger)
336
+ if success:
246
337
  self.logger.success(f"Plot saved to: {abs_filename}")
247
- except Exception:
248
- # Fall back to HTML if PNG export not available
338
+ else:
339
+ # Fall back to HTML if PNG export fails completely
249
340
  html_filename = filename.replace('.png', '.html')
250
341
  abs_html_filename = os.path.abspath(html_filename)
251
342
  if plot_type == "panel":
@@ -259,6 +350,25 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
259
350
  output_file(html_filename)
260
351
  save(plot_obj)
261
352
  self.logger.warning(f"PNG export not available, saved as HTML instead: {abs_html_filename}")
353
+ elif filename.endswith(".svg"):
354
+ success = _export_with_webdriver_manager(plot_obj, filename, "svg", self.logger)
355
+ if success:
356
+ self.logger.success(f"Plot saved to: {abs_filename}")
357
+ else:
358
+ # Fall back to HTML if SVG export fails completely
359
+ html_filename = filename.replace('.svg', '.html')
360
+ abs_html_filename = os.path.abspath(html_filename)
361
+ if plot_type == "panel":
362
+ plot_obj.save(html_filename, embed=True) # type: ignore[attr-defined]
363
+ elif plot_type == "holoviews":
364
+ import panel
365
+ panel.panel(plot_obj).save(html_filename, embed=True) # type: ignore[attr-defined]
366
+ elif plot_type == "bokeh":
367
+ from bokeh.plotting import output_file
368
+ from bokeh.io import save
369
+ output_file(html_filename)
370
+ save(plot_obj)
371
+ self.logger.warning(f"SVG export not available, saved as HTML instead: {abs_html_filename}")
262
372
  elif filename.endswith(".pdf"):
263
373
  # Try to save as PDF, fall back to HTML if not available
264
374
  try:
@@ -444,6 +554,472 @@ def plot_chrom(
444
554
  self._handle_sample_plot_output(layout, filename, "panel")
445
555
 
446
556
 
557
def _create_raster_plot(sample, mz_range=None, rt_range=None, raster_cmap='greys',
                        raster_log=True, raster_min=1, raster_dynamic=True, raster_threshold=0.8, raster_max_px=8,
                        width=750, height=600, filename=None):
    """
    Create the raster plot layer from MS1 data.

    Parameters:
        sample: Object providing ``ms1_df`` (converted with ``to_pandas()``) and ``logger``.
        mz_range: Optional (min, max) pair; rows outside it (inclusive bounds) are dropped.
        rt_range: Optional (min, max) pair; rows outside it (inclusive bounds) are dropped.
        raster_cmap: Colormap passed through ``_process_cmap``; None is treated as 'greys'.
        raster_log: Use "log" colour normalisation when True, "linear" otherwise.
        raster_min: Rows with ``inty`` below this value are dropped.
        raster_dynamic: Passed to ``hd.rasterize`` as ``dynamic``; forced to False
            when ``filename`` is given and does not end with ".html".
        raster_threshold: ``threshold`` argument of ``hd.dynspread``.
        raster_max_px: ``max_px`` argument of ``hd.dynspread``.
        width, height: Plot size options applied to the raster.
        filename: Target filename, only inspected to decide on dynamic rasterisation.

    Returns:
        The object returned by ``hd.dynspread`` applied to the rasterised points.
    """
    # Process colormap using the cmap package with proper error handling
    raster_cmap_processed = _process_cmap(
        raster_cmap if raster_cmap is not None else 'greys',
        fallback="greys",
        logger=sample.logger,
    )

    # get columns rt, mz, inty from sample.ms1_df, It's polars DataFrame
    spectradf = sample.ms1_df.to_pandas()[["rt", "mz", "inty"]]

    # remove any inty<raster_min and apply the optional m/z and rt windows
    keep = spectradf["inty"] >= raster_min
    if mz_range is not None:
        keep &= (spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])
    if rt_range is not None:
        keep &= (spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])
    spectradf = spectradf[keep]

    has_points = not spectradf.empty
    if not has_points:
        sample.logger.warning("No MS1 data points left after filtering; raster will be empty")

    maxrt = spectradf["rt"].max()
    minrt = spectradf["rt"].min()
    maxmz = spectradf["mz"].max()
    minmz = spectradf["mz"].min()

    def new_bounds_hook(plot, elem):
        # Restrict panning/zooming to the data extent. With no data the
        # min/max values are NaN, so the bounds are left untouched.
        if not has_points:
            return
        plot.state.x_range.bounds = minrt, maxrt
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        spectradf,
        kdims=["rt", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    # Static exports (anything but HTML) cannot use dynamic rasterisation
    if filename is not None and not filename.endswith(".html"):
        raster_dynamic = False

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=raster_cmap_processed,
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=width,
        height=height,
        cnorm="log" if raster_log else "linear",
        xlabel="Retention time (s)",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )
    raster = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    return raster
634
+
635
+
636
+ def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
637
+ min_id_level, max_id_level, min_ms_level):
638
+ """Load oracle data and merge with features."""
639
+ if sample.features_df is None:
640
+ sample.logger.error("Cannot plot 2D oracle: features_df is not available")
641
+ return None
642
+
643
+ feats = sample.features_df.clone()
644
+ sample.logger.debug(f"Features data shape: {len(feats)} rows")
645
+
646
+ # Convert to pandas for oracle operations that require pandas functionality
647
+ if hasattr(feats, "to_pandas"):
648
+ feats = feats.to_pandas()
649
+
650
+ # check if annotationfile is not None
651
+ if oracle_folder is None:
652
+ sample.logger.info("No oracle folder provided, plotting features only")
653
+ return None
654
+
655
+ # try to read the annotationfile as a csv file and add it to feats
656
+ oracle_file_path = os.path.join(oracle_folder, "diag", "summary_by_feature.csv")
657
+ sample.logger.debug(f"Loading oracle data from: {oracle_file_path}")
658
+ try:
659
+ oracle_data = pd.read_csv(oracle_file_path)
660
+ sample.logger.info(f"Oracle data loaded successfully with {len(oracle_data)} rows")
661
+ except Exception as e:
662
+ sample.logger.error(f"Could not read {oracle_file_path}: {e}")
663
+ return None
664
+
665
+ if link_by_feature_uid:
666
+ cols_to_keep = [
667
+ "title", "scan_idx", "mslevel", "hits", "id_level", "id_label",
668
+ "id_ion", "id_class", "id_evidence", "score", "score2",
669
+ ]
670
+ oracle_data = oracle_data[cols_to_keep]
671
+
672
+ # extract feature_uid from title. It begins with "uid:XYZ,"
673
+ sample.logger.debug("Extracting feature UIDs from oracle titles using pattern 'uid:(\\d+)'")
674
+ oracle_data["feature_uid"] = oracle_data["title"].str.extract(r"uid:(\d+)")
675
+ oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
676
+
677
+ # sort by id_level, remove duplicate feature_uid, keep the first one
678
+ sample.logger.debug("Sorting by ID level and removing duplicates")
679
+ oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
680
+ oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")
681
+ sample.logger.debug(f"After deduplication: {len(oracle_data)} unique oracle annotations")
682
+ else:
683
+ cols_to_keep = [
684
+ "precursor", "rt", "title", "scan_idx", "mslevel", "hits", "id_level",
685
+ "id_label", "id_ion", "id_class", "id_evidence", "score", "score2",
686
+ ]
687
+ oracle_data = oracle_data[cols_to_keep]
688
+ oracle_data["feature_uid"] = None
689
+
690
+ # iterate over the rows and find the feature_uid in feats by looking at the closest rt and mz
691
+ for i, row in oracle_data.iterrows():
692
+ candidates = feats[
693
+ (abs(feats["rt"] - row["rt"]) < 1) & (abs(feats["mz"] - row["precursor"]) < 0.005)
694
+ ].copy()
695
+ if len(candidates) > 0:
696
+ # sort by delta rt
697
+ candidates["delta_rt"] = abs(candidates["rt"] - row["rt"])
698
+ candidates = candidates.sort_values(by=["delta_rt"])
699
+ oracle_data.at[i, "feature_uid"] = candidates["feature_uid"].values[0]
700
+ # remove precursor and rt columns
701
+ oracle_data = oracle_data.drop(columns=["precursor", "rt"])
702
+
703
+ # Merge features with oracle data
704
+ sample.logger.debug(f"Merging {len(feats)} features with oracle data")
705
+ feats = feats.merge(oracle_data, how="left", on="feature_uid")
706
+ sample.logger.debug(f"After merge: {len(feats)} total features")
707
+
708
+ # filter feats by id_level
709
+ initial_count = len(feats)
710
+ if min_id_level is not None:
711
+ feats = feats[(feats["id_level"] >= min_id_level)]
712
+ sample.logger.debug(f"After min_id_level filter ({min_id_level}): {len(feats)} features")
713
+ if max_id_level is not None:
714
+ feats = feats[(feats["id_level"] <= max_id_level)]
715
+ sample.logger.debug(f"After max_id_level filter ({max_id_level}): {len(feats)} features")
716
+ if min_ms_level is not None:
717
+ feats = feats[(feats["mslevel"] >= min_ms_level)]
718
+ sample.logger.debug(f"After min_ms_level filter ({min_ms_level}): {len(feats)} features")
719
+
720
+ sample.logger.info(f"Feature filtering complete: {initial_count} → {len(feats)} features remaining")
721
+ return feats
722
+
723
+
724
+ def _setup_color_mapping(sample, feats, colorby, cmap, legend_groups=None):
725
+ """Set up categorical color mapping for features."""
726
+ import matplotlib.colors as mcolors
727
+
728
+ feats["color"] = "black" # Default fallback color
729
+ cvalues = None
730
+ color_column = "color" # Default to fixed color
731
+ colors = []
732
+
733
+ # Determine which column to use for categorical coloring
734
+ if colorby in ["class", "hg", "id_class", "id_hg"]:
735
+ categorical_column = "id_class"
736
+ # replace nans with 'mix'
737
+ feats[categorical_column] = feats[categorical_column].fillna("mix")
738
+ elif colorby in ["ion", "id_ion"]:
739
+ categorical_column = "id_ion"
740
+ feats[categorical_column] = feats[categorical_column].fillna("mix")
741
+ elif colorby in ["evidence", "id_evidence"]:
742
+ categorical_column = "id_evidence"
743
+ feats[categorical_column] = feats[categorical_column].fillna("mix")
744
+ elif colorby in ["level", "id_level"]:
745
+ categorical_column = "id_level"
746
+ feats[categorical_column] = feats[categorical_column].fillna("mix")
747
+ else:
748
+ categorical_column = None
749
+
750
+ if categorical_column is not None:
751
+ # Use provided legend_groups or derive from data
752
+ if legend_groups is not None:
753
+ # Use all specified groups to ensure consistent legend/coloring
754
+ cvalues = legend_groups[:] # Copy the list
755
+ # Ensure 'mix' is always present as the last group if not already included
756
+ if 'mix' not in cvalues:
757
+ cvalues.append('mix')
758
+ sample.logger.info(f"Using provided legend_groups for legend: {cvalues}")
759
+
760
+ # Check which provided groups actually have data
761
+ present_groups = feats[categorical_column].unique()
762
+ missing_groups = [grp for grp in cvalues if grp not in present_groups]
763
+ if missing_groups:
764
+ sample.logger.warning(f"Provided legend_groups not found in data: {missing_groups}")
765
+ sample.logger.info(f"Groups present in data: {sorted(present_groups)}")
766
+
767
+ # Assign any points not in legend_groups to 'mix'
768
+ feats.loc[~feats[categorical_column].isin(cvalues[:-1]), categorical_column] = 'mix'
769
+ else:
770
+ # Original behavior: use only groups present in data
771
+ cvalues = feats[categorical_column].unique()
772
+ # sort alphabetically
773
+ cvalues = sorted(cvalues)
774
+ # flip the strings left to right
775
+ fcvalues = [cvalues[i][::-1] for i in range(len(cvalues))]
776
+ # sort in alphabetical order the flipped strings and return the index
777
+ idx = np.argsort(fcvalues)
778
+ # apply to cvalues
779
+ cvalues = [cvalues[i] for i in idx]
780
+ sample.logger.info(f"Using groups derived from data: {cvalues}")
781
+
782
+ color_column = categorical_column # Use categorical coloring
783
+
784
+ # Process colormap for categorical data
785
+ if cvalues is not None:
786
+ num_colors = len(cvalues)
787
+
788
+ # Use colormap for categorical data - use _process_cmap for proper handling
789
+ try:
790
+ colormap = Colormap(cmap)
791
+ colors = []
792
+ for i in range(num_colors):
793
+ # Generate evenly spaced colors across the colormap
794
+ t = i / (num_colors - 1) if num_colors > 1 else 0.5
795
+ color = colormap(t)
796
+ # Convert to hex - handle different color formats
797
+ if hasattr(color, '__len__') and len(color) >= 3:
798
+ # It's an array-like color (RGB or RGBA)
799
+ colors.append(mcolors.to_hex(color[:3]))
800
+ else:
801
+ # It's a single value, convert to RGB
802
+ colors.append(mcolors.to_hex([color, color, color]))
803
+ except (AttributeError, ValueError, TypeError):
804
+ # Fallback to using _process_cmap if direct Colormap fails
805
+ cmap_palette = _process_cmap(cmap, fallback="viridis", logger=sample.logger)
806
+ # Sample colors from the palette
807
+ colors = []
808
+ for i in range(num_colors):
809
+ idx = int(i * (len(cmap_palette) - 1) / (num_colors - 1)) if num_colors > 1 else len(cmap_palette) // 2
810
+ colors.append(cmap_palette[idx])
811
+
812
+ # Create a mapping from class name to color to ensure consistent color assignment
813
+ # Each class gets the same color based on its position in the cvalues list
814
+ class_to_color = {class_name: colors[i] for i, class_name in enumerate(cvalues)}
815
+
816
+ # assign color to each row based on colorby category
817
+ feats["color"] = "black"
818
+ for class_name, color in class_to_color.items():
819
+ if colorby in ["class", "hg", "id_class", "id_hg"]:
820
+ feats.loc[feats["id_class"] == class_name, "color"] = color
821
+ elif colorby in ["ion", "id_ion"]:
822
+ feats.loc[feats["id_ion"] == class_name, "color"] = color
823
+ elif colorby in ["id_evidence", "ms2_evidence"]:
824
+ feats.loc[feats["id_evidence"] == class_name, "color"] = color
825
+
826
+ return cvalues, color_column, colors
827
+
828
+
829
def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors,
                            markersize, title, legend):
    """
    Create feature overlay with identified and unidentified features.

    Identified features (``id_level`` >= 1) are drawn as one ``hv.Points`` layer
    per category in ``cvalues`` so that each category gets its own legend entry;
    unidentified features are drawn as black "x" markers without a legend entry.
    Missing ``id_level`` values in ``feats`` are replaced by 0 in place.

    Parameters:
        sample: Object providing ``logger``.
        raster: Base raster layer the feature layers are overlaid on.
        feats: pandas DataFrame of features merged with oracle annotations.
        cvalues: Ordered categories, or None for no categorical layers.
        color_column: Column of ``feats`` holding the category of each row.
        colors: Colours aligned with ``cvalues``.
        markersize: Marker size for all feature layers.
        title: Optional plot title.
        legend: Legend position name, or None to hide the legend. Unknown
            names fall back to "bottom_right".

    Returns:
        The combined overlay (the raster itself when no layer was added).
    """
    # replace NaN with 0 in id_level
    feats["id_level"] = feats["id_level"].fillna(0)

    sample.logger.debug("Creating unified feature visualization with categorical coloring")

    # Categorical coloring applies to identified features only (id_level >= 1)
    is_identified = feats["id_level"] >= 1
    identified_feats = feats[is_identified].copy()
    unidentified_feats = feats[feats["id_level"] < 1].copy()

    overlay = raster

    if len(identified_feats) > 0 and cvalues is not None:
        # Full opacity for high confidence (id_level >= 2), transparent otherwise
        identified_feats["fill_alpha"] = identified_feats["id_level"].apply(
            lambda level: 1.0 if level >= 2 else 0.3
        )

        oracle_hover_identified = HoverTool(
            tooltips=[
                ("rt", "@rt"),
                ("m/z", "@mz{0.0000}"),
                ("feature_uid", "@feature_uid"),
                ("id_level", "@id_level"),
                ("id_class", "@id_class"),
                ("id_label", "@id_label"),
                ("id_ion", "@id_ion"),
                ("id_evidence", "@id_evidence"),
                ("score", "@score"),
                ("score2", "@score2"),
            ],
        )

        kdims = ["rt", "mz"]
        vdims = [
            "inty", "feature_uid", "id_level", "id_class", "id_label",
            "id_ion", "id_evidence", "score", "score2", "fill_alpha",
        ]

        # One completely separate Points element per category
        for i, category in enumerate(cvalues):
            category_data = identified_feats[identified_feats[color_column] == category].copy()
            if len(category_data) > 0:
                layer_alpha = "fill_alpha"
            else:
                # Empty element so that categories without data still appear in the legend
                category_data = pd.DataFrame(columns=kdims + vdims)
                layer_alpha = 1.0

            category_points = hv.Points(
                category_data,
                kdims=kdims,
                vdims=vdims,
                label=str(category),  # This becomes the legend label
            ).options(
                color=colors[i],  # Use pre-computed hex color for this category
                marker="circle",
                size=markersize,
                alpha=layer_alpha,
                tools=[oracle_hover_identified],
                show_legend=True,
            )
            overlay = overlay * category_points

    # Separate layer for unidentified features (always black crosses)
    if len(unidentified_feats) > 0:
        oracle_hover_no_id = HoverTool(
            tooltips=[
                ("rt", "@rt"),
                ("m/z", "@mz{0.0000}"),
                ("feature_uid", "@feature_uid"),
                ("id_level", "@id_level"),
            ],
        )

        feature_points_no_id = hv.Points(
            unidentified_feats,
            kdims=["rt", "mz"],
            vdims=["inty", "feature_uid", "id_level"],
        ).options(
            color="black",
            marker="x",
            size=markersize,
            alpha=1.0,
            tools=[oracle_hover_no_id],
            show_legend=False,
        )

        overlay = overlay * feature_points_no_id

    if title is not None:
        sample.logger.debug(f"Setting plot title: {title}")
        overlay = overlay.opts(title=title)

    # Configure legend if requested and categorical coloring is available
    if legend is not None and cvalues is not None and len(cvalues) > 1:
        sample.logger.debug(f"Configuring integrated legend at '{legend}' position with {len(cvalues)} categories: {cvalues}")

        # Accepted legend positions; anything else falls back to "bottom_right"
        known_positions = (
            "top_right", "top_left", "bottom_right", "bottom_left",
            "right", "left", "top", "bottom",
        )
        hv_legend_pos = legend if legend in known_positions else "bottom_right"

        # Apply legend configuration to the overlay
        overlay = overlay.opts(
            legend_position=hv_legend_pos,
            legend_opts={'title': '', 'padding': 2, 'spacing': 2}
        )

        sample.logger.debug(f"Applied integrated legend at position '{hv_legend_pos}'")
    elif legend is None:
        # Explicitly hide legend when legend=None
        overlay = overlay.opts(show_legend=False)
        sample.logger.debug("Legend hidden (legend=None)")

    return overlay
987
+
988
+
989
def _handle_output(sample, overlay, filename):
    """
    Handle plot export or display.

    Parameters:
        sample: Object providing ``logger``.
        overlay: HoloViews overlay to save or wrap.
        filename: Target file, or None to return a layout instead of saving.
            ".html" is saved through Panel, ".svg" and ".png" are exported with
            ``_export_with_webdriver_manager``; any other name gets ".png"
            appended and is exported as PNG.

    Returns:
        A ``panel.Column`` wrapping the overlay when ``filename`` is None,
        otherwise None.
    """
    if filename is None:
        # Return a Panel layout (consistent with plot_2d behavior)
        return panel.Column(overlay)

    if filename.endswith(".html"):
        # For HoloViews overlay, we need to convert to Panel for saving
        panel.Column(overlay).save(filename, embed=True)
        return None

    if filename.endswith(".svg"):
        format_type, target = "svg", filename
    elif filename.endswith(".png"):
        format_type, target = "png", filename
    else:
        # Default to PNG for any other format
        format_type, target = "png", filename + ".png"

    label = format_type.upper()
    if _export_with_webdriver_manager(overlay, target, format_type, sample.logger):
        sample.logger.success(f"{label} exported: {os.path.abspath(target)}")
    else:
        sample.logger.warning(f"{label} export failed: {os.path.abspath(target)}")
    return None
1021
+
1022
+
447
1023
  def plot_2d(
448
1024
  self,
449
1025
  filename=None,
@@ -457,6 +1033,8 @@ def plot_2d(
457
1033
  marker="circle",
458
1034
  markersize=5,
459
1035
  size="static",
1036
+ raster_log=True,
1037
+ raster_min=1,
460
1038
  raster_dynamic=True,
461
1039
  raster_max_px=8,
462
1040
  raster_threshold=0.8,
@@ -499,6 +1077,10 @@ def plot_2d(
499
1077
  - 'dynamic': Uses coordinate-based sizing that scales with zoom level (markers get larger when zooming in)
500
1078
  - 'static': Uses screen-based sizing that remains constant regardless of zoom level
501
1079
  - 'slider': Provides an interactive slider to dynamically adjust marker size
1080
+ raster_log (bool, default True):
1081
+ Use logarithmic scaling for raster intensity (True) or linear scaling (False).
1082
+ raster_min (float, default 1):
1083
+ Minimum intensity threshold for raster data filtering.
502
1084
  raster_dynamic (bool, default True):
503
1085
  Whether to use dynamic rasterization for the background point cloud.
504
1086
  raster_max_px (int, default 8):
@@ -531,8 +1113,8 @@ def plot_2d(
531
1113
 
532
1114
  # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
533
1115
  spectradf = self.ms1_df.select(["rt", "mz", "inty"])
534
- # remove any inty<1
535
- spectradf = spectradf.filter(pl.col("inty") >= 1)
1116
+ # remove any inty<raster_min
1117
+ spectradf = spectradf.filter(pl.col("inty") >= raster_min)
536
1118
  # keep only rt, mz, and inty
537
1119
  spectradf = spectradf.select(["rt", "mz", "inty"])
538
1120
  if mz_range is not None:
@@ -652,7 +1234,7 @@ def plot_2d(
652
1234
  hooks=[new_bounds_hook],
653
1235
  width=width,
654
1236
  height=height,
655
- cnorm="log",
1237
+ cnorm="log" if raster_log else "linear",
656
1238
  xlabel="Retention time (s)",
657
1239
  ylabel="m/z",
658
1240
  colorbar=True,
@@ -1008,492 +1590,128 @@ def plot_2d(
1008
1590
  def plot_2d_oracle(
1009
1591
  self,
1010
1592
  oracle_folder=None,
1011
- link_by_feature_uid=None,
1593
+ link_by_feature_uid=True,
1594
+ min_id_level=1,
1595
+ max_id_level=4,
1596
+ min_ms_level=2,
1012
1597
  colorby="hg",
1013
- filename=None,
1014
- min_id_level=None,
1015
- max_id_level=None,
1016
- min_ms_level=None,
1017
- title=None,
1018
- cmap=None,
1019
- markersize=10,
1598
+ legend_groups=None,
1599
+ markersize=5,
1600
+ cmap='Turbo',
1601
+ raster_cmap='grey',
1602
+ raster_log=True,
1603
+ raster_min=1,
1020
1604
  raster_dynamic=True,
1021
1605
  raster_max_px=8,
1022
1606
  raster_threshold=0.8,
1023
1607
  mz_range=None,
1024
1608
  rt_range=None,
1609
+ width=750,
1610
+ height=600,
1611
+ filename=None,
1612
+ title=None,
1613
+ legend="bottom_right",
1025
1614
  ):
1026
1615
  """
1027
- Plot a 2D overlay visualization of MS1 survey scans and feature annotations, including oracle annotation data if provided.
1616
+ Plot a 2D visualization combining MS1 raster data and oracle-annotated features.
1028
1617
 
1029
- This function reads the primary mass spectrometry data, applies filtering, processes oracle annotation data (if provided),
1030
- and produces an interactive plot combining various data layers. The visualization includes rasterized MS1 data and feature
1031
- points colored by annotation.
1618
+ Creates an interactive plot overlaying MS1 survey scan data with feature annotations
1619
+ from oracle files. Features are colored categorically based on identification class,
1620
+ ion type, or evidence level.
1032
1621
 
1033
1622
  Parameters:
1034
- self: The object instance containing MS1 and feature data.
1035
- oracle_folder (str, optional): Path to the oracle folder containing the annotation file
1036
- (expected at "<oracle_folder>/diag/summary_by_feature.csv"). If None, oracle data is not used.
1037
- link_by_feature_uid (bool, optional): Whether to link features by their IDs in the overlay.
1038
- colorby (str, optional): Parameter that determines the color assignment for annotated features.
1039
- Expected values include 'hg', 'class', 'id_class', or 'id_hg'. Default is 'hg'.
1040
- filename (str, optional): Name of the file where the plot should be saved. If provided and ends with
1041
- ".html", the panel layout is saved as an interactive HTML file; otherwise, the output is saved as a PNG.
1042
- min_id_level (int, optional): Minimum identification level for oracle annotations to include.
1043
- max_id_level (int, optional): Maximum identification level for oracle annotations to include.
1044
- min_ms_level (int, optional): Minimum MS level for features to include.
1045
- title (str, optional): Title to be displayed on the resulting plot. Default is None.
1046
- cmap (str, optional): Colormap to be used for the rasterized plot. Acceptable values include None, "grey",
1047
- "iridescent", or other valid colormap names. Default is None. When None, 'Greys256' is used.
1048
- markersize (int, optional): Marker size for feature points in the overlay. Default is 10.
1049
- raster_dynamic (bool, optional): If True, enables dynamic rasterization of the overlay. If filename is provided
1050
- and does not end with ".html", raster_dynamic is set to False. Default is True.
1051
- raster_max_px (int, optional): Maximum pixel size for dynamic rasterization. Default is 8.
1052
- raster_threshold (float, optional): Threshold for dynamic raster spread. Default is 0.8.
1053
- mz_range (tuple, optional): m/z range for filtering MS1 data.
1054
- rt_range (tuple, optional): Retention time range for filtering MS1 data.
1623
+ oracle_folder (str, optional): Path to oracle folder containing
1624
+ "diag/summary_by_feature.csv". Required: if None, the function logs a message and returns None without plotting.
1625
+ link_by_feature_uid (bool): Whether to link features by UID (True) or by m/z/RT proximity (False).
1626
+ min_id_level (int): Minimum identification confidence level to include.
1627
+ max_id_level (int): Maximum identification confidence level to include.
1628
+ min_ms_level (int): Minimum MS level for features to include.
1629
+ colorby (str): Feature coloring scheme - "id_class", "id_ion", "id_evidence", etc.
1630
+ legend_groups (list, optional): List of groups to include in legend and coloring scheme.
1631
+ If provided, legend will show exactly these groups. 'mix' is automatically added
1632
+ as the last group to contain points not matching other groups. Works for all
1633
+ categorical coloring types (id_class, id_ion, id_evidence, etc.).
1634
+ If None (default), all groups present in the data will be shown without filtering.
1635
+ All specified classes will appear in the legend even if no features are present.
1636
+ markersize (int): Size of feature markers.
1637
+ cmap (str): Colormap name for categorical coloring.
1638
+ raster_cmap (str): Colormap for MS1 raster background.
1639
+ raster_log (bool): Use logarithmic scaling for raster intensity (True) or linear scaling (False).
1640
+ raster_min (float): Minimum intensity threshold for raster data filtering.
1641
+ raster_dynamic (bool): Enable dynamic rasterization.
1642
+ raster_threshold (float): Dynamic raster spread threshold.
1643
+ raster_max_px (int): Maximum pixel size for rasterization.
1644
+ mz_range (tuple, optional): m/z range filter (min, max).
1645
+ rt_range (tuple, optional): Retention time range filter (min, max).
1646
+ width/height (int): Plot dimensions in pixels.
1647
+ filename (str, optional): Export filename (.html/.svg/.png). If None, displays inline.
1648
+ title (str, optional): Plot title.
1649
+ legend (str, optional): Legend position ("top_right", "bottom_left", etc.) or None.
1055
1650
 
1056
1651
  Returns:
1057
- None
1058
-
1059
- The function either displays the interactive panel layout or saves the visualization to a file based on
1060
- the provided filename. If the primary file object or feature data is missing, the function prints an
1061
- informative message and returns without plotting.
1652
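+ HoloViews layout for display (if filename is None), otherwise None. Also returns None if features_df is missing, oracle_folder is None, or _load_and_merge_oracle_data returns None.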
1062
1653
  """
1063
1654
 
1064
- if self.file_obj is None:
1065
- print("Please load a file first.")
1066
- return
1067
-
1068
- # Process colormap using the cmap package
1069
- cmap_palette = _process_cmap(cmap, fallback="Greys256", logger=self.logger)
1070
-
1071
- # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
1072
- spectradf = self.ms1_df.to_pandas()
1073
-
1074
- # remove any inty<1
1075
- spectradf = spectradf[spectradf["inty"] >= 1]
1076
- # keep only rt, mz, and inty
1077
- spectradf = spectradf[["rt", "mz", "inty"]]
1078
- if mz_range is not None:
1079
- spectradf = spectradf[(spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])]
1080
- if rt_range is not None:
1081
- spectradf = spectradf[(spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])]
1082
-
1083
- maxrt = spectradf["rt"].max()
1084
- minrt = spectradf["rt"].min()
1085
- maxmz = spectradf["mz"].max()
1086
- minmz = spectradf["mz"].min()
1087
-
1088
- def new_bounds_hook(plot, elem):
1089
- x_range = plot.state.x_range
1090
- y_range = plot.state.y_range
1091
- x_range.bounds = minrt, maxrt
1092
- y_range.bounds = minmz, maxmz
1093
-
1094
- points = hv.Points(
1095
- spectradf,
1096
- kdims=["rt", "mz"],
1097
- vdims=["inty"],
1098
- label="MS1 survey scans",
1099
- ).opts(
1100
- fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
1101
- color=np.log(dim("inty")),
1102
- colorbar=True,
1103
- cmap="Magma",
1104
- tools=["hover"],
1105
- )
1106
-
1107
- if filename is not None:
1108
- dyn = False
1109
- if not filename.endswith(".html"):
1110
- raster_dynamic = False
1111
-
1112
- dyn = raster_dynamic
1113
- raster = hd.rasterize(
1114
- points,
1115
- aggregator=ds.max("inty"),
1116
- interpolation="bilinear",
1117
- dynamic=dyn, # alpha=10, min_alpha=0,
1118
- ).opts(
1119
- active_tools=["box_zoom"],
1120
- cmap=cmap_palette,
1121
- tools=["hover"],
1122
- hooks=[new_bounds_hook],
1123
- width=1000,
1124
- height=1000,
1125
- cnorm="log",
1126
- xlabel="Retention time (s)",
1127
- ylabel="m/z",
1128
- colorbar=True,
1129
- colorbar_position="right",
1130
- axiswise=True,
1131
- )
1132
- raster = hd.dynspread(
1133
- raster,
1134
- threshold=raster_threshold,
1135
- how="add",
1136
- shape="square",
1137
- max_px=raster_max_px,
1138
- )
1655
+ self.logger.info(f"Starting plot_2d_oracle with oracle_folder: {oracle_folder}")
1656
+ self.logger.debug(f"Parameters - link_by_feature_uid: {link_by_feature_uid}, min_id_level: {min_id_level}, max_id_level: {max_id_level}")
1657
+ self.logger.debug(f"Plot parameters - colorby: {colorby}, markersize: {markersize}, filename: {filename}")
1139
1658
 
1659
+ # Early validation
1140
1660
  if self.features_df is None:
1661
+ self.logger.error("Cannot plot 2D oracle: features_df is not available")
1141
1662
  return
1142
- feats = self.features_df.clone()
1143
-
1144
- # Convert to pandas for oracle operations that require pandas functionality
1145
- if hasattr(feats, "to_pandas"):
1146
- feats = feats.to_pandas()
1147
-
1148
- # check if annotationfile is not None
1663
+
1149
1664
  if oracle_folder is None:
1150
- return
1151
- # try to read the annotationfile as a csv file and add it to feats
1152
- try:
1153
- oracle_data = pd.read_csv(
1154
- os.path.join(oracle_folder, "diag", "summary_by_feature.csv"),
1155
- )
1156
- except Exception:
1157
- print(f"Could not read {oracle_folder}/diag/summary_by_feature.csv")
1665
+ self.logger.info("No oracle folder provided, plotting features only")
1158
1666
  return
1159
1667
 
1160
- if link_by_feature_uid:
1161
- # scan_idx slaw_id slaw_ms2_id mz rt level formula ion species name rarity lib_id hg mod lib score score2 score_db score_db_data ms2_tic ms2_evidence ms2_matched_n ms2_missed_n ms2_matched ms2_missed ms2_top1
1162
- cols_to_keep = [
1163
- "title",
1164
- "scan_idx",
1165
- "mslevel",
1166
- "hits",
1167
- "id_level",
1168
- "id_label",
1169
- "id_ion",
1170
- "id_class",
1171
- "id_evidence",
1172
- "score",
1173
- "score2",
1174
- ]
1175
- oracle_data = oracle_data[cols_to_keep]
1176
- # extract feature_uid from title. It begins with "fid:XYZ;"
1177
- oracle_data["feature_uid"] = oracle_data["title"].str.extract(r"fid:(\d+)")
1178
- oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
1179
- # sort by id_level, remove duplicate feature_uid, keep the first one
1180
- oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
1181
- oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")
1182
- else:
1183
- cols_to_keep = [
1184
- "precursor",
1185
- "rt",
1186
- "title",
1187
- "scan_idx",
1188
- "mslevel",
1189
- "hits",
1190
- "id_level",
1191
- "id_label",
1192
- "id_ion",
1193
- "id_class",
1194
- "id_evidence",
1195
- "score",
1196
- "score2",
1197
- ]
1198
- # link
1199
- oracle_data = oracle_data[cols_to_keep]
1200
- oracle_data["feature_uid"] = None
1201
- # iterate over the rows and find the feature_uid in feats by looking at the closest rt and mz
1202
- for i, row in oracle_data.iterrows():
1203
- candidates = feats[
1204
- (abs(feats["rt"] - row["rt"]) < 1) & (abs(feats["mz"] - row["precursor"]) < 0.005)
1205
- ].copy()
1206
- if len(candidates) > 0:
1207
- # sort by delta rt
1208
- candidates["delta_rt"] = abs(candidates["rt"] - row["rt"])
1209
- candidates = candidates.sort_values(by=["delta_rt"])
1210
- oracle_data.at[i, "feature_uid"] = candidates["feature_uid"].values[0]
1211
- # remove precursor and rt columns
1212
- oracle_data = oracle_data.drop(columns=["precursor", "rt"])
1213
-
1214
- feats = feats.merge(oracle_data, how="left", on="feature_uid")
1215
-
1216
- # filter feats by id_level
1217
- if min_id_level is not None:
1218
- feats = feats[(feats["id_level"] >= min_id_level)]
1219
- if max_id_level is not None:
1220
- feats = feats[(feats["id_level"] <= max_id_level)]
1221
- if min_ms_level is not None:
1222
- feats = feats[(feats["mslevel"] >= min_ms_level)]
1223
-
1224
- feats["color"] = "black"
1225
-
1226
- cvalues = None
1227
- if colorby in ["class", "hg", "id_class", "id_hg"]:
1228
- # replace nans in feats['id_class'] with 'mix'
1229
- feats["id_class"] = feats["id_class"].fillna("mix")
1230
- cvalues = feats["id_class"].unique()
1231
- # sort alphabetically
1232
- cvalues = sorted(cvalues)
1233
- # flip the strings left to right
1234
- fcvalues = [cvalues[i][::-1] for i in range(len(cvalues))]
1235
- # sort in alphabetical order the flipped strings and return the index
1236
- idx = np.argsort(fcvalues)
1237
- # apply to cvalues
1238
- cvalues = [cvalues[i] for i in idx]
1239
- elif colorby in ["ion", "id_ion"]:
1240
- cvalues = feats["id_ion"].unique()
1241
- elif colorby in ["id_evidence", "ms2_evidence"]:
1242
- cvalues = feats["id_evidence"].unique()
1243
-
1244
- if cvalues is not None:
1245
- num_colors = len(cvalues)
1246
-
1247
- # Use cmap package for categorical colormap
1248
- try:
1249
- if Colormap is not None:
1250
- # Use rainbow colormap for categorical data
1251
- colormap = Colormap("rainbow")
1252
- colors = []
1253
- for i in range(num_colors):
1254
- # Generate evenly spaced colors across the colormap
1255
- t = i / (num_colors - 1) if num_colors > 1 else 0.5
1256
- color = colormap(t)
1257
- # Convert to hex
1258
- import matplotlib.colors as mcolors
1259
- # Convert color to hex - handle different color formats
1260
- if hasattr(color, '__len__') and len(color) >= 3:
1261
- # It's an array-like color (RGB or RGBA)
1262
- colors.append(mcolors.rgb2hex(color[:3]))
1263
- else:
1264
- # It's a single value, convert to RGB
1265
- colors.append(mcolors.rgb2hex([color, color, color]))
1266
- else:
1267
- # Fallback to original method
1268
- cmap = "rainbow"
1269
- cmap_provider = "colorcet"
1270
- cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
1271
- colors = [
1272
- rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
1273
- for i in range(num_colors)
1274
- ]
1275
- except Exception:
1276
- # Final fallback to original method
1277
- cmap = "rainbow"
1278
- cmap_provider = "colorcet"
1279
- cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
1280
- colors = [
1281
- rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
1282
- for i in range(num_colors)
1283
- ]
1284
-
1285
- # assign color to each row based on id_class. If id_class is null, assign 'black'
1286
- feats["color"] = "black"
1287
-
1288
- for i, c in enumerate(cvalues):
1289
- if colorby in ["class", "hg", "id_class", "id_hg"]:
1290
- feats.loc[feats["id_class"] == c, "color"] = colors[i]
1291
- elif colorby in ["ion", "id_ion"]:
1292
- feats.loc[feats["id_ion"] == c, "color"] = colors[i]
1293
- elif colorby in ["id_evidence", "ms2_evidence"]:
1294
- feats.loc[feats["id_evidence"] == c, "color"] = colors[i]
1295
-
1296
- # replace NaN with 0 in id_level
1297
- feats["id_level"] = feats["id_level"].fillna(0)
1298
- # feature_points_1 are all features with column ms2_scans not null
1299
- feature_points_1 = None
1300
- feat_df = feats.copy()
1301
- feat_df = feat_df[feat_df["id_level"] == 2]
1302
-
1303
- oracle_hover_1 = HoverTool(
1304
- tooltips=[
1305
- ("rt", "@rt"),
1306
- ("m/z", "@mz{0.0000}"),
1307
- ("feature_uid", "@feature_uid"),
1308
- ("id_level", "@id_level"),
1309
- ("id_class", "@id_class"),
1310
- ("id_label", "@id_label"),
1311
- ("id_ion", "@id_ion"),
1312
- ("id_evidence", "@id_evidence"),
1313
- ("score", "@score"),
1314
- ("score2", "@score2"),
1315
- ],
1316
- )
1317
- feature_points_1 = hv.Points(
1318
- feat_df,
1319
- kdims=["rt", "mz"],
1320
- vdims=[
1321
- "inty",
1322
- "feature_uid",
1323
- "id_level",
1324
- "id_class",
1325
- "id_label",
1326
- "id_ion",
1327
- "id_evidence",
1328
- "score",
1329
- "score2",
1330
- "color",
1331
- ],
1332
- label="ID by MS2",
1333
- ).options(
1334
- color="color",
1335
- marker="circle",
1336
- size=markersize,
1337
- fill_alpha=1.0,
1338
- tools=[oracle_hover_1],
1668
+ # Create raster plot layer
1669
+ raster = _create_raster_plot(
1670
+ self,
1671
+ mz_range=mz_range,
1672
+ rt_range=rt_range,
1673
+ raster_cmap=raster_cmap,
1674
+ raster_log=raster_log,
1675
+ raster_min=raster_min,
1676
+ raster_dynamic=raster_dynamic,
1677
+ raster_threshold=raster_threshold,
1678
+ raster_max_px=raster_max_px,
1679
+ width=width,
1680
+ height=height,
1681
+ filename=filename
1339
1682
  )
1340
1683
 
1341
- # feature_points_2 are all features that have ms2_scans not null and id_level ==1
1342
- feature_points_2 = None
1343
- feat_df = feats.copy()
1344
- feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] == 1)]
1345
- if len(feat_df) > 0:
1346
- oracle_hover_2 = HoverTool(
1347
- tooltips=[
1348
- ("rt", "@rt"),
1349
- ("m/z", "@mz{0.0000}"),
1350
- ("feature_uid", "@feature_uid"),
1351
- ("id_level", "@id_level"),
1352
- ("id_label", "@id_label"),
1353
- ("id_ion", "@id_ion"),
1354
- ("id_class", "@id_class"),
1355
- ],
1356
- )
1357
- feature_points_2 = hv.Points(
1358
- feat_df,
1359
- kdims=["rt", "mz"],
1360
- vdims=[
1361
- "inty",
1362
- "feature_uid",
1363
- "id_level",
1364
- "id_label",
1365
- "id_ion",
1366
- "id_class",
1367
- "color",
1368
- ],
1369
- label="ID by MS1, with MS2",
1370
- ).options(
1371
- color="color",
1372
- marker="circle",
1373
- size=markersize,
1374
- fill_alpha=0.0,
1375
- tools=[oracle_hover_2],
1376
- )
1377
-
1378
- # feature_points_3 are all features that have ms2_scans null and id_level ==1
1379
- feature_points_3 = None
1380
- feat_df = feats.copy()
1381
- feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] == 1)]
1382
- if len(feat_df) > 0:
1383
- oracle_hover_3 = HoverTool(
1384
- tooltips=[
1385
- ("rt", "@rt"),
1386
- ("m/z", "@mz{0.0000}"),
1387
- ("feature_uid", "@feature_uid"),
1388
- ("id_level", "@id_level"),
1389
- ("id_label", "@id_label"),
1390
- ("id_ion", "@id_ion"),
1391
- ("id_class", "@id_class"),
1392
- ],
1393
- )
1394
- feature_points_3 = hv.Points(
1395
- feat_df,
1396
- kdims=["rt", "mz"],
1397
- vdims=[
1398
- "inty",
1399
- "feature_uid",
1400
- "id_level",
1401
- "id_label",
1402
- "id_ion",
1403
- "id_class",
1404
- "color",
1405
- ],
1406
- label="ID by MS1, no MS2",
1407
- ).options(
1408
- color="color",
1409
- marker="diamond",
1410
- size=markersize,
1411
- fill_alpha=0.0,
1412
- tools=[oracle_hover_3],
1413
- )
1414
-
1415
- # feature_points_4 are all features that have ms2_scans null and id_level ==0
1416
- feature_points_4 = None
1417
- feat_df = feats.copy()
1418
- feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] < 1)]
1419
- if len(feat_df) > 0:
1420
- oracle_hover_4 = HoverTool(
1421
- tooltips=[
1422
- ("rt", "@rt"),
1423
- ("m/z", "@mz{0.0000}"),
1424
- ("feature_uid", "@feature_uid"),
1425
- ("inty", "@inty"),
1426
- ],
1427
- )
1428
- feature_points_4 = hv.Points(
1429
- feat_df,
1430
- kdims=["rt", "mz"],
1431
- vdims=["inty", "feature_uid"],
1432
- label="No ID, with MS2",
1433
- ).options(
1434
- color="gray",
1435
- marker="circle",
1436
- size=markersize,
1437
- fill_alpha=0.0,
1438
- tools=[oracle_hover_4],
1439
- )
1440
-
1441
- # feature_points_5 are all features that have ms2_scans null and id_level ==0
1442
- feature_points_5 = None
1443
- feat_df = feats.copy()
1444
- feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] < 1)]
1445
- if len(feat_df) > 0:
1446
- oracle_hover_5 = HoverTool(
1447
- tooltips=[
1448
- ("rt", "@rt"),
1449
- ("m/z", "@mz{0.0000}"),
1450
- ("feature_uid", "@feature_uid"),
1451
- ("inty", "@inty"),
1452
- ],
1453
- )
1454
- feature_points_5 = hv.Points(
1455
- feat_df,
1456
- kdims=["rt", "mz"],
1457
- vdims=["inty", "feature_uid"],
1458
- label="No ID, no MS2",
1459
- ).options(
1460
- color="gray",
1461
- marker="diamond",
1462
- fill_alpha=0.0,
1463
- size=markersize,
1464
- tools=[oracle_hover_5],
1465
- )
1466
-
1467
- overlay = raster
1468
-
1469
- if feature_points_1 is not None:
1470
- overlay = overlay * feature_points_1
1471
- if feature_points_2 is not None:
1472
- overlay = overlay * feature_points_2
1473
- if feature_points_3 is not None:
1474
- overlay = overlay * feature_points_3
1475
- if feature_points_4 is not None:
1476
- overlay = overlay * feature_points_4
1477
- # if not show_only_features_with_ms2:
1478
- if feature_points_5 is not None:
1479
- overlay = overlay * feature_points_5
1480
-
1481
- if title is not None:
1482
- overlay = overlay.opts(title=title)
1684
+ # Load and process oracle data
1685
+ feats = _load_and_merge_oracle_data(
1686
+ self,
1687
+ oracle_folder=oracle_folder,
1688
+ link_by_feature_uid=link_by_feature_uid,
1689
+ min_id_level=min_id_level,
1690
+ max_id_level=max_id_level,
1691
+ min_ms_level=min_ms_level
1692
+ )
1693
+
1694
+ if feats is None:
1695
+ return
1483
1696
 
1484
- # Create a panel layout
1485
- layout = panel.Column(overlay)
1697
+ # Set up color scheme and categorical mapping
1698
+ cvalues, color_column, colors = _setup_color_mapping(self, feats, colorby, cmap, legend_groups)
1699
+
1700
+ # Create feature overlay with all visualization elements
1701
+ overlay = _create_feature_overlay(
1702
+ self,
1703
+ raster=raster,
1704
+ feats=feats,
1705
+ cvalues=cvalues,
1706
+ color_column=color_column,
1707
+ colors=colors,
1708
+ markersize=markersize,
1709
+ title=title,
1710
+ legend=legend
1711
+ )
1486
1712
 
1487
- if filename is not None:
1488
- # if filename includes .html, save the panel layout to an HTML file
1489
- if filename.endswith(".html"):
1490
- layout.save(filename, embed=True)
1491
- else:
1492
- # save the panel layout as a png
1493
- hv.save(overlay, filename, fmt="png")
1494
- else:
1495
- # Check if we're in a notebook environment and display appropriately
1496
- return _display_plot(overlay, layout)
1713
+ # Handle output: export or display
1714
+ return _handle_output(self, overlay, filename)
1497
1715
 
1498
1716
 
1499
1717
  def plot_ms2_eic(
@@ -1756,96 +1974,6 @@ def plot_ms2_cycle(
1756
1974
  max_px=raster_max_px,
1757
1975
  )
1758
1976
 
1759
- """
1760
- feature_points_1 = None
1761
- feature_points_2 = None
1762
- feature_points_3 = None
1763
- feature_points_4 = None
1764
- feature_points_iso = None
1765
- # Plot features as red dots if features is True
1766
- if self.features_df is not None and show_features:
1767
- feats = self.features_df.clone()
1768
- # Convert to pandas for operations that require pandas functionality
1769
- if hasattr(feats, 'to_pandas'):
1770
- feats = feats.to_pandas()
1771
- # if ms2_scans is not null, keep only the first element of the list
1772
- feats['ms2_scans'] = feats['ms2_scans'].apply(lambda x: x[0] if type(x) == list else x)
1773
- # keep only iso==0, i.e. the main
1774
- feats = feats[feats['iso']==0]
1775
- # find features with ms2_scans not None and iso==0
1776
- features_df = feats[feats['ms2_scans'].notnull()]
1777
- feature_points_1 = hv.Points(
1778
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta", "ms2_scans"], label="Features with MS2 data"
1779
- ).options(
1780
- color=color_1,
1781
- marker=marker,
1782
- size=size_1,
1783
- tools=["hover"],
1784
- )
1785
- # find features without MS2 data
1786
- features_df = feats[feats['ms2_scans'].isnull()]
1787
- feature_points_2 = hv.Points(
1788
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta"], label="Features without MS2 data"
1789
- ).options(
1790
- color='red',
1791
- size=size_2,
1792
- marker=marker,
1793
- tools=["hover"],
1794
- )
1795
-
1796
- if show_isotopes:
1797
- feats = self.features_df
1798
- features_df = feats[feats['iso']>0]
1799
- feature_points_iso = hv.Points(
1800
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta", "iso", "iso_of"], label="Isotopes"
1801
- ).options(
1802
- color='violet',
1803
- marker=marker,
1804
- size=size_1,
1805
- tools=["hover"],
1806
- )
1807
- if show_ms2:
1808
- # find all self.scans_df with mslevel 2 that are not linked to a feature
1809
- ms2_orphan = self.scans_df.filter(pl.col('ms_level')==2).filter(pl.col("feature_uid")<0)
1810
-
1811
- if len(ms2_orphan) > 0:
1812
- # pandalize
1813
- ms2 = ms2_orphan.to_pandas()
1814
- feature_points_3 = hv.Points(
1815
- ms2, kdims=["rt", "prec_mz"], vdims=["index", "inty_tot", "bl"], label="Orphan MS2 scans"
1816
- ).options(
1817
- color=color_2,
1818
- marker='x',
1819
- size=size_2,
1820
- tools=["hover"],
1821
- )
1822
-
1823
- ms2_linked = self.scans_df.filter(pl.col('ms_level')==2).filter(pl.col("feature_uid")>=0)
1824
- if len(ms2_linked) > 0:
1825
- # pandalize
1826
- ms2 = ms2_linked.to_pandas()
1827
- feature_points_4 = hv.Points(
1828
- ms2, kdims=["rt", "prec_mz"], vdims=["index", "inty_tot", "bl"], label="Linked MS2 scans"
1829
- ).options(
1830
- color=color_1,
1831
- marker='x',
1832
- size=size_2,
1833
- tools=["hover"],
1834
- )
1835
-
1836
-
1837
- if feature_points_4 is not None:
1838
- overlay = overlay * feature_points_4
1839
- if feature_points_3 is not None:
1840
- overlay = overlay * feature_points_3
1841
- if feature_points_1 is not None:
1842
- overlay = overlay * feature_points_1
1843
- if not show_only_features_with_ms2:
1844
- if feature_points_2 is not None:
1845
- overlay = overlay * feature_points_2
1846
- if feature_points_iso is not None:
1847
- overlay = overlay * feature_points_iso
1848
- """
1849
1977
  if title is not None:
1850
1978
  overlay = overlay.opts(title=title)
1851
1979