PyPI - masster - Versions diffs - 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl - Mend

masster 0.5.9-py3-none-any.whl → 0.5.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (19)

masster/_version.py +1 -1
masster/sample/adducts.py +2 -2
masster/sample/helpers.py +47 -15
masster/sample/plot.py +1209 -912
masster/sample/processing.py +1 -1
masster/sample/sample.py +91 -48
masster/study/export.py +4 -6
masster/study/h5.py +66 -15
masster/study/helpers.py +24 -5
masster/study/load.py +1 -164
masster/study/merge.py +16 -18
masster/study/plot.py +105 -35
masster/study/processing.py +17 -14
masster/study/study5_schema.json +3 -0
{masster-0.5.9.dist-info → masster-0.5.11.dist-info}/METADATA +3 -1
{masster-0.5.9.dist-info → masster-0.5.11.dist-info}/RECORD +19 -19
{masster-0.5.9.dist-info → masster-0.5.11.dist-info}/WHEEL +0 -0
{masster-0.5.9.dist-info → masster-0.5.11.dist-info}/entry_points.txt +0 -0
{masster-0.5.9.dist-info → masster-0.5.11.dist-info}/licenses/LICENSE +0 -0

masster/sample/plot.py CHANGED Viewed

@@ -43,6 +43,7 @@ See Also:
 """
 import os
+import warnings
 import datashader as ds
 import holoviews as hv
@@ -55,18 +56,11 @@ import polars as pl
 from bokeh.models import HoverTool
 from holoviews import dim
 from holoviews.plotting.util import process_cmap
-from matplotlib.colors import rgb2hex
-# Import cmap for colormap handling
-try:
-    from cmap import Colormap
-except ImportError:
-    Colormap = None
+from cmap import Colormap
 # Parameters removed - using hardcoded defaults
-hv.extension("bokeh")
+# hv.extension("bokeh")
 def _process_cmap(cmap, fallback="viridis", logger=None):
@@ -85,8 +79,8 @@ def _process_cmap(cmap, fallback="viridis", logger=None):
     if cmap is None:
         cmap = "viridis"
     elif cmap == "grey":
-        cmap = "Greys256"
+        cmap = "greys"
     # If cmap package is not available, fall back to process_cmap
     if Colormap is None:
         if logger:
@@ -205,6 +199,108 @@ def _display_plot(plot_object, layout=None):
         return None
+def _export_with_webdriver_manager(plot_obj, filename, format_type, logger=None):
+    """
+    Export plot to PNG or SVG using webdriver-manager for automatic driver management.
+    Parameters:
+        plot_obj: Bokeh plot object or holoviews object to export
+        filename: Output filename
+        format_type: Either "png" or "svg"
+        logger: Logger for error reporting (optional)
+    Returns:
+        bool: True if export successful, False otherwise
+    """
+    try:
+        # Convert holoviews to bokeh if needed
+        if hasattr(plot_obj, 'opts'):  # Likely a holoviews object
+            import holoviews as hv
+            bokeh_plot = hv.render(plot_obj)
+        else:
+            bokeh_plot = plot_obj
+        # Try webdriver-manager export first
+        try:
+            from webdriver_manager.chrome import ChromeDriverManager
+            from selenium import webdriver
+            from selenium.webdriver.chrome.service import Service
+            from selenium.webdriver.chrome.options import Options
+            # Set up Chrome options for headless operation
+            chrome_options = Options()
+            chrome_options.add_argument("--headless")
+            chrome_options.add_argument("--no-sandbox")
+            chrome_options.add_argument("--disable-dev-shm-usage")
+            chrome_options.add_argument("--disable-gpu")
+            # Use webdriver-manager to automatically get the correct ChromeDriver
+            service = Service(ChromeDriverManager().install())
+            driver = webdriver.Chrome(service=service, options=chrome_options)
+            # Export with managed webdriver
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", category=UserWarning)
+                # Filter out bokeh.io.export warnings specifically
+                warnings.filterwarnings("ignore", module="bokeh.io.export")
+                if format_type == "png":
+                    from bokeh.io import export_png
+                    export_png(bokeh_plot, filename=filename, webdriver=driver)
+                elif format_type == "svg":
+                    from bokeh.io import export_svg
+                    export_svg(bokeh_plot, filename=filename, webdriver=driver)
+                else:
+                    raise ValueError(f"Unsupported format: {format_type}")
+            driver.quit()
+            return True
+        except ImportError:
+            if logger:
+                logger.debug(f"webdriver-manager not available, using default {format_type.upper()} export")
+            # Fall back to default export
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", category=UserWarning)
+                # Filter out bokeh.io.export warnings specifically
+                warnings.filterwarnings("ignore", module="bokeh.io.export")
+                if format_type == "png":
+                    from bokeh.io import export_png
+                    export_png(bokeh_plot, filename=filename)
+                elif format_type == "svg":
+                    from bokeh.io import export_svg
+                    export_svg(bokeh_plot, filename=filename)
+            return True
+        except Exception as e:
+            if logger:
+                logger.debug(f"{format_type.upper()} export with webdriver-manager failed: {e}, using default {format_type.upper()} export")
+            try:
+                # Final fallback to default export
+                with warnings.catch_warnings():
+                    warnings.simplefilter("ignore", category=UserWarning)
+                    # Filter out bokeh.io.export warnings specifically
+                    warnings.filterwarnings("ignore", module="bokeh.io.export")
+                    if format_type == "png":
+                        from bokeh.io import export_png
+                        export_png(bokeh_plot, filename=filename)
+                    elif format_type == "svg":
+                        from bokeh.io import export_svg
+                        export_svg(bokeh_plot, filename=filename)
+                return True
+            except Exception as e2:
+                if logger:
+                    logger.error(f"{format_type.upper()} export failed: {e2}")
+                return False
+    except Exception as e:
+        if logger:
+            logger.error(f"Export preparation failed: {e}")
+        return False
 def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh"):
     """
     Helper function to handle consistent save/display behavior for sample plots.
@@ -236,16 +332,11 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
                 save(plot_obj)
             self.logger.success(f"Plot saved to: {abs_filename}")
         elif filename.endswith(".png"):
-            try:
-                if plot_type == "bokeh":
-                    from bokeh.io.export import export_png
-                    export_png(plot_obj, filename=filename)
-                elif plot_type in ["panel", "holoviews"]:
-                    import holoviews as hv
-                    hv.save(plot_obj, filename, fmt="png")
+            success = _export_with_webdriver_manager(plot_obj, filename, "png", self.logger)
+            if success:
                 self.logger.success(f"Plot saved to: {abs_filename}")
-            except Exception:
-                # Fall back to HTML if PNG export not available
+            else:
+                # Fall back to HTML if PNG export fails completely
                 html_filename = filename.replace('.png', '.html')
                 abs_html_filename = os.path.abspath(html_filename)
                 if plot_type == "panel":
@@ -259,6 +350,25 @@ def _handle_sample_plot_output(self, plot_obj, filename=None, plot_type="bokeh")
                     output_file(html_filename)
                     save(plot_obj)
                 self.logger.warning(f"PNG export not available, saved as HTML instead: {abs_html_filename}")
+        elif filename.endswith(".svg"):
+            success = _export_with_webdriver_manager(plot_obj, filename, "svg", self.logger)
+            if success:
+                self.logger.success(f"Plot saved to: {abs_filename}")
+            else:
+                # Fall back to HTML if SVG export fails completely
+                html_filename = filename.replace('.svg', '.html')
+                abs_html_filename = os.path.abspath(html_filename)
+                if plot_type == "panel":
+                    plot_obj.save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "holoviews":
+                    import panel
+                    panel.panel(plot_obj).save(html_filename, embed=True)  # type: ignore[attr-defined]
+                elif plot_type == "bokeh":
+                    from bokeh.plotting import output_file
+                    from bokeh.io import save
+                    output_file(html_filename)
+                    save(plot_obj)
+                self.logger.warning(f"SVG export not available, saved as HTML instead: {abs_html_filename}")
         elif filename.endswith(".pdf"):
             # Try to save as PDF, fall back to HTML if not available
             try:
@@ -444,101 +554,25 @@ def plot_chrom(
     self._handle_sample_plot_output(layout, filename, "panel")
-def plot_2d(
-    self,
-    filename=None,
-    show_features=True,
-    show_only_features_with_ms2=False,
-    show_isotopes=False,
-    show_ms2=False,
-    show_in_browser=False,
-    title=None,
-    cmap='iridescent',
-    marker="circle",
-    markersize=5,
-    size="static",
-    raster_dynamic=True,
-    raster_max_px=8,
-    raster_threshold=0.8,
-    height=600,
-    width=750,
-    mz_range=None,
-    rt_range=None
-):
-    """
-    Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
-    of feature and MS2 scan information.
-    This method creates a plot from the internal MS1 data loaded into self.ms1_df
-    and optionally overlays various feature and MS2 information depending on the provided
-    parameters. The visualization is built using HoloViews and Holoviews dynamic rasterization,
-    together with Panel for layout and exporting.
-    Parameters:
-        filename (str, optional):
-            Path to save the plot. If provided and ends with ".html", the plot is saved as an
-            interactive HTML file; otherwise, it is saved as a PNG image.
-        show_features (bool, default True):
-            Whether to overlay detected features on the plot.
-        show_only_features_with_ms2 (bool, default False):
-            If True, only display features that have associated MS2 scans. When False,
-            features without MS2 data are also shown.
-        show_isotopes (bool, default False):
-            Whether to overlay isotope information on top of the features.
-        show_ms2 (bool, default False):
-            Whether to overlay MS2 scan information on the plot.
-        title (str, optional):
-            Title of the plot.
-        cmap (str, optional):
-            Colormap to use for the background rasterized data. Defaults to "iridescent_r" unless
-            modified (e.g., if set to "grey", it is changed to "Greys256").
-        marker (str, default 'circle'):
-            Marker type to use for feature and MS2 points.
-        markersize (int, default 10):
-            Base size of the markers used for plotting points.
-        size (str, default 'dynamic'):
-            Controls marker sizing behavior. Options: 'dynamic', 'static', or 'slider'.
-            - 'dynamic': Uses coordinate-based sizing that scales with zoom level (markers get larger when zooming in)
-            - 'static': Uses screen-based sizing that remains constant regardless of zoom level
-            - 'slider': Provides an interactive slider to dynamically adjust marker size
-        raster_dynamic (bool, default True):
-            Whether to use dynamic rasterization for the background point cloud.
-        raster_max_px (int, default 8):
-            Maximum pixel size for dynamic rasterization when using dynspread.
-        raster_threshold (float, default 0.8):
-            Threshold used for the dynspread process in dynamic rasterization.
-    Behavior:
-        - Checks for a loaded mzML file by verifying that self.file_obj is not None.
-        - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
-          points (inty < 1).
-        - Sets up the plot bounds for retention time (rt) and mass-to-charge ratio (mz) using a hook function.
-        - Renders the MS1 data as a background rasterized image with a logarithmic intensity normalization.
-        - Conditionally overlays feature points (with and without MS2 information), isotopes (if requested),
-          and MS2 scan points based on internal DataFrame data.
-        - Depending on the filename parameter, either displays the plot interactively using Panel or
-          saves it as an HTML or PNG file.
-    Returns:
-        None
-    Side Effects:
-        - May print a warning if no mzML file is loaded.
-        - Either shows the plot interactively or writes the output to a file.
-    """
-    if self.ms1_df is None:
-        self.logger.error("No MS1 data available.")
-        return
+def _create_raster_plot(sample, mz_range=None, rt_range=None, raster_cmap='greys',
+                       raster_log=True, raster_min=1, raster_dynamic=True, raster_threshold=0.8, raster_max_px=8,
+                       width=750, height=600, filename=None):
+    """Create the raster plot layer from MS1 data."""
+    # Process colormap using the cmap package with proper error handling
+    raster_cmap_processed = _process_cmap(raster_cmap if raster_cmap is not None else 'greys', fallback="greys", logger=sample.logger)
-    # Process colormap using the cmap package
-    cmap_palette = _process_cmap(cmap, fallback="iridescent", logger=self.logger)
+    # get columns rt, mz, inty from sample.ms1_df, It's polars DataFrame
+    spectradf = sample.ms1_df.to_pandas()
-    # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
-    spectradf = self.ms1_df.select(["rt", "mz", "inty"])
-    # remove any inty<1
-    spectradf = spectradf.filter(pl.col("inty") >= 1)
+    # remove any inty<raster_min
+    spectradf = spectradf[spectradf["inty"] >= raster_min]
     # keep only rt, mz, and inty
-    spectradf = spectradf.select(["rt", "mz", "inty"])
+    spectradf = spectradf[["rt", "mz", "inty"]]
     if mz_range is not None:
-        spectradf = spectradf.filter((pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]))
+        spectradf = spectradf[(spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])]
     if rt_range is not None:
-        spectradf = spectradf.filter((pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]))
+        spectradf = spectradf[(spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])]
     maxrt = spectradf["rt"].max()
     minrt = spectradf["rt"].min()
     maxmz = spectradf["mz"].max()
@@ -563,103 +597,31 @@ def plot_2d(
         tools=["hover"],
     )
-    # Configure marker and size behavior based on size parameter
-    use_dynamic_sizing = size.lower() in ["dyn", "dynamic"]
-    use_slider_sizing = size.lower() == "slider"
-    def dynamic_sizing_hook(plot, element):
-        """Hook to convert size-based markers to radius-based for dynamic behavior"""
-        try:
-            if use_dynamic_sizing and hasattr(plot, "state") and hasattr(plot.state, "renderers"):
-                from bokeh.models import Circle
-                for renderer in plot.state.renderers:
-                    if hasattr(renderer, "glyph"):
-                        glyph = renderer.glyph
-                        # Check if it's a circle/scatter glyph that we can convert
-                        if hasattr(glyph, "size") and marker_type == "circle":
-                            # Create a new Circle glyph with radius instead of size
-                            new_glyph = Circle(
-                                x=glyph.x,
-                                y=glyph.y,
-                                radius=base_radius,
-                                fill_color=glyph.fill_color,
-                                line_color=glyph.line_color,
-                                fill_alpha=glyph.fill_alpha,
-                                line_alpha=glyph.line_alpha,
-                            )
-                            renderer.glyph = new_glyph
-        except Exception:
-            # Silently fail and use regular sizing if hook doesn't work
-            pass
-    if use_dynamic_sizing:
-        # Dynamic sizing: use coordinate-based sizing that scales with zoom
-        marker_type = "circle"
-        # Calculate radius based on data range for coordinate-based sizing
-        rtrange = maxrt - minrt
-        mzrange = maxmz - minmz
-        # Use a fraction of the smaller dimension for radius
-        base_radius = min(rtrange, mzrange) * 0.0005 * markersize
-        size_1 = markersize  # Use regular size initially, hook will convert to radius
-        size_2 = markersize
-        hooks = [dynamic_sizing_hook]
-    elif use_slider_sizing:
-        # Slider sizing: create an interactive slider for marker size
-        marker_type = marker  # Use the original marker parameter
-        size_1 = markersize  # Use markersize initially, will be updated by slider
-        size_2 = markersize
-        base_radius = None  # Not used in slider mode
-        hooks = []
-    else:
-        # Static sizing: use pixel-based sizing that stays fixed
-        marker_type = marker  # Use the original marker parameter
-        size_1 = markersize
-        size_2 = markersize
-        base_radius = None  # Not used in static mode
-        hooks = []
-    color_1 = "forestgreen"
-    color_2 = "darkorange"
     if filename is not None:
         dyn = False
         if not filename.endswith(".html"):
-            if use_dynamic_sizing:
-                # For exported files, use smaller coordinate-based size
-                size_1 = 2
-                size_2 = 2
-            else:
-                size_1 = 2
-                size_2 = 2
-            color_1 = "forestgreen"
-            color_2 = "darkorange"
             raster_dynamic = False
-    # For slider functionality, disable raster dynamic to avoid DynamicMap nesting
-    if use_slider_sizing:
-        raster_dynamic = False
     dyn = raster_dynamic
     raster = hd.rasterize(
         points,
         aggregator=ds.max("inty"),
         interpolation="bilinear",
-        dynamic=dyn,  # alpha=10,                min_alpha=0,
+        dynamic=dyn,
     ).opts(
         active_tools=["box_zoom"],
-        cmap=cmap_palette,
+        cmap=raster_cmap_processed,
         tools=["hover"],
         hooks=[new_bounds_hook],
         width=width,
         height=height,
-        cnorm="log",
+        cnorm="log" if raster_log else "linear",
         xlabel="Retention time (s)",
         ylabel="m/z",
         colorbar=True,
         colorbar_position="right",
-        axiswise=True,
+        axiswise=True
     )
     raster = hd.dynspread(
         raster,
         threshold=raster_threshold,
@@ -667,97 +629,861 @@ def plot_2d(
         shape="square",
         max_px=raster_max_px,
     )
-    feature_points_1 = None
-    feature_points_2 = None
-    feature_points_3 = None
-    feature_points_4 = None
-    feature_points_iso = None
-    # Plot features as red dots if features is True
-    if self.features_df is not None and show_features:
-        feats = self.features_df.clone()
-        # Convert to pandas for operations that require pandas functionality
-        if hasattr(feats, "to_pandas"):
-            feats = feats.to_pandas()
-        # if ms2_scans is not null, keep only the first element of the list
-        feats["ms2_scans"] = feats["ms2_scans"].apply(
-            lambda x: x[0] if isinstance(x, list) else x,
-        )
-        if mz_range is not None:
-            feats = feats[(feats["mz"] >= mz_range[0]) & (feats["mz"] <= mz_range[1])]
-        if rt_range is not None:
-            feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
-        # keep only iso==0, i.e. the main
-        feats = feats[feats["iso"] == 0]
-        # find features with ms2_scans not None  and iso==0
-        features_df = feats[feats["ms2_scans"].notnull()]
-        # Create feature points with proper sizing method
-        feature_hover_1 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-                ("iso", "@iso"),
-                ("adduct", "@adduct"),
-                ("chrom_coherence", "@chrom_coherence"),
-                ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-            ],
-        )
-        feature_points_1 = hv.Points(
-            features_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "feature_uid",
-                "inty",
-                "iso",
-                "adduct",
-                "ms2_scans",
-                "chrom_coherence",
-                "chrom_prominence_scaled",
-            ],
-            label="Features with MS2 data",
-        ).options(
-            color=color_1,
-            marker=marker_type,
-            size=size_1,
-            tools=[feature_hover_1],
-            hooks=hooks,
-        )
-        # find features without MS2 data
-        features_df = feats[feats["ms2_scans"].isnull()]
-        feature_hover_2 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-                ("iso", "@iso"),
-                ("adduct", "@adduct"),
-                ("chrom_coherence", "@chrom_coherence"),
-                ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-            ],
-        )
-        feature_points_2 = hv.Points(
-            features_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "feature_uid",
-                "inty",
-                "iso",
-                "adduct",
-                "chrom_coherence",
-                "chrom_prominence_scaled",
-            ],
-            label="Features without MS2 data",
-        ).options(
-            color="red",
-            marker=marker_type,
-            size=size_2,
-            tools=[feature_hover_2],
-            hooks=hooks,
-        )
+    return raster
-        if show_isotopes:
+def _load_and_merge_oracle_data(sample, oracle_folder, link_by_feature_uid,
+                               min_id_level, max_id_level, min_ms_level):
+    """Load oracle data and merge with features."""
+    if sample.features_df is None:
+        sample.logger.error("Cannot plot 2D oracle: features_df is not available")
+        return None
+    feats = sample.features_df.clone()
+    sample.logger.debug(f"Features data shape: {len(feats)} rows")
+    # Convert to pandas for oracle operations that require pandas functionality
+    if hasattr(feats, "to_pandas"):
+        feats = feats.to_pandas()
+    # check if annotationfile is not None
+    if oracle_folder is None:
+        sample.logger.info("No oracle folder provided, plotting features only")
+        return None
+    # try to read the annotationfile as a csv file and add it to feats
+    oracle_file_path = os.path.join(oracle_folder, "diag", "summary_by_feature.csv")
+    sample.logger.debug(f"Loading oracle data from: {oracle_file_path}")
+    try:
+        oracle_data = pd.read_csv(oracle_file_path)
+        sample.logger.info(f"Oracle data loaded successfully with {len(oracle_data)} rows")
+    except Exception as e:
+        sample.logger.error(f"Could not read {oracle_file_path}: {e}")
+        return None
+    if link_by_feature_uid:
+        cols_to_keep = [
+            "title", "scan_idx", "mslevel", "hits", "id_level", "id_label",
+            "id_ion", "id_class", "id_evidence", "score", "score2",
+        ]
+        oracle_data = oracle_data[cols_to_keep]
+        # extract feature_uid from title. It begins with "uid:XYZ,"
+        sample.logger.debug("Extracting feature UIDs from oracle titles using pattern 'uid:(\\d+)'")
+        oracle_data["feature_uid"] = oracle_data["title"].str.extract(r"uid:(\d+)")
+        oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
+        # sort by id_level, remove duplicate feature_uid, keep the first one
+        sample.logger.debug("Sorting by ID level and removing duplicates")
+        oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
+        oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")
+        sample.logger.debug(f"After deduplication: {len(oracle_data)} unique oracle annotations")
+    else:
+        cols_to_keep = [
+            "precursor", "rt", "title", "scan_idx", "mslevel", "hits", "id_level",
+            "id_label", "id_ion", "id_class", "id_evidence", "score", "score2",
+        ]
+        oracle_data = oracle_data[cols_to_keep]
+        oracle_data["feature_uid"] = None
+        # iterate over the rows and find the feature_uid in feats by looking at the closest rt and mz
+        for i, row in oracle_data.iterrows():
+            candidates = feats[
+                (abs(feats["rt"] - row["rt"]) < 1) & (abs(feats["mz"] - row["precursor"]) < 0.005)
+            ].copy()
+            if len(candidates) > 0:
+                # sort by delta rt
+                candidates["delta_rt"] = abs(candidates["rt"] - row["rt"])
+                candidates = candidates.sort_values(by=["delta_rt"])
+                oracle_data.at[i, "feature_uid"] = candidates["feature_uid"].values[0]
+        # remove precursor and rt columns
+        oracle_data = oracle_data.drop(columns=["precursor", "rt"])
+    # Merge features with oracle data
+    sample.logger.debug(f"Merging {len(feats)} features with oracle data")
+    feats = feats.merge(oracle_data, how="left", on="feature_uid")
+    sample.logger.debug(f"After merge: {len(feats)} total features")
+    # filter feats by id_level
+    initial_count = len(feats)
+    if min_id_level is not None:
+        feats = feats[(feats["id_level"] >= min_id_level)]
+        sample.logger.debug(f"After min_id_level filter ({min_id_level}): {len(feats)} features")
+    if max_id_level is not None:
+        feats = feats[(feats["id_level"] <= max_id_level)]
+        sample.logger.debug(f"After max_id_level filter ({max_id_level}): {len(feats)} features")
+    if min_ms_level is not None:
+        feats = feats[(feats["mslevel"] >= min_ms_level)]
+        sample.logger.debug(f"After min_ms_level filter ({min_ms_level}): {len(feats)} features")
+    sample.logger.info(f"Feature filtering complete: {initial_count} → {len(feats)} features remaining")
+    return feats
+def _setup_color_mapping(sample, feats, colorby, cmap, legend_groups=None):
+    """Set up categorical color mapping for features."""
+    import matplotlib.colors as mcolors
+    feats["color"] = "black"  # Default fallback color
+    cvalues = None
+    color_column = "color"  # Default to fixed color
+    colors = []
+    # Determine which column to use for categorical coloring
+    if colorby in ["class", "hg", "id_class", "id_hg"]:
+        categorical_column = "id_class"
+        # replace nans with 'mix'
+        feats[categorical_column] = feats[categorical_column].fillna("mix")
+    elif colorby in ["ion", "id_ion"]:
+        categorical_column = "id_ion"
+        feats[categorical_column] = feats[categorical_column].fillna("mix")
+    elif colorby in ["evidence", "id_evidence"]:
+        categorical_column = "id_evidence"
+        feats[categorical_column] = feats[categorical_column].fillna("mix")
+    elif colorby in ["level", "id_level"]:
+        categorical_column = "id_level"
+        feats[categorical_column] = feats[categorical_column].fillna("mix")
+    else:
+        categorical_column = None
+    if categorical_column is not None:
+        # Use provided legend_groups or derive from data
+        if legend_groups is not None:
+            # Use all specified groups to ensure consistent legend/coloring
+            cvalues = legend_groups[:]  # Copy the list
+            # Ensure 'mix' is always present as the last group if not already included
+            if 'mix' not in cvalues:
+                cvalues.append('mix')
+            sample.logger.info(f"Using provided legend_groups for legend: {cvalues}")
+            # Check which provided groups actually have data
+            present_groups = feats[categorical_column].unique()
+            missing_groups = [grp for grp in cvalues if grp not in present_groups]
+            if missing_groups:
+                sample.logger.warning(f"Provided legend_groups not found in data: {missing_groups}")
+            sample.logger.info(f"Groups present in data: {sorted(present_groups)}")
+            # Assign any points not in legend_groups to 'mix'
+            feats.loc[~feats[categorical_column].isin(cvalues[:-1]), categorical_column] = 'mix'
+        else:
+            # Original behavior: use only groups present in data
+            cvalues = feats[categorical_column].unique()
+            # sort alphabetically
+            cvalues = sorted(cvalues)
+            # flip the strings left to right
+            fcvalues = [cvalues[i][::-1] for i in range(len(cvalues))]
+            # sort in alphabetical order the flipped strings and return the index
+            idx = np.argsort(fcvalues)
+            # apply to cvalues
+            cvalues = [cvalues[i] for i in idx]
+            sample.logger.info(f"Using groups derived from data: {cvalues}")
+        color_column = categorical_column  # Use categorical coloring
+    # Process colormap for categorical data
+    if cvalues is not None:
+        num_colors = len(cvalues)
+        # Use colormap for categorical data - use _process_cmap for proper handling
+        try:
+            colormap = Colormap(cmap)
+            colors = []
+            for i in range(num_colors):
+                # Generate evenly spaced colors across the colormap
+                t = i / (num_colors - 1) if num_colors > 1 else 0.5
+                color = colormap(t)
+                # Convert to hex - handle different color formats
+                if hasattr(color, '__len__') and len(color) >= 3:
+                    # It's an array-like color (RGB or RGBA)
+                    colors.append(mcolors.to_hex(color[:3]))
+                else:
+                    # It's a single value, convert to RGB
+                    colors.append(mcolors.to_hex([color, color, color]))
+        except (AttributeError, ValueError, TypeError):
+            # Fallback to using _process_cmap if direct Colormap fails
+            cmap_palette = _process_cmap(cmap, fallback="viridis", logger=sample.logger)
+            # Sample colors from the palette
+            colors = []
+            for i in range(num_colors):
+                idx = int(i * (len(cmap_palette) - 1) / (num_colors - 1)) if num_colors > 1 else len(cmap_palette) // 2
+                colors.append(cmap_palette[idx])
+        # Create a mapping from class name to color to ensure consistent color assignment
+        # Each class gets the same color based on its position in the cvalues list
+        class_to_color = {class_name: colors[i] for i, class_name in enumerate(cvalues)}
+        # assign color to each row based on colorby category
+        feats["color"] = "black"
+        for class_name, color in class_to_color.items():
+            if colorby in ["class", "hg", "id_class", "id_hg"]:
+                feats.loc[feats["id_class"] == class_name, "color"] = color
+            elif colorby in ["ion", "id_ion"]:
+                feats.loc[feats["id_ion"] == class_name, "color"] = color
+            elif colorby in ["id_evidence", "ms2_evidence"]:
+                feats.loc[feats["id_evidence"] == class_name, "color"] = color
+    return cvalues, color_column, colors
+def _create_feature_overlay(sample, raster, feats, cvalues, color_column, colors,
+                           markersize, title, legend):
+    """Create feature overlay with identified and unidentified features."""
+    # replace NaN with 0 in id_level
+    feats["id_level"] = feats["id_level"].fillna(0)
+    # Create unified visualization with all features in single layer
+    # This avoids the multiple layer legend conflicts that cause dark colors and shared toggling
+    sample.logger.debug("Creating unified feature visualization with categorical coloring")
+    # Prepare categorical coloring for identified features only (id_level >= 1)
+    identified_feats = feats[feats["id_level"] >= 1].copy() if len(feats[feats["id_level"] >= 1]) > 0 else pd.DataFrame()
+    unidentified_feats = feats[feats["id_level"] < 1].copy() if len(feats[feats["id_level"] < 1]) > 0 else pd.DataFrame()
+    overlay = raster
+    # Single layer for identified features with categorical coloring
+    if len(identified_feats) > 0 and cvalues is not None:
+        # Create proper confidence-based marker styling
+        identified_feats["marker_style"] = identified_feats["id_level"].apply(
+            lambda x: "circle" if x >= 2 else "circle_cross"
+        )
+        identified_feats["fill_alpha"] = identified_feats["id_level"].apply(
+            lambda x: 1.0 if x >= 2 else 0.3  # Full opacity for high conf, transparent for medium
+        )
+        oracle_hover_identified = HoverTool(
+            tooltips=[
+                ("rt", "@rt"),
+                ("m/z", "@mz{0.0000}"),
+                ("feature_uid", "@feature_uid"),
+                ("id_level", "@id_level"),
+                ("id_class", "@id_class"),
+                ("id_label", "@id_label"),
+                ("id_ion", "@id_ion"),
+                ("id_evidence", "@id_evidence"),
+                ("score", "@score"),
+                ("score2", "@score2"),
+            ],
+        )
+        # Create completely separate overlay elements for each category
+        overlays_to_combine = [raster]  # Start with raster base
+        for i, category in enumerate(cvalues):
+            category_data = identified_feats[identified_feats[color_column] == category].copy()
+            if len(category_data) > 0:
+                # Create a completely separate Points element for this category
+                category_points = hv.Points(
+                    category_data,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "inty", "feature_uid", "id_level", "id_class", "id_label",
+                        "id_ion", "id_evidence", "score", "score2", "fill_alpha"
+                    ],
+                    label=str(category)  # This becomes the legend label
+                ).options(
+                    color=colors[i],  # Use pre-computed hex color for this category
+                    marker="circle",
+                    size=markersize,
+                    alpha="fill_alpha",
+                    tools=[oracle_hover_identified],
+                    show_legend=True,
+                )
+                overlays_to_combine.append(category_points)
+            else:
+                # Create empty Points element for categories with no data to ensure they appear in legend
+                empty_data = pd.DataFrame(columns=['rt', 'mz', 'inty', 'feature_uid', 'id_level',
+                                                  'id_class', 'id_label', 'id_ion', 'id_evidence',
+                                                  'score', 'score2', 'fill_alpha'])
+                category_points = hv.Points(
+                    empty_data,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "inty", "feature_uid", "id_level", "id_class", "id_label",
+                        "id_ion", "id_evidence", "score", "score2", "fill_alpha"
+                    ],
+                    label=str(category)  # This becomes the legend label
+                ).options(
+                    color=colors[i],  # Use pre-computed hex color for this category
+                    marker="circle",
+                    size=markersize,
+                    alpha=1.0,
+                    tools=[oracle_hover_identified],
+                    show_legend=True,
+                )
+                overlays_to_combine.append(category_points)
+        # Combine all overlays
+        overlay = overlays_to_combine[0]  # Start with raster
+        for layer in overlays_to_combine[1:]:
+            overlay = overlay * layer
+    else:
+        # No categorical data - just set overlay to raster
+        overlay = raster
+    # Separate layer for unidentified features (always black crosses)
+    if len(unidentified_feats) > 0:
+        oracle_hover_no_id = HoverTool(
+            tooltips=[
+                ("rt", "@rt"),
+                ("m/z", "@mz{0.0000}"),
+                ("feature_uid", "@feature_uid"),
+                ("id_level", "@id_level"),
+            ],
+        )
+        feature_points_no_id = hv.Points(
+            unidentified_feats,
+            kdims=["rt", "mz"],
+            vdims=["inty", "feature_uid", "id_level"],
+        ).options(
+            color="black",
+            marker="x",
+            size=markersize,
+            alpha=1.0,
+            tools=[oracle_hover_no_id],
+            show_legend=False,
+        )
+        overlay = overlay * feature_points_no_id
+    if title is not None:
+        sample.logger.debug(f"Setting plot title: {title}")
+        overlay = overlay.opts(title=title)
+    # Configure legend if requested and categorical coloring is available
+    if legend is not None and cvalues is not None and len(cvalues) > 1:
+        sample.logger.debug(f"Configuring integrated legend at '{legend}' position with {len(cvalues)} categories: {cvalues}")
+        # Map legend position parameter to HoloViews legend position
+        legend_position_map = {
+            "top_right": "top_right",
+            "top_left": "top_left",
+            "bottom_right": "bottom_right",
+            "bottom_left": "bottom_left",
+            "right": "right",
+            "left": "left",
+            "top": "top",
+            "bottom": "bottom"
+        }
+        hv_legend_pos = legend_position_map.get(legend, "bottom_right")
+        # Apply legend configuration to the overlay
+        overlay = overlay.opts(
+            legend_position=hv_legend_pos,
+            legend_opts={'title': '', 'padding': 2, 'spacing': 2}
+        )
+        sample.logger.debug(f"Applied integrated legend at position '{hv_legend_pos}'")
+    elif legend is None:
+        # Explicitly hide legend when legend=None
+        overlay = overlay.opts(show_legend=False)
+        sample.logger.debug("Legend hidden (legend=None)")
+    return overlay
+def _handle_output(sample, overlay, filename):
+    """Handle plot export or display."""
+    if filename is not None:
+        # if filename includes .html, save the layout to an HTML file
+        if filename.endswith(".html"):
+            # For HoloViews overlay, we need to convert to Panel for saving
+            panel.Column(overlay).save(filename, embed=True)
+        elif filename.endswith(".svg"):
+            success = _export_with_webdriver_manager(overlay, filename, "svg", sample.logger)
+            if success:
+                sample.logger.success(f"SVG exported: {os.path.abspath(filename)}")
+            else:
+                sample.logger.warning(f"SVG export failed: {os.path.abspath(filename)}")
+        elif filename.endswith(".png"):
+            success = _export_with_webdriver_manager(overlay, filename, "png", sample.logger)
+            if success:
+                sample.logger.success(f"PNG exported: {os.path.abspath(filename)}")
+            else:
+                sample.logger.warning(f"PNG export failed: {os.path.abspath(filename)}")
+        else:
+            # Default to PNG for any other format
+            png_filename = filename + ".png" if not filename.endswith(('.png', '.svg', '.html')) else filename
+            success = _export_with_webdriver_manager(overlay, png_filename, "png", sample.logger)
+            if success:
+                sample.logger.success(f"PNG exported: {os.path.abspath(png_filename)}")
+            else:
+                sample.logger.warning(f"PNG export failed: {os.path.abspath(png_filename)}")
+    else:
+        # Create a Panel layout for consistent alignment with plot_2d()
+        layout = panel.Column(overlay)
+        # Return the Panel layout (consistent with plot_2d behavior)
+        return layout
+def plot_2d(
+    self,
+    filename=None,
+    show_features=True,
+    show_only_features_with_ms2=False,
+    show_isotopes=False,
+    show_ms2=False,
+    show_in_browser=False,
+    title=None,
+    cmap='iridescent',
+    marker="circle",
+    markersize=5,
+    size="static",
+    raster_log=True,
+    raster_min=1,
+    raster_dynamic=True,
+    raster_max_px=8,
+    raster_threshold=0.8,
+    height=600,
+    width=750,
+    mz_range=None,
+    rt_range=None,
+    legend=None,
+    colorby=None
+):
+    """
+    Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
+    of feature and MS2 scan information.
+    This method creates a plot from the internal MS1 data loaded into self.ms1_df
+    and optionally overlays various feature and MS2 information depending on the provided
+    parameters. The visualization is built using HoloViews and Holoviews dynamic rasterization,
+    together with Panel for layout and exporting.
+    Parameters:
+        filename (str, optional):
+            Path to save the plot. If provided and ends with ".html", the plot is saved as an
+            interactive HTML file; otherwise, it is saved as a PNG image.
+        show_features (bool, default True):
+            Whether to overlay detected features on the plot.
+        show_only_features_with_ms2 (bool, default False):
+            If True, only display features that have associated MS2 scans. When False,
+            features without MS2 data are also shown.
+        show_isotopes (bool, default False):
+            Whether to overlay isotope information on top of the features.
+        show_ms2 (bool, default False):
+            Whether to overlay MS2 scan information on the plot.
+        title (str, optional):
+            Title of the plot.
+        cmap (str, optional):
+            Colormap to use for the background rasterized data. Defaults to "iridescent";
+            some names are remapped (e.g., if set to "grey", it is changed to "Greys256").
+        marker (str, default 'circle'):
+            Marker type to use for feature and MS2 points.
+        markersize (int, default 5):
+            Base size of the markers used for plotting points.
+        size (str, default 'static'):
+            Controls marker sizing behavior. Options: 'dynamic', 'static', or 'slider'.
+            - 'dynamic': Uses coordinate-based sizing that scales with zoom level (markers get larger when zooming in)
+            - 'static': Uses screen-based sizing that remains constant regardless of zoom level
+            - 'slider': Provides an interactive slider to dynamically adjust marker size
+        raster_log (bool, default True):
+            Use logarithmic scaling for raster intensity (True) or linear scaling (False).
+        raster_min (float, default 1):
+            Minimum intensity threshold for raster data filtering.
+        raster_dynamic (bool, default True):
+            Whether to use dynamic rasterization for the background point cloud.
+        raster_max_px (int, default 8):
+            Maximum pixel size for dynamic rasterization when using dynspread.
+        raster_threshold (float, default 0.8):
+            Threshold used for the dynspread process in dynamic rasterization.
+        legend (str, optional):
+            Legend position for categorical feature coloring ("top_right", "bottom_left", etc.) or None.
+            Only applies when colorby is not None and contains categorical data.
+        colorby (str, optional):
+            Feature property to use for coloring. If None (default), uses current green/red scheme
+            for features with/without MS2 data. If specified and contains categorical data, applies
+            categorical coloring with legend support (similar to plot_2d_oracle).
+    Behavior:
+        - Checks for a loaded mzML file by verifying that self.file_obj is not None.
+        - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
+          points (inty < 1).
+        - Sets up the plot bounds for retention time (rt) and mass-to-charge ratio (mz) using a hook function.
+        - Renders the MS1 data as a background rasterized image with a logarithmic intensity normalization.
+        - Conditionally overlays feature points (with and without MS2 information), isotopes (if requested),
+          and MS2 scan points based on internal DataFrame data.
+        - Depending on the filename parameter, either displays the plot interactively using Panel or
+          saves it as an HTML or PNG file.
+    Returns:
+        None
+    Side Effects:
+        - May print a warning if no mzML file is loaded.
+        - Either shows the plot interactively or writes the output to a file.
+    """
+    if self.ms1_df is None:
+        self.logger.error("No MS1 data available.")
+        return
+    # Process colormap using the cmap package
+    cmap_palette = _process_cmap(cmap, fallback="iridescent", logger=self.logger)
+    # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
+    spectradf = self.ms1_df.select(["rt", "mz", "inty"])
+    # remove any inty<raster_min
+    spectradf = spectradf.filter(pl.col("inty") >= raster_min)
+    # keep only rt, mz, and inty
+    spectradf = spectradf.select(["rt", "mz", "inty"])
+    if mz_range is not None:
+        spectradf = spectradf.filter((pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]))
+    if rt_range is not None:
+        spectradf = spectradf.filter((pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]))
+    maxrt = spectradf["rt"].max()
+    minrt = spectradf["rt"].min()
+    maxmz = spectradf["mz"].max()
+    minmz = spectradf["mz"].min()
+    def new_bounds_hook(plot, elem):
+        x_range = plot.state.x_range
+        y_range = plot.state.y_range
+        x_range.bounds = minrt, maxrt
+        y_range.bounds = minmz, maxmz
+    points = hv.Points(
+        spectradf,
+        kdims=["rt", "mz"],
+        vdims=["inty"],
+        label="MS1 survey scans",
+    ).opts(
+        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
+        color=np.log(dim("inty")),
+        colorbar=True,
+        cmap="Magma",
+        tools=["hover"],
+    )
+    # Configure marker and size behavior based on size parameter
+    use_dynamic_sizing = size.lower() in ["dyn", "dynamic"]
+    use_slider_sizing = size.lower() == "slider"
+    def dynamic_sizing_hook(plot, element):
+        """Hook to convert size-based markers to radius-based for dynamic behavior"""
+        try:
+            if use_dynamic_sizing and hasattr(plot, "state") and hasattr(plot.state, "renderers"):
+                from bokeh.models import Circle
+                for renderer in plot.state.renderers:
+                    if hasattr(renderer, "glyph"):
+                        glyph = renderer.glyph
+                        # Check if it's a circle/scatter glyph that we can convert
+                        if hasattr(glyph, "size") and marker_type == "circle":
+                            # Create a new Circle glyph with radius instead of size
+                            new_glyph = Circle(
+                                x=glyph.x,
+                                y=glyph.y,
+                                radius=base_radius,
+                                fill_color=glyph.fill_color,
+                                line_color=glyph.line_color,
+                                fill_alpha=glyph.fill_alpha,
+                                line_alpha=glyph.line_alpha,
+                            )
+                            renderer.glyph = new_glyph
+        except Exception:
+            # Silently fail and use regular sizing if hook doesn't work
+            pass
+    if use_dynamic_sizing:
+        # Dynamic sizing: use coordinate-based sizing that scales with zoom
+        marker_type = "circle"
+        # Calculate radius based on data range for coordinate-based sizing
+        rtrange = maxrt - minrt
+        mzrange = maxmz - minmz
+        # Use a fraction of the smaller dimension for radius
+        base_radius = min(rtrange, mzrange) * 0.0005 * markersize
+        size_1 = markersize  # Use regular size initially, hook will convert to radius
+        size_2 = markersize
+        hooks = [dynamic_sizing_hook]
+    elif use_slider_sizing:
+        # Slider sizing: create an interactive slider for marker size
+        marker_type = marker  # Use the original marker parameter
+        size_1 = markersize  # Use markersize initially, will be updated by slider
+        size_2 = markersize
+        base_radius = None  # Not used in slider mode
+        hooks = []
+    else:
+        # Static sizing: use pixel-based sizing that stays fixed
+        marker_type = marker  # Use the original marker parameter
+        size_1 = markersize
+        size_2 = markersize
+        base_radius = None  # Not used in static mode
+        hooks = []
+    color_1 = "forestgreen"
+    color_2 = "darkorange"
+    # Handle colorby parameter for feature coloring
+    use_categorical_coloring = False
+    feature_colors = {}
+    categorical_groups = []
+    if filename is not None:
+        dyn = False
+        if not filename.endswith(".html"):
+            if use_dynamic_sizing:
+                # For exported files, use smaller coordinate-based size
+                size_1 = 2
+                size_2 = 2
+            else:
+                size_1 = 2
+                size_2 = 2
+            color_1 = "forestgreen"
+            color_2 = "darkorange"
+            raster_dynamic = False
+    # For slider functionality, disable raster dynamic to avoid DynamicMap nesting
+    if use_slider_sizing:
+        raster_dynamic = False
+    dyn = raster_dynamic
+    raster = hd.rasterize(
+        points,
+        aggregator=ds.max("inty"),
+        interpolation="bilinear",
+        dynamic=dyn,  # alpha=10,                min_alpha=0,
+    ).opts(
+        active_tools=["box_zoom"],
+        cmap=cmap_palette,
+        tools=["hover"],
+        hooks=[new_bounds_hook],
+        width=width,
+        height=height,
+        cnorm="log" if raster_log else "linear",
+        xlabel="Retention time (s)",
+        ylabel="m/z",
+        colorbar=True,
+        colorbar_position="right",
+        axiswise=True,
+    )
+    raster = hd.dynspread(
+        raster,
+        threshold=raster_threshold,
+        how="add",
+        shape="square",
+        max_px=raster_max_px,
+    )
+    feature_points_1 = None
+    feature_points_2 = None
+    feature_points_3 = None
+    feature_points_4 = None
+    feature_points_iso = None
+    # Plot features as red dots if features is True
+    if self.features_df is not None and show_features:
+        feats = self.features_df.clone()
+        # Convert to pandas for operations that require pandas functionality
+        if hasattr(feats, "to_pandas"):
+            feats = feats.to_pandas()
+        # if ms2_scans is not null, keep only the first element of the list
+        feats["ms2_scans"] = feats["ms2_scans"].apply(
+            lambda x: x[0] if isinstance(x, list) else x,
+        )
+        if mz_range is not None:
+            feats = feats[(feats["mz"] >= mz_range[0]) & (feats["mz"] <= mz_range[1])]
+        if rt_range is not None:
+            feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
+        # keep only iso==0, i.e. the main
+        feats = feats[feats["iso"] == 0]
+        # Handle colorby parameter
+        if colorby is not None and colorby in feats.columns:
+            # Check if colorby data is categorical (string-like)
+            colorby_values = feats[colorby].dropna()
+            is_categorical = (
+                feats[colorby].dtype in ["object", "string", "category"] or
+                (len(colorby_values) > 0 and isinstance(colorby_values.iloc[0], str))
+            )
+            if is_categorical:
+                use_categorical_coloring = True
+                # Get unique categories, sorted
+                categorical_groups = sorted(feats[colorby].dropna().unique())
+                # Set up colors for categorical data using matplotlib colormap
+                from matplotlib.colors import to_hex
+                try:
+                    from matplotlib.cm import get_cmap
+                    colormap_func = get_cmap(cmap if cmap != 'iridescent' else 'tab20')
+                    feature_colors = {}
+                    for i, group in enumerate(categorical_groups):
+                        if len(categorical_groups) <= 20:
+                            # Use qualitative colors for small number of categories
+                            color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
+                        else:
+                            # Use continuous colormap for many categories
+                            color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
+                        feature_colors[group] = to_hex(color_val)
+                except Exception as e:
+                    self.logger.warning(f"Could not set up categorical coloring: {e}, using default colors")
+                    use_categorical_coloring = False
+        if use_categorical_coloring and colorby is not None:
+            # Create separate feature points for each category
+            for i, group in enumerate(categorical_groups):
+                group_features = feats[feats[colorby] == group]
+                if len(group_features) == 0:
+                    continue
+                # Split by MS2 status
+                group_with_ms2 = group_features[group_features["ms2_scans"].notnull()]
+                group_without_ms2 = group_features[group_features["ms2_scans"].isnull()]
+                group_color = feature_colors.get(group, color_1)
+                if len(group_with_ms2) > 0:
+                    feature_hover = HoverTool(
+                        tooltips=[
+                            ("rt", "@rt"),
+                            ("m/z", "@mz{0.0000}"),
+                            ("feature_uid", "@feature_uid"),
+                            ("inty", "@inty"),
+                            ("iso", "@iso"),
+                            ("adduct", "@adduct"),
+                            ("chrom_coherence", "@chrom_coherence"),
+                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                            (colorby, f"@{colorby}"),
+                        ],
+                    )
+                    group_points_ms2 = hv.Points(
+                        group_with_ms2,
+                        kdims=["rt", "mz"],
+                        vdims=[
+                            "feature_uid",
+                            "inty",
+                            "iso",
+                            "adduct",
+                            "ms2_scans",
+                            "chrom_coherence",
+                            "chrom_prominence_scaled",
+                            colorby,
+                        ],
+                        label=f"{group} (MS2)",
+                    ).options(
+                        color=group_color,
+                        marker=marker_type,
+                        size=size_1,
+                        tools=[feature_hover],
+                        hooks=hooks,
+                    )
+                    if feature_points_1 is None:
+                        feature_points_1 = group_points_ms2
+                    else:
+                        feature_points_1 = feature_points_1 * group_points_ms2
+                if len(group_without_ms2) > 0:
+                    feature_hover = HoverTool(
+                        tooltips=[
+                            ("rt", "@rt"),
+                            ("m/z", "@mz{0.0000}"),
+                            ("feature_uid", "@feature_uid"),
+                            ("inty", "@inty"),
+                            ("iso", "@iso"),
+                            ("adduct", "@adduct"),
+                            ("chrom_coherence", "@chrom_coherence"),
+                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                            (colorby, f"@{colorby}"),
+                        ],
+                    )
+                    group_points_no_ms2 = hv.Points(
+                        group_without_ms2,
+                        kdims=["rt", "mz"],
+                        vdims=[
+                            "feature_uid",
+                            "inty",
+                            "iso",
+                            "adduct",
+                            "chrom_coherence",
+                            "chrom_prominence_scaled",
+                            colorby,
+                        ],
+                        label=f"{group} (no MS2)",
+                    ).options(
+                        color=group_color,
+                        marker=marker_type,
+                        size=size_2,
+                        tools=[feature_hover],
+                        hooks=hooks,
+                    )
+                    if feature_points_2 is None:
+                        feature_points_2 = group_points_no_ms2
+                    else:
+                        feature_points_2 = feature_points_2 * group_points_no_ms2
+        else:
+            # Use original green/red coloring scheme for MS2 presence
+            # find features with ms2_scans not None  and iso==0
+            features_df = feats[feats["ms2_scans"].notnull()]
+            # Create feature points with proper sizing method
+            feature_hover_1 = HoverTool(
+                tooltips=[
+                    ("rt", "@rt"),
+                    ("m/z", "@mz{0.0000}"),
+                    ("feature_uid", "@feature_uid"),
+                    ("inty", "@inty"),
+                    ("iso", "@iso"),
+                    ("adduct", "@adduct"),
+                    ("chrom_coherence", "@chrom_coherence"),
+                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                ],
+            )
+            if len(features_df) > 0:
+                feature_points_1 = hv.Points(
+                    features_df,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "feature_uid",
+                        "inty",
+                        "iso",
+                        "adduct",
+                        "ms2_scans",
+                        "chrom_coherence",
+                        "chrom_prominence_scaled",
+                    ],
+                    label="Features with MS2 data",
+                ).options(
+                    color=color_1,
+                    marker=marker_type,
+                    size=size_1,
+                    tools=[feature_hover_1],
+                    hooks=hooks,
+                )
+            # find features without MS2 data
+            features_df = feats[feats["ms2_scans"].isnull()]
+            feature_hover_2 = HoverTool(
+                tooltips=[
+                    ("rt", "@rt"),
+                    ("m/z", "@mz{0.0000}"),
+                    ("feature_uid", "@feature_uid"),
+                    ("inty", "@inty"),
+                    ("iso", "@iso"),
+                    ("adduct", "@adduct"),
+                    ("chrom_coherence", "@chrom_coherence"),
+                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                ],
+            )
+            if len(features_df) > 0:
+                feature_points_2 = hv.Points(
+                    features_df,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "feature_uid",
+                        "inty",
+                        "iso",
+                        "adduct",
+                        "chrom_coherence",
+                        "chrom_prominence_scaled",
+                    ],
+                    label="Features without MS2 data",
+                ).options(
+                    color="red",
+                    marker=marker_type,
+                    size=size_2,
+                    tools=[feature_hover_2],
+                    hooks=hooks,
+                )
+        if show_isotopes:
             # Use proper Polars filter syntax to avoid boolean indexing issues
             features_df = self.features_df.filter(pl.col("iso") > 0)
             # Convert to pandas for plotting compatibility
@@ -869,6 +1595,31 @@ def plot_2d(
     if title is not None:
         overlay = overlay.opts(title=title)
+    # Handle legend positioning for categorical coloring
+    if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
+        # Map legend position parameter to HoloViews legend position
+        legend_position_map = {
+            "top_right": "top_right",
+            "top_left": "top_left",
+            "bottom_right": "bottom_right",
+            "bottom_left": "bottom_left",
+            "right": "right",
+            "left": "left",
+            "top": "top",
+            "bottom": "bottom"
+        }
+        hv_legend_pos = legend_position_map.get(legend, "bottom_right")
+        # Apply legend configuration to the overlay
+        overlay = overlay.opts(
+            legend_position=hv_legend_pos,
+            legend_opts={'title': '', 'padding': 2, 'spacing': 2}
+        )
+    elif legend is None and use_categorical_coloring:
+        # Explicitly hide legend when legend=None but categorical coloring is used
+        overlay = overlay.opts(show_legend=False)
     # Handle slider functionality
     if use_slider_sizing:
         # For slider functionality, we need to work with the feature points directly
@@ -931,569 +1682,205 @@ def plot_2d(
             on.pane.HTML("<b>Marker Size Control:</b>", width=150, height=40, margin=(5, 10)),
             size_slider,
             height=60,
-            margin=10,
-        )
-        # Create slider widget
-        size_slider = on.widgets.FloatSlider(
-            name="Marker Size",
-            start=1.0,
-            end=20.0,
-            step=0.5,
-            value=markersize,
-            width=300,
-            height=40,
-            margin=(5, 5),
-            show_value=True,
-        )
-        slider_widget = on.Row(
-            on.pane.HTML("<b>Marker Size:</b>", width=100, height=40, margin=(5, 10)),
-            size_slider,
-            height=60,
-            margin=10,
-        )
-        # Simple reactive plot - slider mode doesn't use dynamic rasterization
-        @on.depends(size_slider.param.value)
-        def reactive_plot(size_val):
-            overlay = create_feature_overlay(float(size_val))
-            # Apply static rasterization for slider mode
-            if raster_dynamic:
-                return hd.rasterize(
-                    overlay,
-                    aggregator=ds.count(),
-                    width=raster_max_px,
-                    height=raster_max_px,
-                    dynamic=False,  # Static raster for slider mode
-                ).opts(
-                    cnorm="eq_hist",
-                    tools=["hover"],
-                    width=width,
-                    height=height,
-                )
-            else:
-                return overlay
-        # Create layout
-        layout = on.Column(slider_widget, reactive_plot, sizing_mode="stretch_width")
-        # Handle filename saving for slider mode
-        if filename is not None:
-            if filename.endswith(".html"):
-                layout.save(filename, embed=True)
-            else:
-                # For slider plots, save the current state
-                hv.save(create_feature_overlay(markersize), filename, fmt="png")
-        else:
-            # Use show() for display in notebook
-            layout.show()
-    else:
-        # Create a panel layout without slider
-        layout = panel.Column(overlay)
-    # Handle display logic based on show_in_browser and raster_dynamic
-    if filename is not None:
-        # Use consistent save/display behavior
-        self._handle_sample_plot_output(layout, filename, "panel")
-    else:
-        # Show in browser if both show_in_browser and raster_dynamic are True
-        if show_in_browser and raster_dynamic:
-            layout.show()
-        else:
-            # Return to notebook for inline display
-            return layout
-def plot_2d_oracle(
-    self,
-    oracle_folder=None,
-    link_by_feature_uid=None,
-    colorby="hg",
-    filename=None,
-    min_id_level=None,
-    max_id_level=None,
-    min_ms_level=None,
-    title=None,
-    cmap=None,
-    markersize=10,
-    raster_dynamic=True,
-    raster_max_px=8,
-    raster_threshold=0.8,
-    mz_range=None,
-    rt_range=None,
-):
-    """
-    Plot a 2D overlay visualization of MS1 survey scans and feature annotations, including oracle annotation data if provided.
-    This function reads the primary mass spectrometry data, applies filtering, processes oracle annotation data (if provided),
-    and produces an interactive plot combining various data layers. The visualization includes rasterized MS1 data and feature
-    points colored by annotation.
-    Parameters:
-        self: The object instance containing MS1 and feature data.
-        oracle_folder (str, optional): Path to the oracle folder containing the annotation file
-            (expected at "<oracle_folder>/diag/summary_by_feature.csv"). If None, oracle data is not used.
-        link_by_feature_uid (bool, optional): Whether to link features by their IDs in the overlay.
-        colorby (str, optional): Parameter that determines the color assignment for annotated features.
-            Expected values include 'hg', 'class', 'id_class', or 'id_hg'. Default is 'hg'.
-        filename (str, optional): Name of the file where the plot should be saved. If provided and ends with
-            ".html", the panel layout is saved as an interactive HTML file; otherwise, the output is saved as a PNG.
-        min_id_level (int, optional): Minimum identification level for oracle annotations to include.
-        max_id_level (int, optional): Maximum identification level for oracle annotations to include.
-        min_ms_level (int, optional): Minimum MS level for features to include.
-        title (str, optional): Title to be displayed on the resulting plot. Default is None.
-        cmap (str, optional): Colormap to be used for the rasterized plot. Acceptable values include None, "grey",
-            "iridescent", or other valid colormap names. Default is None. When None, 'Greys256' is used.
-        markersize (int, optional): Marker size for feature points in the overlay. Default is 10.
-        raster_dynamic (bool, optional): If True, enables dynamic rasterization of the overlay. If filename is provided
-            and does not end with ".html", raster_dynamic is set to False. Default is True.
-        raster_max_px (int, optional): Maximum pixel size for dynamic rasterization. Default is 8.
-        raster_threshold (float, optional): Threshold for dynamic raster spread. Default is 0.8.
-        mz_range (tuple, optional): m/z range for filtering MS1 data.
-        rt_range (tuple, optional): Retention time range for filtering MS1 data.
-    Returns:
-        None
-    The function either displays the interactive panel layout or saves the visualization to a file based on
-    the provided filename. If the primary file object or feature data is missing, the function prints an
-    informative message and returns without plotting.
-    """
-    if self.file_obj is None:
-        print("Please load a file first.")
-        return
-    # Process colormap using the cmap package
-    cmap_palette = _process_cmap(cmap, fallback="Greys256", logger=self.logger)
-    # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
-    spectradf = self.ms1_df.to_pandas()
-    # remove any inty<1
-    spectradf = spectradf[spectradf["inty"] >= 1]
-    # keep only rt, mz, and inty
-    spectradf = spectradf[["rt", "mz", "inty"]]
-    if mz_range is not None:
-        spectradf = spectradf[(spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])]
-    if rt_range is not None:
-        spectradf = spectradf[(spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])]
-    maxrt = spectradf["rt"].max()
-    minrt = spectradf["rt"].min()
-    maxmz = spectradf["mz"].max()
-    minmz = spectradf["mz"].min()
-    def new_bounds_hook(plot, elem):
-        x_range = plot.state.x_range
-        y_range = plot.state.y_range
-        x_range.bounds = minrt, maxrt
-        y_range.bounds = minmz, maxmz
-    points = hv.Points(
-        spectradf,
-        kdims=["rt", "mz"],
-        vdims=["inty"],
-        label="MS1 survey scans",
-    ).opts(
-        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
-        color=np.log(dim("inty")),
-        colorbar=True,
-        cmap="Magma",
-        tools=["hover"],
-    )
-    if filename is not None:
-        dyn = False
-        if not filename.endswith(".html"):
-            raster_dynamic = False
-    dyn = raster_dynamic
-    raster = hd.rasterize(
-        points,
-        aggregator=ds.max("inty"),
-        interpolation="bilinear",
-        dynamic=dyn,  # alpha=10,                min_alpha=0,
-    ).opts(
-        active_tools=["box_zoom"],
-        cmap=cmap_palette,
-        tools=["hover"],
-        hooks=[new_bounds_hook],
-        width=1000,
-        height=1000,
-        cnorm="log",
-        xlabel="Retention time (s)",
-        ylabel="m/z",
-        colorbar=True,
-        colorbar_position="right",
-        axiswise=True,
-    )
-    raster = hd.dynspread(
-        raster,
-        threshold=raster_threshold,
-        how="add",
-        shape="square",
-        max_px=raster_max_px,
-    )
-    if self.features_df is None:
-        return
-    feats = self.features_df.clone()
-    # Convert to pandas for oracle operations that require pandas functionality
-    if hasattr(feats, "to_pandas"):
-        feats = feats.to_pandas()
-    # check if annotationfile is not None
-    if oracle_folder is None:
-        return
-    # try to read the annotationfile as a csv file and add it to feats
-    try:
-        oracle_data = pd.read_csv(
-            os.path.join(oracle_folder, "diag", "summary_by_feature.csv"),
-        )
-    except Exception:
-        print(f"Could not read {oracle_folder}/diag/summary_by_feature.csv")
-        return
-    if link_by_feature_uid:
-        # scan_idx	slaw_id	slaw_ms2_id	mz	rt	level	formula	ion	species	name	rarity	lib_id	hg	mod	lib	score	score2	score_db	score_db_data	ms2_tic	ms2_evidence	ms2_matched_n	ms2_missed_n	ms2_matched	ms2_missed	ms2_top1
-        cols_to_keep = [
-            "title",
-            "scan_idx",
-            "mslevel",
-            "hits",
-            "id_level",
-            "id_label",
-            "id_ion",
-            "id_class",
-            "id_evidence",
-            "score",
-            "score2",
-        ]
-        oracle_data = oracle_data[cols_to_keep]
-        # extract feature_uid from title. It begins with "fid:XYZ;"
-        oracle_data["feature_uid"] = oracle_data["title"].str.extract(r"fid:(\d+)")
-        oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
-        # sort by id_level, remove duplicate feature_uid, keep the first one
-        oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
-        oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")
-    else:
-        cols_to_keep = [
-            "precursor",
-            "rt",
-            "title",
-            "scan_idx",
-            "mslevel",
-            "hits",
-            "id_level",
-            "id_label",
-            "id_ion",
-            "id_class",
-            "id_evidence",
-            "score",
-            "score2",
-        ]
-        # link
-        oracle_data = oracle_data[cols_to_keep]
-        oracle_data["feature_uid"] = None
-        # iterate over the rows and find the feature_uid in feats by looking at the closest rt and mz
-        for i, row in oracle_data.iterrows():
-            candidates = feats[
-                (abs(feats["rt"] - row["rt"]) < 1) & (abs(feats["mz"] - row["precursor"]) < 0.005)
-            ].copy()
-            if len(candidates) > 0:
-                # sort by delta rt
-                candidates["delta_rt"] = abs(candidates["rt"] - row["rt"])
-                candidates = candidates.sort_values(by=["delta_rt"])
-                oracle_data.at[i, "feature_uid"] = candidates["feature_uid"].values[0]
-        # remove precursor and rt columns
-        oracle_data = oracle_data.drop(columns=["precursor", "rt"])
+            margin=10,
+        )
-    feats = feats.merge(oracle_data, how="left", on="feature_uid")
+        # Create slider widget
+        size_slider = on.widgets.FloatSlider(
+            name="Marker Size",
+            start=1.0,
+            end=20.0,
+            step=0.5,
+            value=markersize,
+            width=300,
+            height=40,
+            margin=(5, 5),
+            show_value=True,
+        )
-    # filter feats by id_level
-    if min_id_level is not None:
-        feats = feats[(feats["id_level"] >= min_id_level)]
-    if max_id_level is not None:
-        feats = feats[(feats["id_level"] <= max_id_level)]
-    if min_ms_level is not None:
-        feats = feats[(feats["mslevel"] >= min_ms_level)]
+        slider_widget = on.Row(
+            on.pane.HTML("<b>Marker Size:</b>", width=100, height=40, margin=(5, 10)),
+            size_slider,
+            height=60,
+            margin=10,
+        )
-    feats["color"] = "black"
+        # Simple reactive plot - slider mode doesn't use dynamic rasterization
+        @on.depends(size_slider.param.value)
+        def reactive_plot(size_val):
+            overlay = create_feature_overlay(float(size_val))
+            # Apply static rasterization for slider mode
+            if raster_dynamic:
+                return hd.rasterize(
+                    overlay,
+                    aggregator=ds.count(),
+                    width=raster_max_px,
+                    height=raster_max_px,
+                    dynamic=False,  # Static raster for slider mode
+                ).opts(
+                    cnorm="eq_hist",
+                    tools=["hover"],
+                    width=width,
+                    height=height,
+                )
+            else:
+                return overlay
-    cvalues = None
-    if colorby in ["class", "hg", "id_class", "id_hg"]:
-        # replace nans in feats['id_class'] with 'mix'
-        feats["id_class"] = feats["id_class"].fillna("mix")
-        cvalues = feats["id_class"].unique()
-        # sort alphabetically
-        cvalues = sorted(cvalues)
-        # flip the strings left to right
-        fcvalues = [cvalues[i][::-1] for i in range(len(cvalues))]
-        # sort in alphabetical order the flipped strings and return the index
-        idx = np.argsort(fcvalues)
-        # apply to cvalues
-        cvalues = [cvalues[i] for i in idx]
-    elif colorby in ["ion", "id_ion"]:
-        cvalues = feats["id_ion"].unique()
-    elif colorby in ["id_evidence", "ms2_evidence"]:
-        cvalues = feats["id_evidence"].unique()
+        # Create layout
+        layout = on.Column(slider_widget, reactive_plot, sizing_mode="stretch_width")
-    if cvalues is not None:
-        num_colors = len(cvalues)
-        # Use cmap package for categorical colormap
-        try:
-            if Colormap is not None:
-                # Use rainbow colormap for categorical data
-                colormap = Colormap("rainbow")
-                colors = []
-                for i in range(num_colors):
-                    # Generate evenly spaced colors across the colormap
-                    t = i / (num_colors - 1) if num_colors > 1 else 0.5
-                    color = colormap(t)
-                    # Convert to hex
-                    import matplotlib.colors as mcolors
-                    # Convert color to hex - handle different color formats
-                    if hasattr(color, '__len__') and len(color) >= 3:
-                        # It's an array-like color (RGB or RGBA)
-                        colors.append(mcolors.rgb2hex(color[:3]))
-                    else:
-                        # It's a single value, convert to RGB
-                        colors.append(mcolors.rgb2hex([color, color, color]))
+        # Handle filename saving for slider mode
+        if filename is not None:
+            if filename.endswith(".html"):
+                layout.save(filename, embed=True)
             else:
-                # Fallback to original method
-                cmap = "rainbow"
-                cmap_provider = "colorcet"
-                cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
-                colors = [
-                    rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
-                    for i in range(num_colors)
-                ]
-        except Exception:
-            # Final fallback to original method
-            cmap = "rainbow"
-            cmap_provider = "colorcet"
-            cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
-            colors = [
-                rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
-                for i in range(num_colors)
-            ]
-        # assign color to each row based on id_class. If id_class is null, assign 'black'
-        feats["color"] = "black"
+                # For slider plots, save the current state
+                hv.save(create_feature_overlay(markersize), filename, fmt="png")
+        else:
+            # Use show() for display in notebook
+            layout.show()
+    else:
+        # Create a panel layout without slider
+        layout = panel.Column(overlay)
-        for i, c in enumerate(cvalues):
-            if colorby in ["class", "hg", "id_class", "id_hg"]:
-                feats.loc[feats["id_class"] == c, "color"] = colors[i]
-            elif colorby in ["ion", "id_ion"]:
-                feats.loc[feats["id_ion"] == c, "color"] = colors[i]
-            elif colorby in ["id_evidence", "ms2_evidence"]:
-                feats.loc[feats["id_evidence"] == c, "color"] = colors[i]
+    # Handle display logic based on show_in_browser and raster_dynamic
+    if filename is not None:
+        # Use consistent save/display behavior
+        self._handle_sample_plot_output(layout, filename, "panel")
+    else:
+        # Show in browser if both show_in_browser and raster_dynamic are True
+        if show_in_browser and raster_dynamic:
+            layout.show()
+        else:
+            # Return to notebook for inline display
+            return layout
-    # replace NaN with 0 in id_level
-    feats["id_level"] = feats["id_level"].fillna(0)
-    # feature_points_1 are all features with column ms2_scans not null
-    feature_points_1 = None
-    feat_df = feats.copy()
-    feat_df = feat_df[feat_df["id_level"] == 2]
-    oracle_hover_1 = HoverTool(
-        tooltips=[
-            ("rt", "@rt"),
-            ("m/z", "@mz{0.0000}"),
-            ("feature_uid", "@feature_uid"),
-            ("id_level", "@id_level"),
-            ("id_class", "@id_class"),
-            ("id_label", "@id_label"),
-            ("id_ion", "@id_ion"),
-            ("id_evidence", "@id_evidence"),
-            ("score", "@score"),
-            ("score2", "@score2"),
-        ],
-    )
-    feature_points_1 = hv.Points(
-        feat_df,
-        kdims=["rt", "mz"],
-        vdims=[
-            "inty",
-            "feature_uid",
-            "id_level",
-            "id_class",
-            "id_label",
-            "id_ion",
-            "id_evidence",
-            "score",
-            "score2",
-            "color",
-        ],
-        label="ID by MS2",
-    ).options(
-        color="color",
-        marker="circle",
-        size=markersize,
-        fill_alpha=1.0,
-        tools=[oracle_hover_1],
-    )
-    # feature_points_2 are all features that have ms2_scans not null and id_level ==1
-    feature_points_2 = None
-    feat_df = feats.copy()
-    feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] == 1)]
-    if len(feat_df) > 0:
-        oracle_hover_2 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("id_level", "@id_level"),
-                ("id_label", "@id_label"),
-                ("id_ion", "@id_ion"),
-                ("id_class", "@id_class"),
-            ],
-        )
-        feature_points_2 = hv.Points(
-            feat_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "inty",
-                "feature_uid",
-                "id_level",
-                "id_label",
-                "id_ion",
-                "id_class",
-                "color",
-            ],
-            label="ID by MS1, with MS2",
-        ).options(
-            color="color",
-            marker="circle",
-            size=markersize,
-            fill_alpha=0.0,
-            tools=[oracle_hover_2],
-        )
+def plot_2d_oracle(
+    self,
+    oracle_folder=None,
+    link_by_feature_uid=True,
+    min_id_level=1,
+    max_id_level=4,
+    min_ms_level=2,
+    colorby="hg",
+    legend_groups=None,
+    markersize=5,
+    cmap='Turbo',
+    raster_cmap='grey',
+    raster_log=True,
+    raster_min=1,
+    raster_dynamic=True,
+    raster_max_px=8,
+    raster_threshold=0.8,
+    mz_range=None,
+    rt_range=None,
+    width=750,
+    height=600,
+    filename=None,
+    title=None,
+    legend="bottom_right",
+):
+    """
+    Plot a 2D visualization combining MS1 raster data and oracle-annotated features.
-    # feature_points_3 are all features that have ms2_scans null and id_level ==1
-    feature_points_3 = None
-    feat_df = feats.copy()
-    feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] == 1)]
-    if len(feat_df) > 0:
-        oracle_hover_3 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("id_level", "@id_level"),
-                ("id_label", "@id_label"),
-                ("id_ion", "@id_ion"),
-                ("id_class", "@id_class"),
-            ],
-        )
-        feature_points_3 = hv.Points(
-            feat_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "inty",
-                "feature_uid",
-                "id_level",
-                "id_label",
-                "id_ion",
-                "id_class",
-                "color",
-            ],
-            label="ID by MS1, no MS2",
-        ).options(
-            color="color",
-            marker="diamond",
-            size=markersize,
-            fill_alpha=0.0,
-            tools=[oracle_hover_3],
-        )
+    Creates an interactive plot overlaying MS1 survey scan data with feature annotations
+    from oracle files. Features are colored categorically based on identification class,
+    ion type, or evidence level.
-    # feature_points_4 are all features that have ms2_scans null and id_level ==0
-    feature_points_4 = None
-    feat_df = feats.copy()
-    feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] < 1)]
-    if len(feat_df) > 0:
-        oracle_hover_4 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-            ],
-        )
-        feature_points_4 = hv.Points(
-            feat_df,
-            kdims=["rt", "mz"],
-            vdims=["inty", "feature_uid"],
-            label="No ID, with MS2",
-        ).options(
-            color="gray",
-            marker="circle",
-            size=markersize,
-            fill_alpha=0.0,
-            tools=[oracle_hover_4],
-        )
+    Parameters:
+        oracle_folder (str, optional): Path to oracle folder containing
+            "diag/summary_by_feature.csv". Required for oracle annotations.
+        link_by_feature_uid (bool): Whether to link features by UID (True) or by m/z/RT proximity.
+        min_id_level (int): Minimum identification confidence level to include.
+        max_id_level (int): Maximum identification confidence level to include.
+        min_ms_level (int): Minimum MS level for features to include.
+        colorby (str): Feature coloring scheme - "id_class", "id_ion", "id_evidence", etc.
+        legend_groups (list, optional): List of groups to include in legend and coloring scheme.
+            If provided, legend will show exactly these groups. 'mix' is automatically added
+            as the last group to contain points not matching other groups. Works for all
+            categorical coloring types (id_class, id_ion, id_evidence, etc.).
+            If None (default), all groups present in the data will be shown without filtering.
+            All specified classes will appear in the legend even if no features are present.
+        markersize (int): Size of feature markers.
+        cmap (str): Colormap name for categorical coloring.
+        raster_cmap (str): Colormap for MS1 raster background.
+        raster_log (bool): Use logarithmic scaling for raster intensity (True) or linear scaling (False).
+        raster_min (float): Minimum intensity threshold for raster data filtering.
+        raster_dynamic (bool): Enable dynamic rasterization.
+        raster_threshold (float): Dynamic raster spread threshold.
+        raster_max_px (int): Maximum pixel size for rasterization.
+        mz_range (tuple, optional): m/z range filter (min, max).
+        rt_range (tuple, optional): Retention time range filter (min, max).
+        width/height (int): Plot dimensions in pixels.
+        filename (str, optional): Export filename (.html/.svg/.png). If None, displays inline.
+        title (str, optional): Plot title.
+        legend (str, optional): Legend position ("top_right", "bottom_left", etc.) or None.
-    # feature_points_5 are all features that have ms2_scans null and id_level ==0
-    feature_points_5 = None
-    feat_df = feats.copy()
-    feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] < 1)]
-    if len(feat_df) > 0:
-        oracle_hover_5 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-            ],
-        )
-        feature_points_5 = hv.Points(
-            feat_df,
-            kdims=["rt", "mz"],
-            vdims=["inty", "feature_uid"],
-            label="No ID, no MS2",
-        ).options(
-            color="gray",
-            marker="diamond",
-            fill_alpha=0.0,
-            size=markersize,
-            tools=[oracle_hover_5],
-        )
+    Returns:
+        HoloViews layout for display (if filename is None), otherwise None.
+    """
-    overlay = raster
+    self.logger.info(f"Starting plot_2d_oracle with oracle_folder: {oracle_folder}")
+    self.logger.debug(f"Parameters - link_by_feature_uid: {link_by_feature_uid}, min_id_level: {min_id_level}, max_id_level: {max_id_level}")
+    self.logger.debug(f"Plot parameters - colorby: {colorby}, markersize: {markersize}, filename: {filename}")
-    if feature_points_1 is not None:
-        overlay = overlay * feature_points_1
-    if feature_points_2 is not None:
-        overlay = overlay * feature_points_2
-    if feature_points_3 is not None:
-        overlay = overlay * feature_points_3
-    if feature_points_4 is not None:
-        overlay = overlay * feature_points_4
-    # if not show_only_features_with_ms2:
-    if feature_points_5 is not None:
-        overlay = overlay * feature_points_5
+    # Early validation
+    if self.features_df is None:
+        self.logger.error("Cannot plot 2D oracle: features_df is not available")
+        return
+    if oracle_folder is None:
+        self.logger.info("No oracle folder provided, plotting features only")
+        return
-    if title is not None:
-        overlay = overlay.opts(title=title)
+    # Create raster plot layer
+    raster = _create_raster_plot(
+        self,
+        mz_range=mz_range,
+        rt_range=rt_range,
+        raster_cmap=raster_cmap,
+        raster_log=raster_log,
+        raster_min=raster_min,
+        raster_dynamic=raster_dynamic,
+        raster_threshold=raster_threshold,
+        raster_max_px=raster_max_px,
+        width=width,
+        height=height,
+        filename=filename
+    )
-    # Create a panel layout
-    layout = panel.Column(overlay)
+    # Load and process oracle data
+    feats = _load_and_merge_oracle_data(
+        self,
+        oracle_folder=oracle_folder,
+        link_by_feature_uid=link_by_feature_uid,
+        min_id_level=min_id_level,
+        max_id_level=max_id_level,
+        min_ms_level=min_ms_level
+    )
+    if feats is None:
+        return
-    if filename is not None:
-        # if filename includes .html, save the panel layout to an HTML file
-        if filename.endswith(".html"):
-            layout.save(filename, embed=True)
-        else:
-            # save the panel layout as a png
-            hv.save(overlay, filename, fmt="png")
-    else:
-        # Check if we're in a notebook environment and display appropriately
-        return _display_plot(overlay, layout)
+    # Set up color scheme and categorical mapping
+    cvalues, color_column, colors = _setup_color_mapping(self, feats, colorby, cmap, legend_groups)
+    # Create feature overlay with all visualization elements
+    overlay = _create_feature_overlay(
+        self,
+        raster=raster,
+        feats=feats,
+        cvalues=cvalues,
+        color_column=color_column,
+        colors=colors,
+        markersize=markersize,
+        title=title,
+        legend=legend
+    )
+    # Handle output: export or display
+    return _handle_output(self, overlay, filename)
 def plot_ms2_eic(
@@ -1756,96 +2143,6 @@ def plot_ms2_cycle(
         max_px=raster_max_px,
     )
-    """
-    feature_points_1 = None
-    feature_points_2 = None
-    feature_points_3 = None
-    feature_points_4 = None
-    feature_points_iso = None
-    # Plot features as red dots if features is True
-    if self.features_df is not None and show_features:
-        feats = self.features_df.clone()
-        # Convert to pandas for operations that require pandas functionality
-        if hasattr(feats, 'to_pandas'):
-            feats = feats.to_pandas()
-        # if ms2_scans is not null, keep only the first element of the list
-        feats['ms2_scans'] = feats['ms2_scans'].apply(lambda x: x[0] if type(x) == list else x)
-        # keep only iso==0, i.e. the main
-        feats = feats[feats['iso']==0]
-        # find features with ms2_scans not None  and iso==0
-        features_df = feats[feats['ms2_scans'].notnull()]
-        feature_points_1 = hv.Points(
-        features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta", "ms2_scans"], label="Features with MS2 data"
-        ).options(
-        color=color_1,
-        marker=marker,
-        size=size_1,
-        tools=["hover"],
-        )
-        # find features without MS2 data
-        features_df = feats[feats['ms2_scans'].isnull()]
-        feature_points_2 = hv.Points(
-        features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta"], label="Features without MS2 data"
-        ).options(
-        color='red',
-        size=size_2,
-        marker=marker,
-        tools=["hover"],
-        )
-        if show_isotopes:
-            feats = self.features_df
-            features_df = feats[feats['iso']>0]
-            feature_points_iso = hv.Points(
-            features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta", "iso", "iso_of"], label="Isotopes"
-            ).options(
-            color='violet',
-            marker=marker,
-            size=size_1,
-            tools=["hover"],
-            )
-    if show_ms2:
-        # find all self.scans_df with mslevel 2 that are not linked to a feature
-        ms2_orphan = self.scans_df.filter(pl.col('ms_level')==2).filter(pl.col("feature_uid")<0)
-        if len(ms2_orphan) > 0:
-            # pandalize
-            ms2 = ms2_orphan.to_pandas()
-            feature_points_3 = hv.Points(
-            ms2, kdims=["rt", "prec_mz"], vdims=["index", "inty_tot", "bl"], label="Orphan MS2 scans"
-            ).options(
-            color=color_2,
-            marker='x',
-            size=size_2,
-            tools=["hover"],
-            )
-        ms2_linked = self.scans_df.filter(pl.col('ms_level')==2).filter(pl.col("feature_uid")>=0)
-        if len(ms2_linked) > 0:
-            # pandalize
-            ms2 = ms2_linked.to_pandas()
-            feature_points_4 = hv.Points(
-            ms2, kdims=["rt", "prec_mz"], vdims=["index", "inty_tot", "bl"], label="Linked MS2 scans"
-            ).options(
-            color=color_1,
-            marker='x',
-            size=size_2,
-            tools=["hover"],
-            )
-    if feature_points_4 is not None:
-        overlay = overlay * feature_points_4
-    if feature_points_3 is not None:
-        overlay = overlay * feature_points_3
-    if feature_points_1 is not None:
-        overlay = overlay * feature_points_1
-    if not show_only_features_with_ms2:
-        if feature_points_2 is not None:
-            overlay = overlay * feature_points_2
-    if feature_points_iso is not None:
-        overlay = overlay * feature_points_iso
-    """
     if title is not None:
         overlay = overlay.opts(title=title)

masster 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl

Potentially problematic release.

masster 0.5.9__py3-none-any.whl → 0.5.11__py3-none-any.whl