PyPI - masster - Versions diffs - 0.5.28__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

masster 0.5.28__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (30) hide show

masster/_version.py +1 -1
masster/data/libs/aa_nort.json +240 -0
masster/data/libs/ccm_nort.json +1319 -0
masster/lib/lib.py +1 -1
masster/logger.py +0 -6
masster/sample/adducts.py +1 -1
masster/sample/defaults/find_adducts_def.py +1 -1
masster/sample/h5.py +152 -2
masster/sample/helpers.py +91 -5
masster/sample/id.py +1160 -0
masster/sample/importers.py +316 -0
masster/sample/plot.py +175 -71
masster/sample/sample.py +18 -3
masster/sample/sample5_schema.json +99 -1
masster/study/defaults/study_def.py +8 -12
masster/study/id.py +59 -12
masster/study/load.py +0 -11
masster/study/merge.py +153 -0
masster/study/plot.py +197 -0
masster/study/study.py +3 -1
masster/study/study5_schema.json +15 -0
masster/wizard/wizard.py +11 -12
{masster-0.5.28.dist-info → masster-0.6.0.dist-info}/METADATA +15 -17
{masster-0.5.28.dist-info → masster-0.6.0.dist-info}/RECORD +27 -26
masster/data/libs/aa.csv +0 -22
masster/data/libs/ccm.csv +0 -120
masster/data/libs/urine.csv +0 -4693
{masster-0.5.28.dist-info → masster-0.6.0.dist-info}/WHEEL +0 -0
{masster-0.5.28.dist-info → masster-0.6.0.dist-info}/entry_points.txt +0 -0
{masster-0.5.28.dist-info → masster-0.6.0.dist-info}/licenses/LICENSE +0 -0

masster/sample/importers.py ADDED Viewed

@@ -0,0 +1,316 @@
+"""
+importers.py
+Module providing import functionality for Sample class, specifically for importing
+oracle identification data into features.
+"""
+from __future__ import annotations
+import os
+import pandas as pd
+import polars as pl
+def import_oracle(
+    self,
+    folder,
+    min_id_level=None,
+    max_id_level=None,
+):
+    """
+    Import oracle identification data and map it to features.
+    This method reads oracle identification results from folder/diag/annotation_full.csv
+    and creates lib_df and id_df DataFrames with detailed library and identification information.
+    It also updates features_df with top identification results.
+    Parameters:
+        folder (str): Path to oracle folder containing diag/annotation_full.csv
+        min_id_level (int, optional): Minimum identification level to include
+        max_id_level (int, optional): Maximum identification level to include
+    Returns:
+        None: Updates features_df, creates lib_df and id_df in-place with oracle identification data
+    Raises:
+        FileNotFoundError: If the oracle annotation file doesn't exist
+        ValueError: If features_df is empty or doesn't have required columns
+    Example:
+        >>> sample.import_oracle(
+        ...     folder="path/to/oracle_results",
+        ...     min_id_level=2,
+        ...     max_id_level=4
+        ... )
+    """
+    self.logger.info(f"Starting oracle import from folder: {folder}")
+    # Validate inputs
+    if self.features_df is None or self.features_df.is_empty():
+        raise ValueError("features_df is empty or not available. Run find_features() first.")
+    if "feature_uid" not in self.features_df.columns:
+        raise ValueError("features_df must contain 'feature_uid' column")
+    # Check if oracle file exists
+    oracle_file_path = os.path.join(folder, "diag", "annotation_full.csv")
+    if not os.path.exists(oracle_file_path):
+        raise FileNotFoundError(f"Oracle annotation file not found: {oracle_file_path}")
+    self.logger.debug(f"Loading oracle data from: {oracle_file_path}")
+    try:
+        # Read oracle data using pandas first for easier processing
+        oracle_data = pd.read_csv(oracle_file_path)
+        self.logger.info(f"Oracle data loaded successfully with {len(oracle_data)} rows")
+    except Exception as e:
+        self.logger.error(f"Could not read {oracle_file_path}: {e}")
+        raise
+    # Extract feature_uid from scan_title column (format: "uid:XYZ, ...")
+    self.logger.debug("Extracting feature UIDs from oracle scan_title using pattern 'uid:(\\d+)'")
+    oracle_data["feature_uid"] = oracle_data["scan_title"].str.extract(r"uid:(\d+)", expand=False)
+    # Remove rows where feature_uid extraction failed
+    initial_count = len(oracle_data)
+    oracle_data = oracle_data.dropna(subset=["feature_uid"])
+    oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
+    self.logger.debug(f"Extracted feature UIDs for {len(oracle_data)}/{initial_count} oracle entries")
+    # Apply id_level filters if specified
+    if min_id_level is not None:
+        oracle_data = oracle_data[oracle_data["level"] >= min_id_level]
+        self.logger.debug(f"After min_id_level filter ({min_id_level}): {len(oracle_data)} entries")
+    if max_id_level is not None:
+        oracle_data = oracle_data[oracle_data["level"] <= max_id_level]
+        self.logger.debug(f"After max_id_level filter ({max_id_level}): {len(oracle_data)} entries")
+    if len(oracle_data) == 0:
+        self.logger.warning("No oracle entries remain after filtering")
+        return
+    # === CREATE LIB_DF ===
+    self.logger.debug("Creating lib_df from Oracle annotation data")
+    self.logger.debug(f"Oracle data shape before lib_df creation: {oracle_data.shape}")
+    # Create unique lib_uid for each library entry
+    oracle_data["lib_uid"] = range(len(oracle_data))
+    # Map Oracle columns to lib_df schema
+    lib_data = []
+    for _, row in oracle_data.iterrows():
+        # Convert cmpd_uid to integer, using lib_uid as fallback
+        cmpd_uid = row["lib_uid"]  # Use lib_uid as integer compound identifier
+        try:
+            if row.get("lib_id") is not None:
+                cmpd_uid = int(float(str(row["lib_id"])))  # Convert to int, handling potential float strings
+        except (ValueError, TypeError):
+            pass  # Keep lib_uid as fallback
+        lib_entry = {
+            "lib_uid": row["lib_uid"],
+            "cmpd_uid": cmpd_uid,  # Integer compound identifier
+            "source_id": "LipidOracle",  # Fixed source identifier
+            "name": row.get("name", None),
+            "shortname": row.get("species", None),
+            "class": row.get("hg", None),
+            "smiles": None,  # Not available in Oracle data
+            "inchi": None,  # Not available in Oracle data
+            "inchikey": None,  # Not available in Oracle data
+            "formula": row.get("formula", None),
+            "iso": 0,  # Fixed isotope value
+            "adduct": row.get("ion", None),
+            "probability": row.get("score", None),
+            "m": None,  # Would need to calculate from formula
+            "z": 1 if row.get("ion", "").find("+") != -1 else (-1 if row.get("ion", "").find("-") != -1 else None),
+            "mz": row.get("mz", None),  # Use mz column from annotation_full.csv
+            "rt": None,  # Set to null as requested
+            "quant_group": None,  # Set to null as requested
+            "db_id": row.get("lib_id", None),
+            "db": row.get("lib", None),
+        }
+        lib_data.append(lib_entry)
+    self.logger.debug(f"Created {len(lib_data)} lib_data entries")
+    # Create lib_df as Polars DataFrame with error handling for mixed types
+    try:
+        lib_df_temp = pl.DataFrame(lib_data)
+    except Exception as e:
+        self.logger.warning(f"Error creating lib_df with polars: {e}")
+        # Fallback: convert to pandas first, then to polars
+        lib_df_pandas = pd.DataFrame(lib_data)
+        lib_df_temp = pl.from_pandas(lib_df_pandas)
+    # Ensure uniqueness by name and adduct combination
+    # Sort by lib_uid and keep first occurrence (earliest in processing order)
+    self.lib_df = lib_df_temp.sort("lib_uid").unique(subset=["name", "adduct"], keep="first")
+    self.logger.info(
+        f"Created lib_df with {len(self.lib_df)} library entries ({len(lib_data) - len(self.lib_df)} duplicates removed)"
+    )
+    # === CREATE ID_DF ===
+    self.logger.debug("Creating id_df from Oracle identification matches")
+    # Create identification matches
+    id_data = []
+    for _, row in oracle_data.iterrows():
+        # Use dmz from annotation_full.csv directly for mz_delta
+        mz_delta = None
+        if row.get("dmz") is not None:
+            try:
+                mz_delta = float(row["dmz"])
+            except (ValueError, TypeError):
+                pass
+        # Use rt_err from annotation_full.csv for rt_delta, None if NaN
+        rt_delta = None
+        rt_err_value = row.get("rt_err")
+        if rt_err_value is not None and not (isinstance(rt_err_value, float) and pd.isna(rt_err_value)):
+            try:
+                rt_delta = float(rt_err_value)
+            except (ValueError, TypeError):
+                pass
+        # Create matcher as "lipidoracle-" + score_metric from annotation_full.csv
+        matcher = "lipidoracle"  # default fallback
+        if row.get("score_metric") is not None:
+            try:
+                score_metric = str(row["score_metric"])
+                matcher = f"lipidoracle-{score_metric}"
+            except (ValueError, TypeError):
+                pass
+        id_entry = {
+            "feature_uid": row["feature_uid"],
+            "lib_uid": row["lib_uid"],
+            "mz_delta": mz_delta,
+            "rt_delta": rt_delta,
+            "matcher": matcher,
+            "score": row.get("score", None),
+            "iso": 0,  # Fixed isotope value for oracle imports
+        }
+        id_data.append(id_entry)
+    # Create id_df as Polars DataFrame with error handling
+    try:
+        id_df_temp = pl.DataFrame(id_data)
+    except Exception as e:
+        self.logger.warning(f"Error creating id_df with polars: {e}")
+        # Fallback: convert to pandas first, then to polars
+        id_df_pandas = pd.DataFrame(id_data)
+        id_df_temp = pl.from_pandas(id_df_pandas)
+    # Filter id_df to only include lib_uids that exist in the final unique lib_df
+    unique_lib_uids = self.lib_df.select("lib_uid").to_series()
+    self.id_df = id_df_temp.filter(pl.col("lib_uid").is_in(unique_lib_uids))
+    self.logger.info(f"Created id_df with {len(self.id_df)} identification matches")
+    # === UPDATE FEATURES_DF (adapted from consensus functionality) ===
+    self.logger.debug("Updating features_df with top identification results")
+    # Convert to polars for efficient joining with error handling
+    try:
+        oracle_pl = pl.DataFrame(oracle_data)
+    except Exception as e:
+        self.logger.warning(f"Error converting oracle_data to polars: {e}")
+        # Convert using from_pandas properly
+        oracle_pl = pl.from_pandas(oracle_data.reset_index(drop=True))
+    # Group by feature_uid and select the best identification (highest level)
+    # In case of ties, take the first one
+    best_ids = (
+        oracle_pl.group_by("feature_uid")
+        .agg([pl.col("level").max().alias("max_level")])
+        .join(oracle_pl, on="feature_uid")
+        .filter(pl.col("level") == pl.col("max_level"))
+        .group_by("feature_uid")
+        .first()  # In case of ties, take the first
+    )
+    self.logger.debug(f"Selected best identifications for {len(best_ids)} features")
+    # Prepare the identification columns
+    id_columns = {
+        "id_top_name": best_ids.select("feature_uid", "name"),
+        "id_top_adduct": best_ids.select("feature_uid", "ion"),
+        "id_top_class": best_ids.select("feature_uid", "hg"),
+        "id_top_score": best_ids.select("feature_uid", pl.col("score").round(3).alias("score")),
+    }
+    # Initialize identification columns in features_df if they don't exist
+    for col_name in id_columns.keys():
+        if col_name not in self.features_df.columns:
+            if col_name == "id_top_score":
+                self.features_df = self.features_df.with_columns(pl.lit(None, dtype=pl.Float64).alias(col_name))
+            else:
+                self.features_df = self.features_df.with_columns(pl.lit(None, dtype=pl.String).alias(col_name))
+    # Update features_df with oracle identifications
+    for col_name, id_data_col in id_columns.items():
+        oracle_column = id_data_col.columns[1]  # second column (after feature_uid)
+        # Create update dataframe
+        update_data = id_data_col.rename({oracle_column: col_name})
+        # Join and update
+        self.features_df = (
+            self.features_df.join(update_data, on="feature_uid", how="left", suffix="_oracle")
+            .with_columns(pl.coalesce([f"{col_name}_oracle", col_name]).alias(col_name))
+            .drop(f"{col_name}_oracle")
+        )
+    # Replace NaN values with None in identification columns
+    id_col_names = ["id_top_name", "id_top_adduct", "id_top_class", "id_top_score"]
+    for col_name in id_col_names:
+        if col_name in self.features_df.columns:
+            # For string columns, replace empty strings and "nan" with None
+            if col_name != "id_top_score":
+                self.features_df = self.features_df.with_columns(
+                    pl.when(
+                        pl.col(col_name).is_null()
+                        | (pl.col(col_name) == "")
+                        | (pl.col(col_name) == "nan")
+                        | (pl.col(col_name) == "NaN")
+                    )
+                    .then(None)
+                    .otherwise(pl.col(col_name))
+                    .alias(col_name)
+                )
+            # For numeric columns, replace NaN with None
+            else:
+                self.features_df = self.features_df.with_columns(
+                    pl.when(pl.col(col_name).is_null() | pl.col(col_name).is_nan())
+                    .then(None)
+                    .otherwise(pl.col(col_name))
+                    .alias(col_name)
+                )
+    # Count how many features were updated
+    updated_count = self.features_df.filter(pl.col("id_top_name").is_not_null()).height
+    total_features = len(self.features_df)
+    self.logger.success(
+        f"LipidOracle import completed. {updated_count}/{total_features} "
+        f"features now have identifications ({updated_count / total_features * 100:.1f}%)"
+    )
+    # Update history
+    self.store_history(
+        ["import_oracle"],
+        {
+            "folder": folder,
+            "min_id_level": min_id_level,
+            "max_id_level": max_id_level,
+            "updated_features": updated_count,
+            "total_features": total_features,
+            "lib_entries": len(self.lib_df),
+            "id_matches": len(self.id_df),
+        },
+    )

masster/sample/plot.py CHANGED Viewed

@@ -1115,6 +1115,7 @@ def plot_2d(
     filename=None,
     show_features=True,
     show_only_features_with_ms2=False,
+    show_only_features_with_id=False,
     show_isotopes=False,
     show_ms2=False,
     show_in_browser=False,
@@ -1134,6 +1135,7 @@ def plot_2d(
     rt_range=None,
     legend=None,
     colorby=None,
+    tooltip=None,
 ):
     """
     Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
@@ -1151,6 +1153,9 @@ def plot_2d(
         show_only_features_with_ms2 (bool, default False):
             If True, only display features that have associated MS2 scans. When False,
             features without MS2 data are also shown.
+        show_only_features_with_id (bool, default False):
+            If True, only display features with non-null id_top_name (identified features).
+            When False, all features are shown. Only applies when colorby='id'.
         show_isotopes (bool, default False):
             Whether to overlay isotope information on top of the features.
         show_ms2 (bool, default False):
@@ -1186,6 +1191,9 @@ def plot_2d(
             Feature property to use for coloring. If None (default), uses current green/red scheme
             for features with/without MS2 data. If specified and contains categorical data, applies
             categorical coloring with legend support (similar to plot_2d_oracle).
+        tooltip (str, optional):
+            Controls the feature hover tooltip content. Use None or "ms1" (default) to display the
+            full feature details, or "id" to show only rt, m/z, feature_uid, inty, and any id_* columns.
     Behavior:
         - Checks for a loaded mzML file by verifying that self.file_obj is not None.
         - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
@@ -1376,8 +1384,104 @@ def plot_2d(
         # keep only iso==0, i.e. the main
         feats = feats[feats["iso"] == 0]
+        tooltip_mode = str(tooltip).lower() if tooltip is not None else "ms1"
+        if tooltip_mode not in {"ms1", "id"}:
+            tooltip_mode = "ms1"
+        id_columns = [col for col in feats.columns if isinstance(col, str) and col.startswith("id_")]
+        def build_feature_tooltips(*, include_iso=True, include_iso_of=False, include_colorby=None):
+            base_tooltips = [
+                ("rt", "@rt"),
+                ("m/z", "@mz{0.0000}"),
+                ("feature_uid", "@feature_uid"),
+                ("inty", "@inty"),
+            ]
+            if tooltip_mode == "id":
+                base_tooltips.extend((col, f"@{col}") for col in id_columns)
+                return base_tooltips
+            if include_iso:
+                base_tooltips.append(("iso", "@iso"))
+            if include_iso_of:
+                base_tooltips.append(("iso_of", "@iso_of"))
+            base_tooltips.append(("adduct", "@adduct"))
+            base_tooltips.append(("chrom_coherence", "@chrom_coherence"))
+            base_tooltips.append(("chrom_prominence_scaled", "@chrom_prominence_scaled"))
+            if include_colorby and tooltip_mode != "id":
+                base_tooltips.append((include_colorby, f"@{include_colorby}"))
+            return base_tooltips
+        handled_colorby = False
+        colorby_id_mode = False
+        if colorby == "id":
+            if "id_top_name" not in feats.columns:
+                self.logger.warning("colorby='id' requested but 'id_top_name' column is missing; using default colors")
+            else:
+                handled_colorby = True
+                colorby_id_mode = True
+                id_values = feats["id_top_name"]
+                annotated_mask = id_values.notna() & (id_values.astype(str).str.strip() != "")
+                annotated_features = feats[annotated_mask].copy()
+                unannotated_features = feats[~annotated_mask].copy()
+                # Apply show_only_features_with_id filter if requested
+                if show_only_features_with_id:
+                    # Only keep annotated features, discard unannotated
+                    unannotated_features = unannotated_features.iloc[0:0]  # Empty dataframe
+                feature_hover_annotated = HoverTool(
+                    tooltips=build_feature_tooltips(),
+                )
+                feature_hover_unannotated = HoverTool(
+                    tooltips=build_feature_tooltips(),
+                )
+                # Select only plottable columns for vdims (exclude complex objects like Chromatogram)
+                base_vdims = ["feature_uid", "inty", "iso", "adduct", "chrom_coherence", "chrom_prominence_scaled"]
+                # Add id_* columns if they exist
+                id_vdims = [col for col in feats.columns if isinstance(col, str) and col.startswith("id_")]
+                all_vdims = base_vdims + id_vdims
+                if len(annotated_features) > 0:
+                    vdims_annotated = [col for col in all_vdims if col in annotated_features.columns]
+                    feature_points_1 = hv.Points(
+                        annotated_features,
+                        kdims=["rt", "mz"],
+                        vdims=vdims_annotated,
+                        label="Annotated features",
+                    ).options(
+                        color="#2e7d32",
+                        marker=marker_type,
+                        size=size_1,
+                        tools=[feature_hover_annotated],
+                        hooks=hooks,
+                        show_legend=True,
+                    )
+                if len(unannotated_features) > 0:
+                    vdims_unannotated = [col for col in all_vdims if col in unannotated_features.columns]
+                    feature_points_2 = hv.Points(
+                        unannotated_features,
+                        kdims=["rt", "mz"],
+                        vdims=vdims_unannotated,
+                        label="Unannotated features",
+                    ).options(
+                        color="#9e9e9e",
+                        marker=marker_type,
+                        size=size_2,
+                        tools=[feature_hover_unannotated],
+                        hooks=hooks,
+                        show_legend=True,
+                    )
         # Handle colorby parameter
-        if colorby is not None and colorby in feats.columns:
+        if (not handled_colorby) and colorby is not None and colorby in feats.columns:
             # Check if colorby data is categorical (string-like)
             colorby_values = feats[colorby].dropna()
             is_categorical = feats[colorby].dtype in ["object", "string", "category"] or (
@@ -1424,17 +1528,7 @@ def plot_2d(
                 if len(group_with_ms2) > 0:
                     feature_hover = HoverTool(
-                        tooltips=[
-                            ("rt", "@rt"),
-                            ("m/z", "@mz{0.0000}"),
-                            ("feature_uid", "@feature_uid"),
-                            ("inty", "@inty"),
-                            ("iso", "@iso"),
-                            ("adduct", "@adduct"),
-                            ("chrom_coherence", "@chrom_coherence"),
-                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-                            (colorby, f"@{colorby}"),
-                        ],
+                        tooltips=build_feature_tooltips(include_colorby=colorby),
                     )
                     group_points_ms2 = hv.Points(
                         group_with_ms2,
@@ -1464,17 +1558,7 @@ def plot_2d(
                 if len(group_without_ms2) > 0:
                     feature_hover = HoverTool(
-                        tooltips=[
-                            ("rt", "@rt"),
-                            ("m/z", "@mz{0.0000}"),
-                            ("feature_uid", "@feature_uid"),
-                            ("inty", "@inty"),
-                            ("iso", "@iso"),
-                            ("adduct", "@adduct"),
-                            ("chrom_coherence", "@chrom_coherence"),
-                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-                            (colorby, f"@{colorby}"),
-                        ],
+                        tooltips=build_feature_tooltips(include_colorby=colorby),
                     )
                     group_points_no_ms2 = hv.Points(
                         group_without_ms2,
@@ -1500,22 +1584,15 @@ def plot_2d(
                         feature_points_2 = group_points_no_ms2
                     else:
                         feature_points_2 = feature_points_2 * group_points_no_ms2
-        else:
+        # Only use default coloring if no special colorby mode was handled
+        if not handled_colorby and not use_categorical_coloring:
             # Use original green/red coloring scheme for MS2 presence
             # find features with ms2_scans not None  and iso==0
             features_df = feats[feats["ms2_scans"].notnull()]
             # Create feature points with proper sizing method
             feature_hover_1 = HoverTool(
-                tooltips=[
-                    ("rt", "@rt"),
-                    ("m/z", "@mz{0.0000}"),
-                    ("feature_uid", "@feature_uid"),
-                    ("inty", "@inty"),
-                    ("iso", "@iso"),
-                    ("adduct", "@adduct"),
-                    ("chrom_coherence", "@chrom_coherence"),
-                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-                ],
+                tooltips=build_feature_tooltips(),
             )
             if len(features_df) > 0:
                 feature_points_1 = hv.Points(
@@ -1542,16 +1619,7 @@ def plot_2d(
             # find features without MS2 data
             features_df = feats[feats["ms2_scans"].isnull()]
             feature_hover_2 = HoverTool(
-                tooltips=[
-                    ("rt", "@rt"),
-                    ("m/z", "@mz{0.0000}"),
-                    ("feature_uid", "@feature_uid"),
-                    ("inty", "@inty"),
-                    ("iso", "@iso"),
-                    ("adduct", "@adduct"),
-                    ("chrom_coherence", "@chrom_coherence"),
-                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-                ],
+                tooltips=build_feature_tooltips(),
             )
             if len(features_df) > 0:
                 feature_points_2 = hv.Points(
@@ -1581,17 +1649,7 @@ def plot_2d(
             if hasattr(features_df, "to_pandas"):
                 features_df = features_df.to_pandas()
             feature_hover_iso = HoverTool(
-                tooltips=[
-                    ("rt", "@rt"),
-                    ("m/z", "@mz{0.0000}"),
-                    ("feature_uid", "@feature_uid"),
-                    ("inty", "@inty"),
-                    ("iso", "@iso"),
-                    ("iso_of", "@iso_of"),
-                    ("adduct", "@adduct"),
-                    ("chrom_coherence", "@chrom_coherence"),
-                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-                ],
+                tooltips=build_feature_tooltips(include_iso_of=True),
             )
             feature_points_iso = hv.Points(
                 features_df,
@@ -1676,18 +1734,31 @@ def plot_2d(
         overlay = overlay * feature_points_4
     if feature_points_3 is not None:
         overlay = overlay * feature_points_3
-    if feature_points_1 is not None:
-        overlay = overlay * feature_points_1
-    if not show_only_features_with_ms2 and feature_points_2 is not None:
-        overlay = overlay * feature_points_2
+    # In colorby='id' mode, draw unannotated (grey) first, then annotated (green) on top
+    if colorby_id_mode:
+        # Draw grey points first (bottom layer)
+        if feature_points_2 is not None:
+            overlay = overlay * feature_points_2
+        # Draw green points last (top layer)
+        if feature_points_1 is not None:
+            overlay = overlay * feature_points_1
+    else:
+        # Default order: green (with MS2) first, then red (without MS2)
+        if feature_points_1 is not None:
+            overlay = overlay * feature_points_1
+        # In non-id mode, only show features without MS2 if show_only_features_with_ms2 is False
+        if not show_only_features_with_ms2 and feature_points_2 is not None:
+            overlay = overlay * feature_points_2
     if feature_points_iso is not None:
         overlay = overlay * feature_points_iso
     if title is not None:
         overlay = overlay.opts(title=title)
-    # Handle legend positioning for categorical coloring
-    if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
+    # Handle legend positioning for categorical coloring or colorby='id' mode
+    if legend is not None and (colorby_id_mode or (use_categorical_coloring and len(categorical_groups) > 1)):
         # Map legend position parameter to HoloViews legend position
         legend_position_map = {
             "top_right": "top_right",
@@ -1704,8 +1775,8 @@ def plot_2d(
         # Apply legend configuration to the overlay
         overlay = overlay.opts(legend_position=hv_legend_pos, legend_opts={"title": "", "padding": 2, "spacing": 2})
-    elif legend is None and use_categorical_coloring:
-        # Explicitly hide legend when legend=None but categorical coloring is used
+    elif legend is None and (colorby_id_mode or use_categorical_coloring):
+        # Explicitly hide legend when legend=None but categorical coloring or id mode is used
         overlay = overlay.opts(show_legend=False)
     # Handle slider functionality
@@ -1728,12 +1799,27 @@ def plot_2d(
             if feature_points_3 is not None:
                 updated_points_3 = feature_points_3.opts(size=size_val)
                 feature_overlay = updated_points_3 if feature_overlay is None else feature_overlay * updated_points_3
-            if feature_points_1 is not None:
-                updated_points_1 = feature_points_1.opts(size=size_val)
-                feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
-            if not show_only_features_with_ms2 and feature_points_2 is not None:
-                updated_points_2 = feature_points_2.opts(size=size_val)
-                feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
+            # In colorby='id' mode, draw unannotated (grey) first, then annotated (green) on top
+            if colorby_id_mode:
+                # Draw grey points first (bottom layer)
+                if feature_points_2 is not None:
+                    updated_points_2 = feature_points_2.opts(size=size_val)
+                    feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
+                # Draw green points last (top layer)
+                if feature_points_1 is not None:
+                    updated_points_1 = feature_points_1.opts(size=size_val)
+                    feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
+            else:
+                # Default order: green (with MS2) first, then red (without MS2)
+                if feature_points_1 is not None:
+                    updated_points_1 = feature_points_1.opts(size=size_val)
+                    feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
+                # In non-id mode, only show features without MS2 if show_only_features_with_ms2 is False
+                if not show_only_features_with_ms2 and feature_points_2 is not None:
+                    updated_points_2 = feature_points_2.opts(size=size_val)
+                    feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
             if feature_points_iso is not None:
                 updated_points_iso = feature_points_iso.opts(size=size_val)
                 feature_overlay = (
@@ -2390,10 +2476,28 @@ def plot_dda_stats(
         "time_ms2_to_ms2",
         "time_ms2_to_ms1",
     ]
-    # Ensure that 'index' and 'rt' are kept for hover along with the columns to plot
+    # skip cols that are not in stats
+    cols_to_plot = [col for col in cols_to_plot if col in stats.columns]
     stats = stats[["scan_uid", "cycle", "rt", *cols_to_plot]]
     # set any value < 0 to None
-    stats[stats < 0] = None
+    # Replace negative values with nulls in a polars-friendly way
+    numeric_types = {
+        pl.Float32, pl.Float64,
+        pl.Int8, pl.Int16, pl.Int32, pl.Int64,
+        pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64
+    }
+    exprs = []
+    for col_name, dtype in stats.schema.items():
+        if dtype in numeric_types:
+            exprs.append(
+                pl.when(pl.col(col_name) < 0)
+                  .then(None)
+                  .otherwise(pl.col(col_name))
+                  .alias(col_name)
+            )
+        else:
+            exprs.append(pl.col(col_name))
+    stats = stats.select(exprs)
     # Create a Scatter for each column in cols_to_plot stacked vertically, with hover enabled
     scatter_plots = []

masster 0.5.28__py3-none-any.whl → 0.6.0__py3-none-any.whl

Potentially problematic release.

masster 0.5.28__py3-none-any.whl → 0.6.0__py3-none-any.whl