masster 0.5.28__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

@@ -0,0 +1,316 @@
1
+ """
2
+ importers.py
3
+
4
+ Module providing import functionality for Sample class, specifically for importing
5
+ oracle identification data into features.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ import pandas as pd
12
+ import polars as pl
13
+
14
+
15
# Columns of annotation_full.csv that import_oracle() reads unconditionally.
_ORACLE_REQUIRED_COLUMNS = ("scan_title", "level", "name", "ion", "hg", "score")


def _oracle_value(row, key):
    """Return ``row[key]``, or None when the key is absent or the value is missing (None/NaN/NA)."""
    value = row.get(key)
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return None
    return value


def _oracle_float(value):
    """Convert *value* to float; return None when it is missing, NaN or not numeric."""
    if value is None:
        return None
    try:
        number = float(value)
    except (ValueError, TypeError):
        return None
    return None if pd.isna(number) else number


def _oracle_compound_uid(lib_id, fallback):
    """Return *lib_id* as an integer, or *fallback* when it is not a finite number."""
    try:
        # Go through str/float so that values such as "12.0" are accepted as well.
        return int(float(str(lib_id)))
    except (ValueError, TypeError, OverflowError):
        return fallback


def _oracle_charge(ion):
    """Derive the charge sign (+1 / -1) from an adduct string; None when it cannot be derived."""
    if not isinstance(ion, str):
        # Missing adducts arrive as None/NaN; there is no sign to read from them.
        return None
    if "+" in ion:
        return 1
    if "-" in ion:
        return -1
    return None


def _oracle_lib_entry(row):
    """Map one oracle annotation record (a dict) onto the lib_df schema."""
    lib_id = _oracle_value(row, "lib_id")
    ion = _oracle_value(row, "ion")
    return {
        "lib_uid": row["lib_uid"],
        "cmpd_uid": _oracle_compound_uid(lib_id, fallback=row["lib_uid"]),
        "source_id": "LipidOracle",  # Fixed source identifier
        "name": _oracle_value(row, "name"),
        "shortname": _oracle_value(row, "species"),
        "class": _oracle_value(row, "hg"),
        "smiles": None,  # Not available in Oracle data
        "inchi": None,  # Not available in Oracle data
        "inchikey": None,  # Not available in Oracle data
        "formula": _oracle_value(row, "formula"),
        "iso": 0,  # Fixed isotope value
        "adduct": ion,
        "probability": _oracle_value(row, "score"),
        "m": None,  # Would need to be calculated from the formula
        "z": _oracle_charge(ion),
        "mz": _oracle_value(row, "mz"),
        "rt": None,
        "quant_group": None,
        "db_id": lib_id,
        "db": _oracle_value(row, "lib"),
    }


def _oracle_id_entry(row):
    """Map one oracle annotation record (a dict) onto the id_df schema."""
    score_metric = _oracle_value(row, "score_metric")
    return {
        "feature_uid": row["feature_uid"],
        "lib_uid": row["lib_uid"],
        "mz_delta": _oracle_float(row.get("dmz")),
        "rt_delta": _oracle_float(row.get("rt_err")),
        "matcher": "lipidoracle" if score_metric is None else f"lipidoracle-{score_metric}",
        "score": _oracle_value(row, "score"),
        "iso": 0,  # Fixed isotope value for oracle imports
    }


def import_oracle(
    self,
    folder,
    min_id_level=None,
    max_id_level=None,
):
    """
    Import oracle identification data and map it to features.

    Reads ``<folder>/diag/annotation_full.csv``, extracts the feature uid of every
    annotation from its ``scan_title`` (pattern ``uid:<digits>``) and then

    * builds ``self.lib_df`` with one entry per unique (name, adduct) pair,
    * builds ``self.id_df`` with one row per (feature, library entry) match,
    * writes the best identification of each feature (highest ``level``, first row on
      ties) into the ``id_top_name``, ``id_top_adduct``, ``id_top_class`` and
      ``id_top_score`` columns of ``self.features_df``,
    * records the call through ``self.store_history``.

    Annotations that share name and adduct share one ``lib_uid``, so every feature keeps
    its match in ``id_df`` even when another feature was annotated with the same compound.
    Missing values in the CSV are stored as nulls rather than NaN.

    Parameters:
        folder (str): Path to oracle folder containing diag/annotation_full.csv
        min_id_level (int, optional): Minimum identification level to include
        max_id_level (int, optional): Maximum identification level to include

    Returns:
        None: Updates features_df and creates lib_df and id_df in-place. Returns early,
        without modifying anything, when no annotation survives the uid extraction and
        the level filters.

    Raises:
        FileNotFoundError: If the oracle annotation file doesn't exist
        ValueError: If features_df is empty or lacks 'feature_uid', or if the annotation
            file lacks one of the required columns (scan_title, level, name, ion, hg, score)

    Example:
        >>> sample.import_oracle(
        ...     folder="path/to/oracle_results",
        ...     min_id_level=2,
        ...     max_id_level=4
        ... )
    """
    self.logger.info(f"Starting oracle import from folder: {folder}")

    # Validate inputs
    if self.features_df is None or self.features_df.is_empty():
        raise ValueError("features_df is empty or not available. Run find_features() first.")

    if "feature_uid" not in self.features_df.columns:
        raise ValueError("features_df must contain 'feature_uid' column")

    # Check if oracle file exists
    oracle_file_path = os.path.join(folder, "diag", "annotation_full.csv")
    if not os.path.exists(oracle_file_path):
        raise FileNotFoundError(f"Oracle annotation file not found: {oracle_file_path}")

    self.logger.debug(f"Loading oracle data from: {oracle_file_path}")

    try:
        # Read oracle data using pandas first for easier processing
        oracle_data = pd.read_csv(oracle_file_path)
    except Exception as e:
        self.logger.error(f"Could not read {oracle_file_path}: {e}")
        raise
    self.logger.info(f"Oracle data loaded successfully with {len(oracle_data)} rows")

    # Fail before anything on the sample is modified: these columns are all needed
    # further down, after lib_df / id_df would otherwise already have been replaced.
    missing_columns = [col for col in _ORACLE_REQUIRED_COLUMNS if col not in oracle_data.columns]
    if missing_columns:
        raise ValueError(
            f"Oracle annotation file {oracle_file_path} is missing required columns: {missing_columns}"
        )

    # Extract feature_uid from scan_title column (format: "uid:XYZ, ...")
    self.logger.debug("Extracting feature UIDs from oracle scan_title using pattern 'uid:(\\d+)'")
    initial_count = len(oracle_data)
    # astype(str) keeps the .str accessor usable even if the column was parsed as numeric/NaN
    extracted_uids = oracle_data["scan_title"].astype(str).str.extract(r"uid:(\d+)", expand=False)

    # Remove rows where feature_uid extraction failed
    oracle_data = oracle_data.assign(feature_uid=extracted_uids).dropna(subset=["feature_uid"])
    oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)

    self.logger.debug(f"Extracted feature UIDs for {len(oracle_data)}/{initial_count} oracle entries")

    # Apply id_level filters if specified
    if min_id_level is not None:
        oracle_data = oracle_data[oracle_data["level"] >= min_id_level]
        self.logger.debug(f"After min_id_level filter ({min_id_level}): {len(oracle_data)} entries")

    if max_id_level is not None:
        oracle_data = oracle_data[oracle_data["level"] <= max_id_level]
        self.logger.debug(f"After max_id_level filter ({max_id_level}): {len(oracle_data)} entries")

    if oracle_data.empty:
        self.logger.warning("No oracle entries remain after filtering")
        return

    # Continue on an independent, freshly indexed frame so that the column assignments
    # below never write into a slice of the frame that was read from disk.
    oracle_data = oracle_data.reset_index(drop=True)

    # === CREATE LIB_DF ===
    self.logger.debug("Creating lib_df from Oracle annotation data")
    self.logger.debug(f"Oracle data shape before lib_df creation: {oracle_data.shape}")

    # Number the rows, then give every (name, adduct) pair the uid of its first occurrence.
    # All annotations of one compound therefore point at the single lib_df entry that is
    # kept for it, instead of at a duplicate entry that is removed again.
    oracle_data["lib_uid"] = range(len(oracle_data))
    oracle_data["lib_uid"] = oracle_data.groupby(["name", "ion"], dropna=False, sort=False)[
        "lib_uid"
    ].transform("min")

    # Map Oracle columns to lib_df schema, one entry per unique (name, adduct) pair
    lib_records = oracle_data.drop_duplicates(subset="lib_uid", keep="first").to_dict("records")
    lib_data = [_oracle_lib_entry(record) for record in lib_records]

    self.logger.debug(f"Created {len(lib_data)} lib_data entries")

    # Create lib_df as Polars DataFrame with error handling for mixed types
    try:
        lib_df_temp = pl.DataFrame(lib_data, infer_schema_length=None)
    except Exception as e:
        self.logger.warning(f"Error creating lib_df with polars: {e}")
        # Fallback: convert to pandas first, then to polars
        lib_df_temp = pl.from_pandas(pd.DataFrame(lib_data))

    self.lib_df = lib_df_temp.sort("lib_uid")

    self.logger.info(
        f"Created lib_df with {len(self.lib_df)} library entries "
        f"({len(oracle_data) - len(self.lib_df)} duplicates removed)"
    )

    # === CREATE ID_DF ===
    self.logger.debug("Creating id_df from Oracle identification matches")

    # One match per (feature, library entry); repeated annotations of the same compound
    # for the same feature collapse onto their first occurrence.
    id_records = oracle_data.drop_duplicates(subset=["feature_uid", "lib_uid"], keep="first").to_dict(
        "records"
    )
    id_data = [_oracle_id_entry(record) for record in id_records]

    # Create id_df as Polars DataFrame with error handling
    try:
        id_df_temp = pl.DataFrame(id_data, infer_schema_length=None)
    except Exception as e:
        self.logger.warning(f"Error creating id_df with polars: {e}")
        # Fallback: convert to pandas first, then to polars
        id_df_temp = pl.from_pandas(pd.DataFrame(id_data))

    # Keep the delta columns numeric even when every value is missing
    self.id_df = id_df_temp.with_columns(
        [
            pl.col("mz_delta").cast(pl.Float64),
            pl.col("rt_delta").cast(pl.Float64),
        ]
    )

    self.logger.info(f"Created id_df with {len(self.id_df)} identification matches")

    # === UPDATE FEATURES_DF (adapted from consensus functionality) ===
    self.logger.debug("Updating features_df with top identification results")

    # Convert to polars for efficient joining with error handling
    try:
        oracle_pl = pl.DataFrame(oracle_data)
    except Exception as e:
        self.logger.warning(f"Error converting oracle_data to polars: {e}")
        # Convert using from_pandas properly
        oracle_pl = pl.from_pandas(oracle_data.reset_index(drop=True))

    # Group by feature_uid and select the best identification (highest level)
    # In case of ties, take the first one
    best_ids = (
        oracle_pl.group_by("feature_uid")
        .agg([pl.col("level").max().alias("max_level")])
        .join(oracle_pl, on="feature_uid")
        .filter(pl.col("level") == pl.col("max_level"))
        .group_by("feature_uid")
        .first()  # In case of ties, take the first
    )

    self.logger.debug(f"Selected best identifications for {len(best_ids)} features")

    # Prepare the identification columns
    id_columns = {
        "id_top_name": best_ids.select("feature_uid", "name"),
        "id_top_adduct": best_ids.select("feature_uid", "ion"),
        "id_top_class": best_ids.select("feature_uid", "hg"),
        "id_top_score": best_ids.select("feature_uid", pl.col("score").round(3).alias("score")),
    }

    # Initialize identification columns in features_df if they don't exist
    for col_name in id_columns:
        if col_name not in self.features_df.columns:
            col_dtype = pl.Float64 if col_name == "id_top_score" else pl.String
            self.features_df = self.features_df.with_columns(pl.lit(None, dtype=col_dtype).alias(col_name))

    # Update features_df with oracle identifications
    for col_name, id_data_col in id_columns.items():
        oracle_column = id_data_col.columns[1]  # second column (after feature_uid)

        # Create update dataframe
        update_data = id_data_col.rename({oracle_column: col_name})

        # Join and update: the oracle value wins, an existing value is kept otherwise
        self.features_df = (
            self.features_df.join(update_data, on="feature_uid", how="left", suffix="_oracle")
            .with_columns(pl.coalesce([f"{col_name}_oracle", col_name]).alias(col_name))
            .drop(f"{col_name}_oracle")
        )

    # Replace NaN-like values with None in identification columns
    for col_name in id_columns:
        if col_name == "id_top_score":
            # For numeric columns, replace NaN with None
            is_missing = pl.col(col_name).is_null() | pl.col(col_name).is_nan()
        else:
            # For string columns, replace empty strings and "nan" with None
            is_missing = (
                pl.col(col_name).is_null()
                | (pl.col(col_name) == "")
                | (pl.col(col_name) == "nan")
                | (pl.col(col_name) == "NaN")
            )
        self.features_df = self.features_df.with_columns(
            pl.when(is_missing).then(None).otherwise(pl.col(col_name)).alias(col_name)
        )

    # Count how many features were updated
    updated_count = self.features_df.filter(pl.col("id_top_name").is_not_null()).height
    total_features = len(self.features_df)

    self.logger.success(
        f"LipidOracle import completed. {updated_count}/{total_features} "
        f"features now have identifications ({updated_count / total_features * 100:.1f}%)"
    )

    # Update history
    self.store_history(
        ["import_oracle"],
        {
            "folder": folder,
            "min_id_level": min_id_level,
            "max_id_level": max_id_level,
            "updated_features": updated_count,
            "total_features": total_features,
            "lib_entries": len(self.lib_df),
            "id_matches": len(self.id_df),
        },
    )
masster/sample/plot.py CHANGED
@@ -1115,6 +1115,7 @@ def plot_2d(
1115
1115
  filename=None,
1116
1116
  show_features=True,
1117
1117
  show_only_features_with_ms2=False,
1118
+ show_only_features_with_id=False,
1118
1119
  show_isotopes=False,
1119
1120
  show_ms2=False,
1120
1121
  show_in_browser=False,
@@ -1134,6 +1135,7 @@ def plot_2d(
1134
1135
  rt_range=None,
1135
1136
  legend=None,
1136
1137
  colorby=None,
1138
+ tooltip=None,
1137
1139
  ):
1138
1140
  """
1139
1141
  Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
@@ -1151,6 +1153,9 @@ def plot_2d(
1151
1153
  show_only_features_with_ms2 (bool, default False):
1152
1154
  If True, only display features that have associated MS2 scans. When False,
1153
1155
  features without MS2 data are also shown.
1156
+ show_only_features_with_id (bool, default False):
1157
+ If True, only display features with non-null id_top_name (identified features).
1158
+ When False, all features are shown. Only applies when colorby='id'.
1154
1159
  show_isotopes (bool, default False):
1155
1160
  Whether to overlay isotope information on top of the features.
1156
1161
  show_ms2 (bool, default False):
@@ -1186,6 +1191,9 @@ def plot_2d(
1186
1191
  Feature property to use for coloring. If None (default), uses current green/red scheme
1187
1192
  for features with/without MS2 data. If specified and contains categorical data, applies
1188
1193
  categorical coloring with legend support (similar to plot_2d_oracle).
1194
+ tooltip (str, optional):
1195
+ Controls the feature hover tooltip content. Use None or "ms1" (default) to display the
1196
+ full feature details, or "id" to show only rt, m/z, feature_uid, inty, and any id_* columns.
1189
1197
  Behavior:
1190
1198
  - Checks for a loaded mzML file by verifying that self.file_obj is not None.
1191
1199
  - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
@@ -1376,8 +1384,104 @@ def plot_2d(
1376
1384
  # keep only iso==0, i.e. the main
1377
1385
  feats = feats[feats["iso"] == 0]
1378
1386
 
1387
+ tooltip_mode = str(tooltip).lower() if tooltip is not None else "ms1"
1388
+ if tooltip_mode not in {"ms1", "id"}:
1389
+ tooltip_mode = "ms1"
1390
+
1391
+ id_columns = [col for col in feats.columns if isinstance(col, str) and col.startswith("id_")]
1392
+
1393
+ def build_feature_tooltips(*, include_iso=True, include_iso_of=False, include_colorby=None):
1394
+ base_tooltips = [
1395
+ ("rt", "@rt"),
1396
+ ("m/z", "@mz{0.0000}"),
1397
+ ("feature_uid", "@feature_uid"),
1398
+ ("inty", "@inty"),
1399
+ ]
1400
+
1401
+ if tooltip_mode == "id":
1402
+ base_tooltips.extend((col, f"@{col}") for col in id_columns)
1403
+ return base_tooltips
1404
+
1405
+ if include_iso:
1406
+ base_tooltips.append(("iso", "@iso"))
1407
+ if include_iso_of:
1408
+ base_tooltips.append(("iso_of", "@iso_of"))
1409
+ base_tooltips.append(("adduct", "@adduct"))
1410
+ base_tooltips.append(("chrom_coherence", "@chrom_coherence"))
1411
+ base_tooltips.append(("chrom_prominence_scaled", "@chrom_prominence_scaled"))
1412
+
1413
+ if include_colorby and tooltip_mode != "id":
1414
+ base_tooltips.append((include_colorby, f"@{include_colorby}"))
1415
+
1416
+ return base_tooltips
1417
+
1418
+ handled_colorby = False
1419
+ colorby_id_mode = False
1420
+
1421
+ if colorby == "id":
1422
+ if "id_top_name" not in feats.columns:
1423
+ self.logger.warning("colorby='id' requested but 'id_top_name' column is missing; using default colors")
1424
+ else:
1425
+ handled_colorby = True
1426
+ colorby_id_mode = True
1427
+ id_values = feats["id_top_name"]
1428
+ annotated_mask = id_values.notna() & (id_values.astype(str).str.strip() != "")
1429
+
1430
+ annotated_features = feats[annotated_mask].copy()
1431
+ unannotated_features = feats[~annotated_mask].copy()
1432
+
1433
+ # Apply show_only_features_with_id filter if requested
1434
+ if show_only_features_with_id:
1435
+ # Only keep annotated features, discard unannotated
1436
+ unannotated_features = unannotated_features.iloc[0:0] # Empty dataframe
1437
+
1438
+ feature_hover_annotated = HoverTool(
1439
+ tooltips=build_feature_tooltips(),
1440
+ )
1441
+ feature_hover_unannotated = HoverTool(
1442
+ tooltips=build_feature_tooltips(),
1443
+ )
1444
+
1445
+ # Select only plottable columns for vdims (exclude complex objects like Chromatogram)
1446
+ base_vdims = ["feature_uid", "inty", "iso", "adduct", "chrom_coherence", "chrom_prominence_scaled"]
1447
+ # Add id_* columns if they exist
1448
+ id_vdims = [col for col in feats.columns if isinstance(col, str) and col.startswith("id_")]
1449
+ all_vdims = base_vdims + id_vdims
1450
+
1451
+ if len(annotated_features) > 0:
1452
+ vdims_annotated = [col for col in all_vdims if col in annotated_features.columns]
1453
+ feature_points_1 = hv.Points(
1454
+ annotated_features,
1455
+ kdims=["rt", "mz"],
1456
+ vdims=vdims_annotated,
1457
+ label="Annotated features",
1458
+ ).options(
1459
+ color="#2e7d32",
1460
+ marker=marker_type,
1461
+ size=size_1,
1462
+ tools=[feature_hover_annotated],
1463
+ hooks=hooks,
1464
+ show_legend=True,
1465
+ )
1466
+
1467
+ if len(unannotated_features) > 0:
1468
+ vdims_unannotated = [col for col in all_vdims if col in unannotated_features.columns]
1469
+ feature_points_2 = hv.Points(
1470
+ unannotated_features,
1471
+ kdims=["rt", "mz"],
1472
+ vdims=vdims_unannotated,
1473
+ label="Unannotated features",
1474
+ ).options(
1475
+ color="#9e9e9e",
1476
+ marker=marker_type,
1477
+ size=size_2,
1478
+ tools=[feature_hover_unannotated],
1479
+ hooks=hooks,
1480
+ show_legend=True,
1481
+ )
1482
+
1379
1483
  # Handle colorby parameter
1380
- if colorby is not None and colorby in feats.columns:
1484
+ if (not handled_colorby) and colorby is not None and colorby in feats.columns:
1381
1485
  # Check if colorby data is categorical (string-like)
1382
1486
  colorby_values = feats[colorby].dropna()
1383
1487
  is_categorical = feats[colorby].dtype in ["object", "string", "category"] or (
@@ -1424,17 +1528,7 @@ def plot_2d(
1424
1528
 
1425
1529
  if len(group_with_ms2) > 0:
1426
1530
  feature_hover = HoverTool(
1427
- tooltips=[
1428
- ("rt", "@rt"),
1429
- ("m/z", "@mz{0.0000}"),
1430
- ("feature_uid", "@feature_uid"),
1431
- ("inty", "@inty"),
1432
- ("iso", "@iso"),
1433
- ("adduct", "@adduct"),
1434
- ("chrom_coherence", "@chrom_coherence"),
1435
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1436
- (colorby, f"@{colorby}"),
1437
- ],
1531
+ tooltips=build_feature_tooltips(include_colorby=colorby),
1438
1532
  )
1439
1533
  group_points_ms2 = hv.Points(
1440
1534
  group_with_ms2,
@@ -1464,17 +1558,7 @@ def plot_2d(
1464
1558
 
1465
1559
  if len(group_without_ms2) > 0:
1466
1560
  feature_hover = HoverTool(
1467
- tooltips=[
1468
- ("rt", "@rt"),
1469
- ("m/z", "@mz{0.0000}"),
1470
- ("feature_uid", "@feature_uid"),
1471
- ("inty", "@inty"),
1472
- ("iso", "@iso"),
1473
- ("adduct", "@adduct"),
1474
- ("chrom_coherence", "@chrom_coherence"),
1475
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1476
- (colorby, f"@{colorby}"),
1477
- ],
1561
+ tooltips=build_feature_tooltips(include_colorby=colorby),
1478
1562
  )
1479
1563
  group_points_no_ms2 = hv.Points(
1480
1564
  group_without_ms2,
@@ -1500,22 +1584,15 @@ def plot_2d(
1500
1584
  feature_points_2 = group_points_no_ms2
1501
1585
  else:
1502
1586
  feature_points_2 = feature_points_2 * group_points_no_ms2
1503
- else:
1587
+
1588
+ # Only use default coloring if no special colorby mode was handled
1589
+ if not handled_colorby and not use_categorical_coloring:
1504
1590
  # Use original green/red coloring scheme for MS2 presence
1505
1591
  # find features with ms2_scans not None and iso==0
1506
1592
  features_df = feats[feats["ms2_scans"].notnull()]
1507
1593
  # Create feature points with proper sizing method
1508
1594
  feature_hover_1 = HoverTool(
1509
- tooltips=[
1510
- ("rt", "@rt"),
1511
- ("m/z", "@mz{0.0000}"),
1512
- ("feature_uid", "@feature_uid"),
1513
- ("inty", "@inty"),
1514
- ("iso", "@iso"),
1515
- ("adduct", "@adduct"),
1516
- ("chrom_coherence", "@chrom_coherence"),
1517
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1518
- ],
1595
+ tooltips=build_feature_tooltips(),
1519
1596
  )
1520
1597
  if len(features_df) > 0:
1521
1598
  feature_points_1 = hv.Points(
@@ -1542,16 +1619,7 @@ def plot_2d(
1542
1619
  # find features without MS2 data
1543
1620
  features_df = feats[feats["ms2_scans"].isnull()]
1544
1621
  feature_hover_2 = HoverTool(
1545
- tooltips=[
1546
- ("rt", "@rt"),
1547
- ("m/z", "@mz{0.0000}"),
1548
- ("feature_uid", "@feature_uid"),
1549
- ("inty", "@inty"),
1550
- ("iso", "@iso"),
1551
- ("adduct", "@adduct"),
1552
- ("chrom_coherence", "@chrom_coherence"),
1553
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1554
- ],
1622
+ tooltips=build_feature_tooltips(),
1555
1623
  )
1556
1624
  if len(features_df) > 0:
1557
1625
  feature_points_2 = hv.Points(
@@ -1581,17 +1649,7 @@ def plot_2d(
1581
1649
  if hasattr(features_df, "to_pandas"):
1582
1650
  features_df = features_df.to_pandas()
1583
1651
  feature_hover_iso = HoverTool(
1584
- tooltips=[
1585
- ("rt", "@rt"),
1586
- ("m/z", "@mz{0.0000}"),
1587
- ("feature_uid", "@feature_uid"),
1588
- ("inty", "@inty"),
1589
- ("iso", "@iso"),
1590
- ("iso_of", "@iso_of"),
1591
- ("adduct", "@adduct"),
1592
- ("chrom_coherence", "@chrom_coherence"),
1593
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1594
- ],
1652
+ tooltips=build_feature_tooltips(include_iso_of=True),
1595
1653
  )
1596
1654
  feature_points_iso = hv.Points(
1597
1655
  features_df,
@@ -1676,18 +1734,31 @@ def plot_2d(
1676
1734
  overlay = overlay * feature_points_4
1677
1735
  if feature_points_3 is not None:
1678
1736
  overlay = overlay * feature_points_3
1679
- if feature_points_1 is not None:
1680
- overlay = overlay * feature_points_1
1681
- if not show_only_features_with_ms2 and feature_points_2 is not None:
1682
- overlay = overlay * feature_points_2
1737
+
1738
+ # In colorby='id' mode, draw unannotated (grey) first, then annotated (green) on top
1739
+ if colorby_id_mode:
1740
+ # Draw grey points first (bottom layer)
1741
+ if feature_points_2 is not None:
1742
+ overlay = overlay * feature_points_2
1743
+ # Draw green points last (top layer)
1744
+ if feature_points_1 is not None:
1745
+ overlay = overlay * feature_points_1
1746
+ else:
1747
+ # Default order: green (with MS2) first, then red (without MS2)
1748
+ if feature_points_1 is not None:
1749
+ overlay = overlay * feature_points_1
1750
+ # In non-id mode, only show features without MS2 if show_only_features_with_ms2 is False
1751
+ if not show_only_features_with_ms2 and feature_points_2 is not None:
1752
+ overlay = overlay * feature_points_2
1753
+
1683
1754
  if feature_points_iso is not None:
1684
1755
  overlay = overlay * feature_points_iso
1685
1756
 
1686
1757
  if title is not None:
1687
1758
  overlay = overlay.opts(title=title)
1688
1759
 
1689
- # Handle legend positioning for categorical coloring
1690
- if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
1760
+ # Handle legend positioning for categorical coloring or colorby='id' mode
1761
+ if legend is not None and (colorby_id_mode or (use_categorical_coloring and len(categorical_groups) > 1)):
1691
1762
  # Map legend position parameter to HoloViews legend position
1692
1763
  legend_position_map = {
1693
1764
  "top_right": "top_right",
@@ -1704,8 +1775,8 @@ def plot_2d(
1704
1775
 
1705
1776
  # Apply legend configuration to the overlay
1706
1777
  overlay = overlay.opts(legend_position=hv_legend_pos, legend_opts={"title": "", "padding": 2, "spacing": 2})
1707
- elif legend is None and use_categorical_coloring:
1708
- # Explicitly hide legend when legend=None but categorical coloring is used
1778
+ elif legend is None and (colorby_id_mode or use_categorical_coloring):
1779
+ # Explicitly hide legend when legend=None but categorical coloring or id mode is used
1709
1780
  overlay = overlay.opts(show_legend=False)
1710
1781
 
1711
1782
  # Handle slider functionality
@@ -1728,12 +1799,27 @@ def plot_2d(
1728
1799
  if feature_points_3 is not None:
1729
1800
  updated_points_3 = feature_points_3.opts(size=size_val)
1730
1801
  feature_overlay = updated_points_3 if feature_overlay is None else feature_overlay * updated_points_3
1731
- if feature_points_1 is not None:
1732
- updated_points_1 = feature_points_1.opts(size=size_val)
1733
- feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
1734
- if not show_only_features_with_ms2 and feature_points_2 is not None:
1735
- updated_points_2 = feature_points_2.opts(size=size_val)
1736
- feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
1802
+
1803
+ # In colorby='id' mode, draw unannotated (grey) first, then annotated (green) on top
1804
+ if colorby_id_mode:
1805
+ # Draw grey points first (bottom layer)
1806
+ if feature_points_2 is not None:
1807
+ updated_points_2 = feature_points_2.opts(size=size_val)
1808
+ feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
1809
+ # Draw green points last (top layer)
1810
+ if feature_points_1 is not None:
1811
+ updated_points_1 = feature_points_1.opts(size=size_val)
1812
+ feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
1813
+ else:
1814
+ # Default order: green (with MS2) first, then red (without MS2)
1815
+ if feature_points_1 is not None:
1816
+ updated_points_1 = feature_points_1.opts(size=size_val)
1817
+ feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
1818
+ # In non-id mode, only show features without MS2 if show_only_features_with_ms2 is False
1819
+ if not show_only_features_with_ms2 and feature_points_2 is not None:
1820
+ updated_points_2 = feature_points_2.opts(size=size_val)
1821
+ feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
1822
+
1737
1823
  if feature_points_iso is not None:
1738
1824
  updated_points_iso = feature_points_iso.opts(size=size_val)
1739
1825
  feature_overlay = (
@@ -2390,10 +2476,28 @@ def plot_dda_stats(
2390
2476
  "time_ms2_to_ms2",
2391
2477
  "time_ms2_to_ms1",
2392
2478
  ]
2393
- # Ensure that 'index' and 'rt' are kept for hover along with the columns to plot
2479
+ # skip cols that are not in stats
2480
+ cols_to_plot = [col for col in cols_to_plot if col in stats.columns]
2394
2481
  stats = stats[["scan_uid", "cycle", "rt", *cols_to_plot]]
2395
2482
  # set any value < 0 to None
2396
- stats[stats < 0] = None
2483
+ # Replace negative values with nulls in a polars-friendly way
2484
+ numeric_types = {
2485
+ pl.Float32, pl.Float64,
2486
+ pl.Int8, pl.Int16, pl.Int32, pl.Int64,
2487
+ pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64
2488
+ }
2489
+ exprs = []
2490
+ for col_name, dtype in stats.schema.items():
2491
+ if dtype in numeric_types:
2492
+ exprs.append(
2493
+ pl.when(pl.col(col_name) < 0)
2494
+ .then(None)
2495
+ .otherwise(pl.col(col_name))
2496
+ .alias(col_name)
2497
+ )
2498
+ else:
2499
+ exprs.append(pl.col(col_name))
2500
+ stats = stats.select(exprs)
2397
2501
 
2398
2502
  # Create a Scatter for each column in cols_to_plot stacked vertically, with hover enabled
2399
2503
  scatter_plots = []