masster 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (55) hide show
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1685 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +393 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -736
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1406 -886
  41. masster/study/helpers.py +1713 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1231 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +161 -134
  48. masster/study/study.py +612 -522
  49. masster/study/study5_schema.json +253 -241
  50. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/METADATA +15 -10
  51. masster-0.3.1.dist-info/RECORD +59 -0
  52. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.5.dist-info/RECORD +0 -50
  54. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
  55. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
masster/sample/plot.py CHANGED
@@ -1,1622 +1,1685 @@
1
- """
2
- _plots.py
3
-
4
- This module provides visualization functions for mass spectrometry data analysis.
5
- It contains plotting utilities for extracted ion chromatograms (EICs), 2D data maps,
6
- feature visualizations, and interactive dashboards using modern visualization libraries.
7
-
8
- Key Features:
9
- - **Extracted Ion Chromatograms (EICs)**: Interactive chromatographic plotting with feature annotations.
10
- - **2D Data Visualization**: Mass spectrometry data visualization with datashader for large datasets.
11
- - **Feature Plotting**: Visualize detected features with retention time and m/z information.
12
- - **Interactive Dashboards**: Create interactive panels for data exploration and analysis.
13
- - **Multi-Sample Plotting**: Comparative visualizations across multiple samples.
14
- - **Export Capabilities**: Save plots in various formats (HTML, PNG, SVG).
15
-
16
- Dependencies:
17
- - `holoviews`: For high-level data visualization and interactive plots.
18
- - `datashader`: For rendering large datasets efficiently.
19
- - `panel`: For creating interactive web applications and dashboards.
20
- - `bokeh`: For low-level plotting control and customization.
21
- - `polars` and `pandas`: For data manipulation and processing.
22
- - `numpy`: For numerical computations.
23
-
24
- Functions:
25
- - `plot_eic()`: Generate extracted ion chromatograms with feature overlays.
26
- - `plot_2d()`: Create 2D mass spectrometry data visualizations.
27
- - `plot_features()`: Visualize detected features in retention time vs m/z space.
28
- - Various utility functions for plot styling and configuration.
29
-
30
- Supported Plot Types:
31
- - Extracted Ion Chromatograms (EIC)
32
- - Total Ion Chromatograms (TIC)
33
- - Base Peak Chromatograms (BPC)
34
- - 2D intensity maps (RT vs m/z)
35
- - Feature scatter plots
36
- - Interactive dashboards
37
-
38
- See Also:
39
- - `parameters._plot_parameters`: For plot-specific parameter configuration.
40
- - `single.py`: For applying plotting methods to ddafile objects.
41
- - `study.py`: For study-level visualization functions.
42
-
43
- """
44
-
45
- import os
46
-
47
- import datashader as ds
48
- import holoviews as hv
49
- import holoviews.operation.datashader as hd
50
- import numpy as np
51
- import pandas as pd
52
- import panel
53
- import polars as pl
54
-
55
- from bokeh.models import HoverTool
56
- from holoviews import dim
57
- from holoviews.plotting.util import process_cmap
58
- from matplotlib.colors import rgb2hex
59
-
60
- # Parameters removed - using hardcoded defaults
61
-
62
-
63
- hv.extension("bokeh")
64
-
65
-
66
- def plot_eic(
67
- self,
68
- feature_uid=None,
69
- filename=None,
70
- rt_tol=10,
71
- rt_tol_factor_plot=1,
72
- mz_tol=0.0005,
73
- mz_tol_factor_plot=1,
74
- link_x=False,
75
- ):
76
- """
77
- Plot Extracted Ion Chromatograms (EICs) for one or more features using MS1 data and feature metadata.
78
-
79
- This function filters MS1 data based on retention time (rt) and mass-to-charge ratio (mz) windows
80
- derived from feature information in `features_df`. It then generates interactive EIC plots using
81
- HoloViews, with feature retention time windows annotated. Plots can be displayed interactively or
82
- saved to a file.
83
-
84
- Parameters:
85
- feature_uid (int or list of int, optional):
86
- Feature identifier(s) for EIC generation. If None, EICs for all features in `features_df` are plotted.
87
- filename (str, optional):
88
- Output file path. If ending with `.html`, saves as interactive HTML; otherwise, saves as PNG.
89
- If not provided, displays the plot interactively.
90
- rt_tol (float, default=10):
91
- Retention time tolerance (in seconds) added to feature boundaries for MS1 data filtering.
92
- rt_tol_factor_plot (float, default=1):
93
- Retention time tolerance factor.
94
- mz_tol (float, default=0.0005):
95
- m/z tolerance added to feature boundaries for MS1 data filtering.
96
- mz_tol_factor_plot (float, default=1):
97
- m/z time tolerance factor.
98
- link_x (bool, default=True):
99
- If True, links the x-axes (retention time) across all EIC subplots.
100
-
101
- Returns:
102
- None
103
-
104
- Notes:
105
- - Uses `features_df` for feature metadata and `ms1_df` (Polars DataFrame) for MS1 data.
106
- - Aggregates MS1 intensities by retention time.
107
- - Utilizes HoloViews for visualization and Panel for layout/display.
108
- """
109
- # plots the EIC for a given feature id
110
- # If rt or mz are not provided, they are extracted from features_df using the supplied feature id (feature_uid)
111
-
112
- feature_uids = feature_uid
113
- # if feature_uids is None, plot all features
114
- if feature_uids is None:
115
- feats = self.features_df.clone()
116
- else:
117
- if isinstance(feature_uids, int):
118
- feature_uids = [feature_uids]
119
- # select only the features with feature_uid in feature_uids
120
- feats = self.features_df[
121
- self.features_df["feature_uid"].is_in(feature_uids)
122
- ].clone()
123
-
124
- # make sure feature_uid is a list of integers
125
-
126
- eic_plots = []
127
- feature_uids = feats["feature_uid"].values.tolist()
128
- mz_tol_plot = mz_tol * mz_tol_factor_plot
129
- rt_tol_plot = rt_tol * rt_tol_factor_plot
130
- # iterate over the list of feature_uid
131
- for feature_uid in feature_uids:
132
- # Retrieve the feature info
133
- feature_row = feats[feats["feature_uid"] == feature_uid]
134
- # rt = feature_row["rt"].values[0]
135
- rt_start = feature_row["rt_start"].values[0]
136
- rt_end = feature_row["rt_end"].values[0]
137
- mz = feature_row["mz"].values[0]
138
- mz_start = feature_row["mz_start"].values[0]
139
- mz_end = feature_row["mz_end"].values[0]
140
-
141
- # filter self.ms1_df with rt_start, rt_end, mz_start, mz_end
142
- eic_df = self.ms1_df.filter(
143
- pl.col("rt") >= rt_start - rt_tol_plot,
144
- pl.col("rt") <= rt_end + rt_tol_plot,
145
- )
146
- eic_df = eic_df.filter(
147
- pl.col("mz") >= mz_start - mz_tol_plot,
148
- pl.col("mz") <= mz_end + mz_tol_plot,
149
- )
150
-
151
- if eic_df.is_empty():
152
- print("No MS1 data found in the specified window.")
153
- continue
154
-
155
- # convert to pandas DataFrame
156
- eic_df = eic_df.to_pandas()
157
- # aggregate all points with the same rt using the sum of inty
158
- eic_df = eic_df.groupby("rt").agg({"inty": "sum"}).reset_index()
159
- yname = f"inty_{feature_uid}"
160
- eic_df.rename(columns={"inty": yname}, inplace=True)
161
-
162
- # Plot the EIC using bokeh and ensure axes are independent by setting axiswise=True
163
- eic = hv.Curve(eic_df, kdims=["rt"], vdims=[yname]).opts(
164
- title=f"EIC for feature {feature_uid}, mz = {mz:.4f}",
165
- xlabel="Retention time (s)",
166
- ylabel="Intensity",
167
- width=1000,
168
- tools=["hover"],
169
- height=250,
170
- axiswise=True,
171
- color="black",
172
- )
173
-
174
- # Add vertical lines at the start and end of the retention time
175
- eic = eic * hv.VLine(rt_start).opts(
176
- color="blue",
177
- line_width=1,
178
- line_dash="dashed",
179
- axiswise=True,
180
- )
181
- eic = eic * hv.VLine(rt_end).opts(
182
- color="blue",
183
- line_width=1,
184
- line_dash="dashed",
185
- axiswise=True,
186
- )
187
-
188
- # Append the subplot without linking axes
189
- eic_plots.append(eic)
190
- if link_x:
191
- # Create a layout with shared x-axis for all EIC plots
192
- layout = hv.Layout(eic_plots).opts(shared_axes=True)
193
- else:
194
- layout = hv.Layout(eic_plots).opts(shared_axes=False)
195
-
196
- layout = layout.cols(1)
197
- layout = panel.Column(layout)
198
- if filename is not None:
199
- # if filename includes .html, save the panel layout to an HTML file
200
- if filename.endswith(".html"):
201
- layout.save(filename, embed=True)
202
- else:
203
- # save the panel layout as a png
204
- hv.save(layout, filename, fmt="png")
205
- else:
206
- # Display the panel layout
207
- layout.show()
208
-
209
-
210
- def plot_2d(
211
- self,
212
- filename=None,
213
- show_features=True,
214
- show_only_features_with_ms2=False,
215
- show_isotopes=False,
216
- show_ms2=False,
217
- title=None,
218
- cmap=None,
219
- marker='circle',
220
- markersize=10,
221
- raster_dynamic=True,
222
- raster_max_px=8,
223
- raster_threshold=0.8,
224
- mz_range=None,
225
- rt_range=None,
226
- ):
227
- """
228
- Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
229
- of feature and MS2 scan information.
230
- This method creates a plot from the internal MS1 data loaded into self.ms1_df
231
- and optionally overlays various feature and MS2 information depending on the provided
232
- parameters. The visualization is built using HoloViews and Holoviews dynamic rasterization,
233
- together with Panel for layout and exporting.
234
- Parameters:
235
- filename (str, optional):
236
- Path to save the plot. If provided and ends with ".html", the plot is saved as an
237
- interactive HTML file; otherwise, it is saved as a PNG image.
238
- show_features (bool, default True):
239
- Whether to overlay detected features on the plot.
240
- show_only_features_with_ms2 (bool, default False):
241
- If True, only display features that have associated MS2 scans. When False,
242
- features without MS2 data are also shown.
243
- show_isotopes (bool, default False):
244
- Whether to overlay isotope information on top of the features.
245
- show_ms2 (bool, default False):
246
- Whether to overlay MS2 scan information on the plot.
247
- title (str, optional):
248
- Title of the plot.
249
- cmap (str, optional):
250
- Colormap to use for the background rasterized data. Defaults to "iridescent_r" unless
251
- modified (e.g., if set to "grey", it is changed to "Greys256").
252
- marker (str, default 'circle'):
253
- Marker type to use for feature and MS2 points.
254
- markersize (int, default 10):
255
- Base size of the markers used for plotting points.
256
- raster_dynamic (bool, default True):
257
- Whether to use dynamic rasterization for the background point cloud.
258
- raster_max_px (int, default 8):
259
- Maximum pixel size for dynamic rasterization when using dynspread.
260
- raster_threshold (float, default 0.8):
261
- Threshold used for the dynspread process in dynamic rasterization.
262
- Behavior:
263
- - Checks for a loaded mzML file by verifying that self.file_obj is not None.
264
- - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
265
- points (inty < 1).
266
- - Sets up the plot bounds for retention time (rt) and mass-to-charge ratio (mz) using a hook function.
267
- - Renders the MS1 data as a background rasterized image with a logarithmic intensity normalization.
268
- - Conditionally overlays feature points (with and without MS2 information), isotopes (if requested),
269
- and MS2 scan points based on internal DataFrame data.
270
- - Depending on the filename parameter, either displays the plot interactively using Panel or
271
- saves it as an HTML or PNG file.
272
- Returns:
273
- None
274
- Side Effects:
275
- - May print a warning if no mzML file is loaded.
276
- - Either shows the plot interactively or writes the output to a file.
277
- """
278
-
279
- if self.ms1_df is None:
280
- self.logger.error("No MS1 data available.")
281
- return
282
-
283
- if cmap is None:
284
- cmap = "iridescent_r"
285
- elif cmap == "grey":
286
- cmap = "Greys256"
287
-
288
- # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
289
- spectradf = self.ms1_df.select(["rt", "mz", "inty"])
290
- # remove any inty<1
291
- spectradf = spectradf.filter(pl.col("inty") >= 1)
292
- # keep only rt, mz, and inty
293
- spectradf = spectradf.select(["rt", "mz", "inty"])
294
- if mz_range is not None:
295
- spectradf = spectradf[
296
- (spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])
297
- ]
298
- if rt_range is not None:
299
- spectradf = spectradf[
300
- (spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])
301
- ]
302
- maxrt = spectradf["rt"].max()
303
- minrt = spectradf["rt"].min()
304
- maxmz = spectradf["mz"].max()
305
- minmz = spectradf["mz"].min()
306
-
307
- def new_bounds_hook(plot, elem):
308
- x_range = plot.state.x_range
309
- y_range = plot.state.y_range
310
- x_range.bounds = minrt, maxrt
311
- y_range.bounds = minmz, maxmz
312
-
313
- points = hv.Points(
314
- spectradf,
315
- kdims=["rt", "mz"],
316
- vdims=["inty"],
317
- label="MS1 survey scans",
318
- ).opts(
319
- fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
320
- color=np.log(dim("inty")),
321
- colorbar=True,
322
- cmap="Magma",
323
- tools=["hover"],
324
- )
325
-
326
- size_1 = 1 * markersize
327
- color_1 = "forestgreen"
328
- size_2 = 1 * markersize
329
- color_2 = "darkorange"
330
- if filename is not None:
331
- dyn = False
332
- if not filename.endswith(".html"):
333
- size_1 = 2
334
- color_1 = "forestgreen"
335
- size_2 = 2
336
- color_2 = "darkorange"
337
- raster_dynamic = False
338
-
339
- dyn = raster_dynamic
340
- raster = hd.rasterize(
341
- points,
342
- aggregator=ds.max("inty"),
343
- interpolation="bilinear",
344
- dynamic=dyn, # alpha=10, min_alpha=0,
345
- ).opts(
346
- active_tools=["box_zoom"],
347
- cmap=process_cmap(cmap, provider="bokeh"), # blues
348
- tools=["hover"],
349
- hooks=[new_bounds_hook],
350
- width=1000,
351
- height=1000,
352
- cnorm="log",
353
- xlabel="Retention time (s)",
354
- ylabel="m/z",
355
- colorbar=True,
356
- colorbar_position="right",
357
- axiswise=True,
358
- )
359
-
360
- raster = hd.dynspread(
361
- raster,
362
- threshold=raster_threshold,
363
- how="add",
364
- shape="square",
365
- max_px=raster_max_px,
366
- )
367
- feature_points_1 = None
368
- feature_points_2 = None
369
- feature_points_3 = None
370
- feature_points_4 = None
371
- feature_points_iso = None
372
- # Plot features as red dots if features is True
373
- if self.features_df is not None and show_features:
374
- feats = self.features_df.clone()
375
- # Convert to pandas for operations that require pandas functionality
376
- if hasattr(feats, "to_pandas"):
377
- feats = feats.to_pandas()
378
- # if ms2_scans is not null, keep only the first element of the list
379
- feats["ms2_scans"] = feats["ms2_scans"].apply(
380
- lambda x: x[0] if type(x) == list else x,
381
- )
382
- if mz_range is not None:
383
- feats = feats[(feats["mz"] >= mz_range[0]) & (feats["mz"] <= mz_range[1])]
384
- if rt_range is not None:
385
- feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
386
- # keep only iso==0, i.e. the main
387
- feats = feats[feats["iso"] == 0]
388
- # find features with ms2_scans not None and iso==0
389
- features_df = feats[feats["ms2_scans"].notnull()]
390
- feature_points_1 = hv.Points(
391
- features_df,
392
- kdims=["rt", "mz"],
393
- vdims=[
394
- "feature_uid",
395
- "inty",
396
- "quality",
397
- "rt_delta",
398
- "ms2_scans",
399
- "chrom_coherence",
400
- "chrom_prominence_scaled",
401
- ],
402
- label="Features with MS2 data",
403
- ).options(
404
- color=color_1,
405
- marker=marker,
406
- size=size_1,
407
- tools=["hover"],
408
- )
409
- # find features without MS2 data
410
- features_df = feats[feats["ms2_scans"].isnull()]
411
- feature_points_2 = hv.Points(
412
- features_df,
413
- kdims=["rt", "mz"],
414
- vdims=[
415
- "feature_uid",
416
- "inty",
417
- "quality",
418
- "rt_delta",
419
- "chrom_coherence",
420
- "chrom_prominence_scaled",
421
- ],
422
- label="Features without MS2 data",
423
- ).options(
424
- color="red",
425
- size=size_2,
426
- marker=marker,
427
- tools=["hover"],
428
- )
429
-
430
- if show_isotopes:
431
- feats = self.features_df
432
- features_df = feats[feats["iso"] > 0]
433
- feature_points_iso = hv.Points(
434
- features_df,
435
- kdims=["rt", "mz"],
436
- vdims=[
437
- "feature_uid",
438
- "inty",
439
- "quality",
440
- "rt_delta",
441
- "iso",
442
- "iso_of",
443
- "chrom_coherence",
444
- "chrom_prominence_scaled",
445
- ],
446
- label="Isotopes",
447
- ).options(
448
- color="violet",
449
- marker=marker,
450
- size=size_1,
451
- tools=["hover"],
452
- )
453
- if show_ms2:
454
- # find all self.scans_df with mslevel 2 that are not linked to a feature
455
- ms2_orphan = self.scans_df.filter(pl.col("ms_level") == 2).filter(
456
- pl.col("feature_uid") < 0,
457
- )
458
-
459
- if len(ms2_orphan) > 0:
460
- # pandalize
461
- ms2 = ms2_orphan.to_pandas()
462
- feature_points_3 = hv.Points(
463
- ms2,
464
- kdims=["rt", "prec_mz"],
465
- vdims=["index", "inty_tot", "bl"],
466
- label="Orphan MS2 scans",
467
- ).options(
468
- color=color_2,
469
- marker="x",
470
- size=size_2,
471
- tools=["hover"],
472
- )
473
-
474
- ms2_linked = self.scans_df.filter(pl.col("ms_level") == 2).filter(
475
- pl.col("feature_uid") >= 0,
476
- )
477
- if len(ms2_linked) > 0:
478
- # pandalize
479
- ms2 = ms2_linked.to_pandas()
480
- feature_points_4 = hv.Points(
481
- ms2,
482
- kdims=["rt", "prec_mz"],
483
- vdims=["index", "inty_tot", "bl"],
484
- label="Linked MS2 scans",
485
- ).options(
486
- color=color_1,
487
- marker="x",
488
- size=size_2,
489
- tools=["hover"],
490
- )
491
-
492
- overlay = raster
493
-
494
- if feature_points_4 is not None:
495
- overlay = overlay * feature_points_4
496
- if feature_points_3 is not None:
497
- overlay = overlay * feature_points_3
498
- if feature_points_1 is not None:
499
- overlay = overlay * feature_points_1
500
- if not show_only_features_with_ms2 and feature_points_2 is not None:
501
- overlay = overlay * feature_points_2
502
- if feature_points_iso is not None:
503
- overlay = overlay * feature_points_iso
504
-
505
- if title is not None:
506
- overlay = overlay.opts(title=title)
507
-
508
- # Create a panel layout
509
- layout = panel.Column(overlay)
510
-
511
- if filename is not None:
512
- # if filename includes .html, save the panel layout to an HTML file
513
- if filename.endswith(".html"):
514
- layout.save(filename, embed=True)
515
- else:
516
- # save the panel layout as a png
517
- hv.save(overlay, filename, fmt="png")
518
- else:
519
- # Display the panel layout
520
- layout.show()
521
-
522
-
523
- def plot_2d_oracle(
524
- self,
525
- oracle_folder=None,
526
- link_by_feature_uid=None,
527
- colorby='hg',
528
- filename=None,
529
- min_id_level=None,
530
- max_id_level=None,
531
- min_ms_level=None,
532
- title=None,
533
- cmap=None,
534
- markersize=10,
535
- raster_dynamic=True,
536
- raster_max_px=8,
537
- raster_threshold=0.8,
538
- mz_range=None,
539
- rt_range=None,
540
- ):
541
- """
542
- Plot a 2D overlay visualization of MS1 survey scans and feature annotations, including oracle annotation data if provided.
543
-
544
- This function reads the primary mass spectrometry data, applies filtering, processes oracle annotation data (if provided),
545
- and produces an interactive plot combining various data layers. The visualization includes rasterized MS1 data and feature
546
- points colored by annotation.
547
-
548
- Parameters:
549
- self: The object instance containing MS1 and feature data.
550
- oracle_folder (str, optional): Path to the oracle folder containing the annotation file
551
- (expected at "<oracle_folder>/diag/summary_by_feature.csv"). If None, oracle data is not used.
552
- link_by_feature_uid (bool, optional): Whether to link features by their IDs in the overlay.
553
- colorby (str, optional): Parameter that determines the color assignment for annotated features.
554
- Expected values include 'hg', 'class', 'id_class', or 'id_hg'. Default is 'hg'.
555
- filename (str, optional): Name of the file where the plot should be saved. If provided and ends with
556
- ".html", the panel layout is saved as an interactive HTML file; otherwise, the output is saved as a PNG.
557
- min_id_level (int, optional): Minimum identification level for oracle annotations to include.
558
- max_id_level (int, optional): Maximum identification level for oracle annotations to include.
559
- min_ms_level (int, optional): Minimum MS level for features to include.
560
- title (str, optional): Title to be displayed on the resulting plot. Default is None.
561
- cmap (str, optional): Colormap to be used for the rasterized plot. Acceptable values include None, "grey",
562
- "iridescent", or other valid colormap names. Default is None. When None, 'Greys256' is used.
563
- markersize (int, optional): Marker size for feature points in the overlay. Default is 10.
564
- raster_dynamic (bool, optional): If True, enables dynamic rasterization of the overlay. If filename is provided
565
- and does not end with ".html", raster_dynamic is set to False. Default is True.
566
- raster_max_px (int, optional): Maximum pixel size for dynamic rasterization. Default is 8.
567
- raster_threshold (float, optional): Threshold for dynamic raster spread. Default is 0.8.
568
- mz_range (tuple, optional): m/z range for filtering MS1 data.
569
- rt_range (tuple, optional): Retention time range for filtering MS1 data.
570
-
571
- Returns:
572
- None
573
-
574
- The function either displays the interactive panel layout or saves the visualization to a file based on
575
- the provided filename. If the primary file object or feature data is missing, the function prints an
576
- informative message and returns without plotting.
577
- """
578
-
579
- if self.file_obj is None:
580
- print("Please load a file first.")
581
- return
582
-
583
- if cmap is None or cmap == "grey":
584
- cmap = "Greys256"
585
- elif cmap == "iridescent":
586
- cmap = "iridescent_r"
587
-
588
- # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
589
- spectradf = self.ms1_df.to_pandas()
590
-
591
- # remove any inty<1
592
- spectradf = spectradf[spectradf["inty"] >= 1]
593
- # keep only rt, mz, and inty
594
- spectradf = spectradf[["rt", "mz", "inty"]]
595
- if mz_range is not None:
596
- spectradf = spectradf[
597
- (spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])
598
- ]
599
- if rt_range is not None:
600
- spectradf = spectradf[
601
- (spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])
602
- ]
603
-
604
- maxrt = spectradf["rt"].max()
605
- minrt = spectradf["rt"].min()
606
- maxmz = spectradf["mz"].max()
607
- minmz = spectradf["mz"].min()
608
-
609
- def new_bounds_hook(plot, elem):
610
- x_range = plot.state.x_range
611
- y_range = plot.state.y_range
612
- x_range.bounds = minrt, maxrt
613
- y_range.bounds = minmz, maxmz
614
-
615
- points = hv.Points(
616
- spectradf,
617
- kdims=["rt", "mz"],
618
- vdims=["inty"],
619
- label="MS1 survey scans",
620
- ).opts(
621
- fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
622
- color=np.log(dim("inty")),
623
- colorbar=True,
624
- cmap="Magma",
625
- tools=["hover"],
626
- )
627
-
628
- if filename is not None:
629
- dyn = False
630
- if not filename.endswith(".html"):
631
- raster_dynamic = False
632
-
633
- dyn = raster_dynamic
634
- raster = hd.rasterize(
635
- points,
636
- aggregator=ds.max("inty"),
637
- interpolation="bilinear",
638
- dynamic=dyn, # alpha=10, min_alpha=0,
639
- ).opts(
640
- active_tools=["box_zoom"],
641
- cmap=process_cmap(cmap, provider="bokeh"), # blues
642
- tools=["hover"],
643
- hooks=[new_bounds_hook],
644
- width=1000,
645
- height=1000,
646
- cnorm="log",
647
- xlabel="Retention time (s)",
648
- ylabel="m/z",
649
- colorbar=True,
650
- colorbar_position="right",
651
- axiswise=True,
652
- )
653
- raster = hd.dynspread(
654
- raster,
655
- threshold=raster_threshold,
656
- how="add",
657
- shape="square",
658
- max_px=raster_max_px,
659
- )
660
-
661
- if self.features_df is None:
662
- return
663
- feats = self.features_df.clone()
664
-
665
- # Convert to pandas for oracle operations that require pandas functionality
666
- if hasattr(feats, "to_pandas"):
667
- feats = feats.to_pandas()
668
-
669
- # check if annotationfile is not None
670
- if oracle_folder is None:
671
- return
672
- # try to read the annotationfile as a csv file and add it to feats
673
- try:
674
- oracle_data = pd.read_csv(
675
- os.path.join(oracle_folder, "diag", "summary_by_feature.csv"),
676
- )
677
- except:
678
- print(f"Could not read {oracle_folder}/diag/summary_by_feature.csv")
679
- return
680
-
681
- if link_by_feature_uid:
682
- # scan_idx slaw_id slaw_ms2_id mz rt level formula ion species name rarity lib_id hg mod lib score score2 score_db score_db_data ms2_tic ms2_evidence ms2_matched_n ms2_missed_n ms2_matched ms2_missed ms2_top1
683
- cols_to_keep = [
684
- "title",
685
- "scan_idx",
686
- "mslevel",
687
- "hits",
688
- "id_level",
689
- "id_label",
690
- "id_ion",
691
- "id_class",
692
- "id_evidence",
693
- "score",
694
- "score2",
695
- ]
696
- oracle_data = oracle_data[cols_to_keep]
697
- # extract feature_uid from title. It begins with "fid:XYZ;"
698
- oracle_data["feature_uid"] = oracle_data["title"].str.extract(r"fid:(\d+)")
699
- oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
700
- # sort by id_level, remove duplicate feature_uid, keep the first one
701
- oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
702
- oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")
703
- else:
704
- cols_to_keep = [
705
- "precursor",
706
- "rt",
707
- "title",
708
- "scan_idx",
709
- "mslevel",
710
- "hits",
711
- "id_level",
712
- "id_label",
713
- "id_ion",
714
- "id_class",
715
- "id_evidence",
716
- "score",
717
- "score2",
718
- ]
719
- # link
720
- oracle_data = oracle_data[cols_to_keep]
721
- oracle_data["feature_uid"] = None
722
- # iterate over the rows and find the feature_uid in feats by looking at the closest rt and mz
723
- for i, row in oracle_data.iterrows():
724
- candidates = feats[
725
- (abs(feats["rt"] - row["rt"]) < 1)
726
- & (abs(feats["mz"] - row["precursor"]) < 0.005)
727
- ].copy()
728
- if len(candidates) > 0:
729
- # sort by delta rt
730
- candidates["delta_rt"] = abs(candidates["rt"] - row["rt"])
731
- candidates = candidates.sort_values(by=["delta_rt"])
732
- oracle_data.at[i, "feature_uid"] = candidates["feature_uid"].values[0]
733
- # remove precursor and rt columns
734
- oracle_data = oracle_data.drop(columns=["precursor", "rt"])
735
-
736
- feats = feats.merge(oracle_data, how="left", on="feature_uid")
737
-
738
- # filter feats by id_level
739
- if min_id_level is not None:
740
- feats = feats[(feats["id_level"] >= min_id_level)]
741
- if max_id_level is not None:
742
- feats = feats[(feats["id_level"] <= max_id_level)]
743
- if min_ms_level is not None:
744
- feats = feats[(feats["mslevel"] >= min_ms_level)]
745
-
746
- feats["color"] = "black"
747
-
748
- cvalues = None
749
- if colorby in ["class", "hg", "id_class", "id_hg"]:
750
- # replace nans in feats['id_class'] with 'mix'
751
- feats["id_class"] = feats["id_class"].fillna("mix")
752
- cvalues = feats["id_class"].unique()
753
- # sort alphabetically
754
- cvalues = sorted(cvalues)
755
- # flip the strings left to right
756
- fcvalues = [cvalues[i][::-1] for i in range(len(cvalues))]
757
- # sort in alphabetical order the flipped strings and return the index
758
- idx = np.argsort(fcvalues)
759
- # apply to cvalues
760
- cvalues = [cvalues[i] for i in idx]
761
- elif colorby in ["ion", "id_ion"]:
762
- cvalues = feats["id_ion"].unique()
763
- elif colorby in ["id_evidence", "ms2_evidence"]:
764
- cvalues = feats["id_evidence"].unique()
765
-
766
- if cvalues is not None:
767
- num_colors = len(cvalues)
768
- cmap = "rainbow"
769
- cmap_provider = "colorcet"
770
- cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
771
- colors = [
772
- rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))])
773
- if num_colors > 1
774
- else rgb2hex(cm[0])
775
- for i in range(num_colors)
776
- ]
777
- # assign color to each row based on id_class. If id_class is null, assign 'black'
778
- feats["color"] = "black"
779
-
780
- for i, c in enumerate(cvalues):
781
- if colorby in ["class", "hg", "id_class", "id_hg"]:
782
- feats.loc[feats["id_class"] == c, "color"] = colors[i]
783
- elif colorby in ["ion", "id_ion"]:
784
- feats.loc[feats["id_ion"] == c, "color"] = colors[i]
785
- elif colorby in ["id_evidence", "ms2_evidence"]:
786
- feats.loc[feats["id_evidence"] == c, "color"] = colors[i]
787
-
788
- # replace NaN with 0 in id_level
789
- feats["id_level"] = feats["id_level"].fillna(0)
790
- # feature_points_1 are all features with column ms2_scans not null
791
- feature_points_1 = None
792
- feat_df = feats.copy()
793
- feat_df = feat_df[feat_df["id_level"] == 2]
794
-
795
- feature_points_1 = hv.Points(
796
- feat_df,
797
- kdims=["rt", "mz"],
798
- vdims=[
799
- "inty",
800
- "feature_uid",
801
- "id_level",
802
- "id_class",
803
- "id_label",
804
- "id_ion",
805
- "id_evidence",
806
- "score",
807
- "score2",
808
- "color",
809
- ],
810
- label="ID by MS2",
811
- ).options(
812
- color="color",
813
- marker="circle",
814
- size=markersize,
815
- fill_alpha=1.0,
816
- tools=["hover"],
817
- )
818
-
819
- # feature_points_2 are all features that have ms2_scans not null and id_level ==1
820
- feature_points_2 = None
821
- feat_df = feats.copy()
822
- feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] == 1)]
823
- if len(feat_df) > 0:
824
- feature_points_2 = hv.Points(
825
- feat_df,
826
- kdims=["rt", "mz"],
827
- vdims=[
828
- "inty",
829
- "feature_uid",
830
- "id_level",
831
- "id_label",
832
- "id_ion",
833
- "id_class",
834
- "color",
835
- ],
836
- label="ID by MS1, with MS2",
837
- ).options(
838
- color="color",
839
- marker="circle",
840
- size=markersize,
841
- fill_alpha=0.0,
842
- tools=["hover"],
843
- )
844
-
845
- # feature_points_3 are all features that have ms2_scans null and id_level ==1
846
- feature_points_3 = None
847
- feat_df = feats.copy()
848
- feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] == 1)]
849
- if len(feat_df) > 0:
850
- feature_points_3 = hv.Points(
851
- feat_df,
852
- kdims=["rt", "mz"],
853
- vdims=[
854
- "inty",
855
- "feature_uid",
856
- "id_level",
857
- "id_label",
858
- "id_ion",
859
- "id_class",
860
- "color",
861
- ],
862
- label="ID by MS1, no MS2",
863
- ).options(
864
- color="color",
865
- marker="diamond",
866
- size=markersize,
867
- fill_alpha=0.0,
868
- tools=["hover"],
869
- )
870
-
871
- # feature_points_4 are all features that have ms2_scans not null and id_level < 1
872
- feature_points_4 = None
873
- feat_df = feats.copy()
874
- feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] < 1)]
875
- if len(feat_df) > 0:
876
- feature_points_4 = hv.Points(
877
- feat_df,
878
- kdims=["rt", "mz"],
879
- vdims=["inty", "feature_uid"],
880
- label="No ID, with MS2",
881
- ).options(
882
- color="gray",
883
- marker="circle",
884
- size=markersize,
885
- fill_alpha=0.0,
886
- tools=["hover"],
887
- )
888
-
889
- # feature_points_5 are all features that have ms2_scans null and id_level < 1
890
- feature_points_5 = None
891
- feat_df = feats.copy()
892
- feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] < 1)]
893
- if len(feat_df) > 0:
894
- feature_points_5 = hv.Points(
895
- feat_df,
896
- kdims=["rt", "mz"],
897
- vdims=["inty", "feature_uid"],
898
- label="No ID, no MS2",
899
- ).options(
900
- color="gray",
901
- marker="diamond",
902
- fill_alpha=0.0,
903
- size=markersize,
904
- tools=["hover"],
905
- )
906
-
907
- overlay = raster
908
-
909
- if feature_points_1 is not None:
910
- overlay = overlay * feature_points_1
911
- if feature_points_2 is not None:
912
- overlay = overlay * feature_points_2
913
- if feature_points_3 is not None:
914
- overlay = overlay * feature_points_3
915
- if feature_points_4 is not None:
916
- overlay = overlay * feature_points_4
917
- # if not show_only_features_with_ms2:
918
- if feature_points_5 is not None:
919
- overlay = overlay * feature_points_5
920
-
921
- if title is not None:
922
- overlay = overlay.opts(title=title)
923
-
924
- # Create a panel layout
925
- layout = panel.Column(overlay)
926
-
927
- if filename is not None:
928
- # if filename includes .html, save the panel layout to an HTML file
929
- if filename.endswith(".html"):
930
- layout.save(filename, embed=True)
931
- else:
932
- # save the panel layout as a png
933
- hv.save(overlay, filename, fmt="png")
934
- else:
935
- # Display the panel layout
936
- layout.show()
937
-
938
-
939
- def plot_ms2_eic(
940
- self,
941
- feature_uid=None,
942
- rt_tol=5,
943
- mz_tol=0.05,
944
- link_x=True,
945
- n=20,
946
- deisotope=True,
947
- centroid=True,
948
- filename=None,
949
- ):
950
- """
951
- Plots the Extracted Ion Chromatograms (EIC) for the precursor and top n MS2 fragment ions of a given feature.
952
- Parameters:
953
- feature_uid: The feature unique identifier. Must be present in the features dataframe; if None, a message is printed.
954
- rt_tol (float, optional): The retention time tolerance (in seconds) to extend the feature's rt start and end values. Default is 5.
955
- mz_tol (float, optional): The m/z tolerance used when filtering the precursor and fragment ion intensities. Default is 0.05.
956
- link_x (bool, optional): If True, the x-axis (retention time) of all subplots is linked. Default is True.
957
- n (int, optional): The number of top MS2 fragment m/z values to consider for plotting. Default is 20.
958
- deisotope (bool, optional): Flag that determines whether deisotoping should be applied to the MS2 fragments. Default is True.
959
- centroid (bool, optional): Flag that controls whether centroiding is applied to the MS2 data. Default is True.
960
- filename (str, optional): If provided, the function saves the plot to the specified file. Supports .html for interactive plots or other formats (e.g., png).
961
- If None, the plot is displayed instead of being saved.
962
- Returns:
963
- None
964
- Notes:
965
- - The function first verifies the existence of the provided feature id and its associated MS2 spectrum.
966
- - It retrieves the top n fragments by intensity from the MS2 spectrum and computes the EIC for both the precursor ion and the fragments.
967
- - A helper method (_spec_to_mat) is used to convert spectral data into intensity matrices.
968
- - The resulting plots include hover tools to display the retention time and scan identifier.
969
- - The layout is arranged in a grid (4 columns by default) and may have linked x-axes based on the link_x parameter.
970
- """
971
- # plots the EIC for a given feature id including the EIC of the top n MS2 fragments
972
-
973
- if feature_uid is None:
974
- print("Please provide a feature id.")
975
- return
976
- # check if feature_uid is in features_df
977
- if feature_uid not in self.features_df["feature_uid"].values:
978
- print("Feature id not found in features_df.")
979
-
980
- feature = self.features_df[self.features_df["feature_uid"] == feature_uid]
981
- # get top n fragments
982
- ms2_specs = feature["ms2_specs"].values[0]
983
- if ms2_specs is None:
984
- print("No MS2 data found for this feature.")
985
- return
986
-
987
- if len(ms2_specs) == 0:
988
- print("No MS2 data found for this feature.")
989
- return
990
- # get the MS2 spectrum
991
- # get the mz of the top n fragments
992
- ms2_specs_df = ms2_specs[0].pandalize()
993
- ms2_specs_df = ms2_specs_df.sort_values(by="inty", ascending=False)
994
- ms2_specs_df = ms2_specs_df.head(n)
995
- top_mzs = ms2_specs_df["mz"].values.tolist()
996
-
997
- # find rt_start and rt_end of the feature_uid
998
- rt_start = feature["rt_start"].values[0] - rt_tol
999
- rt_end = feature["rt_end"].values[0] + rt_tol
1000
- # get the cycle at rt_start and the cycle at rt_end from the closest scan with ms_level == 1
1001
- scans = self.scans_df.filter(pl.col("ms_level") == 1)
1002
- scans = scans.filter(pl.col("rt") > rt_start)
1003
- scans = scans.filter(pl.col("rt") < rt_end)
1004
- rts = scans["rt"].to_list()
1005
- if len(scans) == 0:
1006
- print(f"No scans found between {rt_start} and {rt_end}.")
1007
- return
1008
- scan_uids = scans["scan_uid"].to_list()
1009
- eic_prec = self._spec_to_mat(
1010
- scan_uids,
1011
- mz_ref=feature["mz"].values.tolist(),
1012
- mz_tol=mz_tol,
1013
- deisotope=False,
1014
- centroid=True,
1015
- )
1016
- # convert eic_prec from matrix to list
1017
- eic_prec = eic_prec[0].tolist()
1018
-
1019
- # get all unique cycles from scans
1020
- cycles = scans["cycle"].unique()
1021
- scan_uids = []
1022
- # iterate over all cycles and get the scan_uid of scan with ms_level == 2 and closest precursor_mz to spec.precursor_mz
1023
- for cycle in cycles:
1024
- scans = self.scans_df.filter(pl.col("cycle") == cycle)
1025
- scans = scans.filter(pl.col("ms_level") == 2)
1026
- scans = scans.filter(pl.col("prec_mz") > feature["mz"] - 5)
1027
- scans = scans.filter(pl.col("prec_mz") < feature["mz"] + 5)
1028
- if len(scans) == 0:
1029
- print(
1030
- f"No scans found for cycle {cycle} and mz {feature['mz']}. Increase mz_tol tolerance.",
1031
- )
1032
- return
1033
- # get the scan with the closest precursor_mz to feature['mz']
1034
- scan = scans[(scans["prec_mz"] - feature["mz"]).abs().arg_sort()[:1]]
1035
- scan_uids.append(scan["scan_uid"][0])
1036
- eic_prod = self._spec_to_mat(
1037
- scan_uids,
1038
- mz_ref=top_mzs,
1039
- mz_tol=mz_tol,
1040
- deisotope=deisotope,
1041
- centroid=centroid,
1042
- )
1043
-
1044
- prec_name = f"prec {feature['mz'].values[0]:.3f}"
1045
- eic_df = pd.DataFrame({"rt": rts, prec_name: eic_prec})
1046
- # add scan_uid to eic_df for the tooltips
1047
- eic_df["scan_uid"] = scan_uids
1048
-
1049
- frag_names = [prec_name]
1050
- for i, mz in enumerate(top_mzs):
1051
- # add column to eic_df
1052
- name = f"frag {mz:.3f}"
1053
- frag_names.append(name)
1054
- eic_df[name] = eic_prod[i]
1055
-
1056
- # create a plot for all columns in eic_df
1057
- eic_plots: list[hv.Curve] = []
1058
- for name in frag_names:
1059
- eic = hv.Curve(eic_df, kdims=["rt"], vdims=[name, "scan_uid"]).opts(
1060
- title=name,
1061
- xlabel="RT (s)",
1062
- ylabel=f"Inty_f{len(eic_plots)}",
1063
- width=250,
1064
- height=200,
1065
- axiswise=True,
1066
- color="black",
1067
- tools=[HoverTool(tooltips=[("rt", "@rt"), ("scan_uid", "@scan_uid")])],
1068
- )
1069
- eic_plots.append(eic)
1070
-
1071
- # add as
1072
-
1073
- layout = hv.Layout(eic_plots).cols(4)
1074
- if link_x:
1075
- layout = layout.opts(shared_axes=True)
1076
-
1077
- if filename is not None:
1078
- if filename.endswith(".html"):
1079
- panel.panel(layout).save(filename, embed=True) # type: ignore[attr-defined]
1080
- else:
1081
- hv.save(layout, filename, fmt="png")
1082
- else:
1083
- panel.panel(layout).show()
1084
-
1085
-
1086
- def plot_ms2_cycle(
1087
- self,
1088
- cycle=None,
1089
- filename=None,
1090
- title=None,
1091
- cmap=None,
1092
- raster_dynamic=True,
1093
- raster_max_px=8,
1094
- raster_threshold=0.8,
1095
- centroid=True,
1096
- deisotope=True,
1097
- ):
1098
- if self.file_obj is None:
1099
- print("Please load a mzML file first.")
1100
- return
1101
-
1102
- if cycle is None:
1103
- print("Please provide a cycle number.")
1104
- return
1105
-
1106
- if cycle not in self.scans_df["cycle"].unique():
1107
- print("Cycle number not found in scans_df.")
1108
- return
1109
-
1110
- if cmap is None:
1111
- cmap = "iridescent_r"
1112
- elif cmap == "grey":
1113
- cmap = "Greys256"
1114
-
1115
- # find all scans in cycle
1116
- scans = self.scans_df.filter(pl.col("cycle") == cycle)
1117
- scans = scans.filter(pl.col("ms_level") == 2)
1118
-
1119
- ms2data = []
1120
- # iterate through all rows
1121
- for scan in scans.iter_rows(named=True):
1122
- scan_uid = scan["scan_uid"]
1123
- # get spectrum
1124
- spec = self.get_spectrum(
1125
- scan_uid,
1126
- precursor_trim=None,
1127
- centroid=centroid,
1128
- deisotope=deisotope,
1129
- )
1130
- if spec.mz.size == 0:
1131
- continue
1132
- d = {
1133
- "prec_mz": [scan["prec_mz"]] * spec.mz.size,
1134
- "mz": spec.mz,
1135
- "inty": spec.inty,
1136
- }
1137
- ms2data.append(d)
1138
-
1139
- # convert to pandas DataFrame
1140
- spectradf = pd.DataFrame(ms2data)
1141
-
1142
- # remove any inty<1
1143
- spectradf = spectradf[spectradf["inty"] >= 1]
1144
- # keep only prec_mz, mz, and inty
1145
- spectradf = spectradf[["prec_mz", "mz", "inty"]]
1146
- maxrt = spectradf["prec_mz"].max()
1147
- minrt = spectradf["prec_mz"].min()
1148
- maxmz = spectradf["mz"].max()
1149
- minmz = spectradf["mz"].min()
1150
-
1151
- # TODO elem not used
1152
- def new_bounds_hook(plot, elem):
1153
- x_range = plot.state.x_range
1154
- y_range = plot.state.y_range
1155
- x_range.bounds = minrt, maxrt
1156
- y_range.bounds = minmz, maxmz
1157
-
1158
- points = hv.Points(
1159
- spectradf,
1160
- kdims=["prec_mz", "mz"],
1161
- vdims=["inty"],
1162
- label="MS1 survey scans",
1163
- ).opts(
1164
- fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
1165
- color=np.log(dim("inty")),
1166
- colorbar=True,
1167
- cmap="Magma",
1168
- tools=["hover"],
1169
- )
1170
-
1171
- raster = hd.rasterize(
1172
- points,
1173
- aggregator=ds.max("inty"),
1174
- interpolation="bilinear",
1175
- dynamic=raster_dynamic, # alpha=10, min_alpha=0,
1176
- ).opts(
1177
- active_tools=["box_zoom"],
1178
- cmap=process_cmap(cmap, provider="bokeh"), # blues
1179
- tools=["hover"],
1180
- hooks=[new_bounds_hook],
1181
- width=1000,
1182
- height=1000,
1183
- cnorm="log",
1184
- xlabel="Q1 m/z",
1185
- ylabel="m/z",
1186
- colorbar=True,
1187
- colorbar_position="right",
1188
- axiswise=True,
1189
- )
1190
-
1191
- overlay = hd.dynspread(
1192
- raster,
1193
- threshold=raster_threshold,
1194
- how="add",
1195
- shape="square",
1196
- max_px=raster_max_px,
1197
- )
1198
-
1199
- """
1200
- feature_points_1 = None
1201
- feature_points_2 = None
1202
- feature_points_3 = None
1203
- feature_points_4 = None
1204
- feature_points_iso = None
1205
- # Plot features as red dots if features is True
1206
- if self.features_df is not None and show_features:
1207
- feats = self.features_df.clone()
1208
- # Convert to pandas for operations that require pandas functionality
1209
- if hasattr(feats, 'to_pandas'):
1210
- feats = feats.to_pandas()
1211
- # if ms2_scans is not null, keep only the first element of the list
1212
- feats['ms2_scans'] = feats['ms2_scans'].apply(lambda x: x[0] if type(x) == list else x)
1213
- # keep only iso==0, i.e. the main
1214
- feats = feats[feats['iso']==0]
1215
- # find features with ms2_scans not None and iso==0
1216
- features_df = feats[feats['ms2_scans'].notnull()]
1217
- feature_points_1 = hv.Points(
1218
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta", "ms2_scans"], label="Features with MS2 data"
1219
- ).options(
1220
- color=color_1,
1221
- marker=marker,
1222
- size=size_1,
1223
- tools=["hover"],
1224
- )
1225
- # find features without MS2 data
1226
- features_df = feats[feats['ms2_scans'].isnull()]
1227
- feature_points_2 = hv.Points(
1228
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta"], label="Features without MS2 data"
1229
- ).options(
1230
- color='red',
1231
- size=size_2,
1232
- marker=marker,
1233
- tools=["hover"],
1234
- )
1235
-
1236
- if show_isotopes:
1237
- feats = self.features_df
1238
- features_df = feats[feats['iso']>0]
1239
- feature_points_iso = hv.Points(
1240
- features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta", "iso", "iso_of"], label="Isotopes"
1241
- ).options(
1242
- color='violet',
1243
- marker=marker,
1244
- size=size_1,
1245
- tools=["hover"],
1246
- )
1247
- if show_ms2:
1248
- # find all self.scans_df with mslevel 2 that are not linked to a feature
1249
- ms2_orphan = self.scans_df.filter(pl.col('ms_level')==2).filter(pl.col("feature_uid")<0)
1250
-
1251
- if len(ms2_orphan) > 0:
1252
- # pandalize
1253
- ms2 = ms2_orphan.to_pandas()
1254
- feature_points_3 = hv.Points(
1255
- ms2, kdims=["rt", "prec_mz"], vdims=["index", "inty_tot", "bl"], label="Orphan MS2 scans"
1256
- ).options(
1257
- color=color_2,
1258
- marker='x',
1259
- size=size_2,
1260
- tools=["hover"],
1261
- )
1262
-
1263
- ms2_linked = self.scans_df.filter(pl.col('ms_level')==2).filter(pl.col("feature_uid")>=0)
1264
- if len(ms2_linked) > 0:
1265
- # pandalize
1266
- ms2 = ms2_linked.to_pandas()
1267
- feature_points_4 = hv.Points(
1268
- ms2, kdims=["rt", "prec_mz"], vdims=["index", "inty_tot", "bl"], label="Linked MS2 scans"
1269
- ).options(
1270
- color=color_1,
1271
- marker='x',
1272
- size=size_2,
1273
- tools=["hover"],
1274
- )
1275
-
1276
-
1277
- if feature_points_4 is not None:
1278
- overlay = overlay * feature_points_4
1279
- if feature_points_3 is not None:
1280
- overlay = overlay * feature_points_3
1281
- if feature_points_1 is not None:
1282
- overlay = overlay * feature_points_1
1283
- if not show_only_features_with_ms2:
1284
- if feature_points_2 is not None:
1285
- overlay = overlay * feature_points_2
1286
- if feature_points_iso is not None:
1287
- overlay = overlay * feature_points_iso
1288
- """
1289
- if title is not None:
1290
- overlay = overlay.opts(title=title)
1291
-
1292
- # Create a panel layout
1293
- layout = panel.Column(overlay)
1294
-
1295
- if filename is not None:
1296
- # if filename includes .html, save the panel layout to an HTML file
1297
- if filename.endswith(".html"):
1298
- layout.save(filename, embed=True)
1299
- else:
1300
- # save the panel layout as a png
1301
- hv.save(overlay, filename, fmt="png")
1302
- else:
1303
- # Display the panel layout
1304
- layout.show()
1305
-
1306
-
1307
- def plot_ms2_q1(
1308
- self,
1309
- feature_uid=None,
1310
- q1_width=10.0,
1311
- mz_tol=0.01,
1312
- link_x=True,
1313
- n=20,
1314
- deisotope=True,
1315
- centroid=True,
1316
- filename=None,
1317
- ):
1318
- # plots the EIC for a given feature id including the EIC of the top n MS2 fragments
1319
-
1320
- if feature_uid is None:
1321
- print("Please provide a feature id.")
1322
- return
1323
- # check if feature_uid is in features_df
1324
- if feature_uid not in self.features_df["feature_uid"].values:
1325
- print("Feature id not found in features_df.")
1326
-
1327
- feature = self.features_df[self.features_df["feature_uid"] == feature_uid]
1328
- # get top n fragments
1329
- ms2_specs = feature["ms2_specs"].values[0]
1330
- if ms2_specs is None:
1331
- print("No MS2 data found for this feature.")
1332
- return
1333
-
1334
- if len(ms2_specs) == 0:
1335
- print("No MS2 data found for this feature.")
1336
- return
1337
- # get the MS2 spectrum
1338
- # get the mz of the top n fragments
1339
- ms2_specs_df = ms2_specs[0].pandalize()
1340
- ms2_specs_df = ms2_specs_df.sort_values(by="inty", ascending=False)
1341
- ms2_specs_df = ms2_specs_df.head(n)
1342
- top_mzs = ms2_specs_df["mz"].values.tolist()
1343
-
1344
- # cycles is the cycle of the feature plus/minus q1_width
1345
- feature_scan = self.find_closest_scan(feature["rt"].values[0])
1346
- cycle = feature_scan["cycle"].values[0]
1347
- scans = self.scans_df.filter(pl.col("cycle") == cycle)
1348
- scans = scans.filter(pl.col("ms_level") == 2)
1349
- # find the scan in cycle whose 'prec_mz' is the closest to the feature['mz']
1350
- scan_uid = scans[(scans["prec_mz"] - feature["mz"]).abs().arg_sort()[:1]][
1351
- "scan_uid"
1352
- ][0]
1353
- # get q1_width scans before and after the scan_uid
1354
- scans = self.scans_df.filter(pl.col("scan_uid") >= scan_uid - q1_width)
1355
- scans = scans.filter(pl.col("scan_uid") <= scan_uid + q1_width)
1356
- scan_uids = scans["scan_uid"].to_list()
1357
- q1s = scans["prec_mz"].to_list()
1358
-
1359
- q1_prod = self._spec_to_mat(
1360
- scan_uids,
1361
- mz_ref=top_mzs,
1362
- mz_tol=mz_tol,
1363
- deisotope=deisotope,
1364
- centroid=centroid,
1365
- )
1366
- q1_df = pd.DataFrame({"q1": q1s})
1367
-
1368
- frag_names = []
1369
- for i, mz in enumerate(top_mzs):
1370
- # add column to q1_df
1371
- name = f"frag {mz:.3f}"
1372
- # if q1_ratio exists, add it to the name
1373
- if "q1_ratio" in ms2_specs_df.columns:
1374
- q1_ratio = ms2_specs_df["q1_ratio"].values[i]
1375
- name += f" q1r: {q1_ratio:.2f}"
1376
- frag_names.append(name)
1377
- q1_df[name] = q1_prod[i]
1378
- # add scan_uid to q1_df for the tooltips
1379
- q1_df["scan_uid"] = scan_uids
1380
-
1381
- # create a plot for all fragment columns in q1_df
1382
- eic_plots: list[hv.Curve] = []
1383
- for name in frag_names:
1384
- eic = hv.Curve(q1_df, kdims=["q1"], vdims=[name, "scan_uid"]).opts(
1385
- title=name,
1386
- xlabel="Q1 (m/z)",
1387
- ylabel=f"Inty_f{len(eic_plots)}",
1388
- width=250,
1389
- height=200,
1390
- axiswise=True,
1391
- color="black",
1392
- tools=[HoverTool(tooltips=[("Q1", "@q1"), ("scan_uid", "@scan_uid")])],
1393
- )
1394
- eic_plots.append(eic)
1395
-
1396
- # add as
1397
-
1398
- layout = hv.Layout(eic_plots).cols(4)
1399
- if link_x:
1400
- layout = layout.opts(shared_axes=True)
1401
-
1402
- if filename is not None:
1403
- if filename.endswith(".html"):
1404
- panel.panel(layout).save(filename, embed=True) # type: ignore[attr-defined]
1405
- else:
1406
- hv.save(layout, filename, fmt="png")
1407
- else:
1408
- panel.panel(layout).show()
1409
-
1410
-
1411
- def plot_dda_stats(
1412
- self,
1413
- filename=None,
1414
- ):
1415
- """
1416
- Generates scatter plots for DDA statistics.
1417
- This method retrieves statistical data using the `get_dda_stats` method, filters relevant
1418
- columns, and preprocesses the data by replacing any values below 0 with None. It then creates
1419
- a scatter plot for each metric specified in the `cols_to_plot` list. Each scatter plot uses "cycle"
1420
- as the x-axis, and the corresponding metric as the y-axis. In addition, common hover tooltips are
1421
- configured to display auxiliary data including "index", "cycle", "rt", and all other metric values.
1422
- If the `filename` parameter is provided:
1423
- - If it ends with ".html", the layout is saved as an interactive HTML file using Panel.
1424
- - Otherwise, the layout is saved as a PNG image using HoloViews.
1425
- If no filename is provided, the interactive panel is displayed.
1426
- Parameters:
1427
- filename (str, optional): The path and filename where the plot should be saved. If the filename
1428
- ends with ".html", the plot is saved as an HTML file; otherwise, it is saved as a PNG image.
1429
- If not provided, the plot is displayed interactively.
1430
- Notes:
1431
- - The method requires the holoviews, panel, and bokeh libraries for visualization.
1432
- - The data is expected to include the columns 'index', 'cycle', 'rt', and the metrics listed in
1433
- `cols_to_plot`.
1434
- """
1435
- stats = self.get_dda_stats()
1436
- cols_to_plot = [
1437
- "inty_tot",
1438
- "bl",
1439
- "ms2_n",
1440
- "time_cycle",
1441
- "time_ms1_to_ms1",
1442
- "time_ms1_to_ms2",
1443
- "time_ms2_to_ms2",
1444
- "time_ms2_to_ms1",
1445
- ]
1446
- # Ensure that 'scan_uid', 'cycle' and 'rt' are kept for hover along with the columns to plot
1447
- stats = stats[["scan_uid", "cycle", "rt", *cols_to_plot]]
1448
- # set any value < 0 to None
1449
- stats[stats < 0] = None
1450
-
1451
- # Create a Scatter for each column in cols_to_plot stacked vertically, with hover enabled
1452
- scatter_plots = []
1453
- # Define common hover tooltips for all plots including all cols_to_plot
1454
- common_tooltips = [
1455
- ("scan_uid", "@scan_uid"),
1456
- ("cycle", "@cycle"),
1457
- ("rt", "@rt"),
1458
- ] + [(c, f"@{c}") for c in cols_to_plot]
1459
- for col in cols_to_plot:
1460
- hover = HoverTool(tooltips=common_tooltips)
1461
- scatter = hv.Scatter(
1462
- stats,
1463
- kdims="cycle",
1464
- vdims=[col, "scan_uid", "rt"] + [c for c in cols_to_plot if c != col],
1465
- ).opts(
1466
- title=col,
1467
- xlabel="Cycle",
1468
- ylabel=col,
1469
- height=250,
1470
- width=800,
1471
- tools=[hover],
1472
- size=3,
1473
- )
1474
- scatter_plots.append(scatter)
1475
-
1476
- layout = hv.Layout(scatter_plots).cols(1)
1477
- if filename is not None:
1478
- if filename.endswith(".html"):
1479
- panel.panel(layout).save(filename, embed=True) # type: ignore[attr-defined]
1480
- else:
1481
- hv.save(layout, filename, fmt="png")
1482
- else:
1483
- panel.panel(layout).show()
1484
-
1485
-
1486
- def plot_feature_stats(
1487
- self,
1488
- filename=None,
1489
- ):
1490
- """
1491
- Generates overlaid distribution plots for selected feature metrics.
1492
- The distributions are created separately for features with and without MS2 data.
1493
- Metrics include intensity, quality, retention time, m/z (and m/z delta), number of MS2 peaks,
1494
- summed MS2 intensities, and the MS2-to-MS1 ratio. The plots help to visualize the distribution
1495
- differences between features that are linked to MS2 spectra and those that are not.
1496
-
1497
- Parameters:
1498
- filename (str, optional): The output filename. If the filename ends with ".html",
1499
- the plot is saved as an interactive HTML file; otherwise,
1500
- if provided, the plot is saved as a PNG image. If not provided,
1501
- the interactive plot is displayed.
1502
-
1503
- Returns:
1504
- None
1505
- """
1506
- # Work on a copy of features_df
1507
- feats = self.features_df.clone()
1508
- # Convert to pandas for operations that require pandas functionality
1509
- if hasattr(feats, "to_pandas"):
1510
- feats = feats.to_pandas()
1511
- # Compute m/z delta for each feature
1512
- feats["mz_delta"] = feats["mz_end"] - feats["mz_start"]
1513
- # Add a column with the number of peaks in the MS2 spectrum
1514
- feats["MS2peaks"] = feats["ms2_specs"].apply(
1515
- lambda x: len(x[0]) if x is not None else 0,
1516
- )
1517
- # Add a column with the sum of intensities in the MS2 spectrum
1518
- feats["MS2int"] = feats["ms2_specs"].apply(
1519
- lambda x: sum(x[0].inty) if x is not None else 0,
1520
- )
1521
-
1522
- # Calculate the ratio of MS2 to MS1 intensities
1523
- feats["MS2toMS1"] = feats["MS2int"] / feats["inty"]
1524
- # Apply log10 transformation to intensity, quality, and MS2int columns (handling non-positive values)
1525
- feats["inty"] = np.where(feats["inty"] <= 0, np.nan, np.log10(feats["inty"]))
1526
- # COMMENT: AR was bugging
1527
- # feats["chrom_heights"] = np.where(
1528
- # feats["chrom_heights"] <= 0, np.nan, np.log10(feats["chrom_heights"])
1529
- # )
1530
-
1531
- feats["quality"] = np.where(
1532
- feats["quality"] <= 0,
1533
- np.nan,
1534
- np.log10(feats["quality"]),
1535
- )
1536
- feats["MS2int"] = np.where(feats["MS2int"] <= 0, np.nan, np.log10(feats["MS2int"]))
1537
-
1538
- # Separate features based on presence of MS2 data
1539
- feats_with_MS2 = feats[feats["ms2_scans"].notnull()]
1540
- feats_without_MS2 = feats[feats["ms2_scans"].isnull()]
1541
-
1542
- # Define the metrics to plot
1543
- cols_to_plot = [
1544
- "mz",
1545
- "mz_delta",
1546
- "inty",
1547
- "quality",
1548
- "rt",
1549
- "rt_delta",
1550
- "chrom_coherence",
1551
- "chrom_prominence",
1552
- "chrom_prominence_scaled",
1553
- # COMMENT: AR was bugging
1554
- # "chrom_heights",
1555
- # "chrom_heights_scaled",
1556
- "MS2peaks",
1557
- "MS2int",
1558
- "MS2toMS1",
1559
- ]
1560
-
1561
- # Ensure an index column is available for plotting
1562
- feats["index"] = feats.index
1563
-
1564
- density_plots = []
1565
- # Create overlaid distribution plots for each metric
1566
- for col in cols_to_plot:
1567
- # Extract non-null values from both groups
1568
- data_with = feats_with_MS2[col].dropna().values
1569
- data_without = feats_without_MS2[col].dropna().values
1570
-
1571
- # Create distribution elements for features with and without MS2
1572
- dist_with = hv.Distribution(data_with, label="With MS2").opts(
1573
- color="red",
1574
- alpha=0.6,
1575
- )
1576
- dist_without = hv.Distribution(data_without, label="Without MS2").opts(
1577
- color="blue",
1578
- alpha=0.6,
1579
- )
1580
-
1581
- # Overlay the distributions with a legend and hover tool enabled
1582
- overlay = (dist_with * dist_without).opts(
1583
- title=col,
1584
- show_legend=True,
1585
- tools=["hover"],
1586
- )
1587
- density_plots.append(overlay)
1588
-
1589
- # Arrange the plots in a layout with three columns
1590
- layout = hv.Layout(density_plots).cols(3).opts(shared_axes=False)
1591
-
1592
- # Save or display the layout based on the filename parameter
1593
- if filename is not None:
1594
- if filename.endswith(".html"):
1595
- panel.panel(layout).save(filename, embed=True) # type: ignore[attr-defined]
1596
- else:
1597
- hv.save(layout, filename, fmt="png")
1598
- else:
1599
- panel.panel(layout).show()
1600
-
1601
-
1602
- def plot_tic(
1603
- self,
1604
- title=None,
1605
- filename=None,
1606
- ):
1607
- # get all ms_level == 1 scans from self.scans_df
1608
- scans = self.scans_df.filter(pl.col("ms_level") == 1)
1609
- # select rt, scan_uid and inty_tot, convert to pandas
1610
- data = scans[["rt", "scan_uid", "inty_tot"]].to_pandas()
1611
- # sort by rt
1612
- data = data.sort_values("rt")
1613
-
1614
- # plot using hv.Curve
1615
- tic = hv.Curve(data, kdims=["rt"], vdims=["inty_tot"])
1616
- tic.opts(
1617
- title=title,
1618
- xlabel="Retention Time (min)",
1619
- ylabel="TIC",
1620
- height=250,
1621
- width=100,
1622
- )
1
+ """
2
+ _plots.py
3
+
4
+ This module provides visualization functions for mass spectrometry data analysis.
5
+ It contains plotting utilities for extracted ion chromatograms (EICs), 2D data maps,
6
+ feature visualizations, and interactive dashboards using modern visualization libraries.
7
+
8
+ Key Features:
9
+ - **Extracted Ion Chromatograms (EICs)**: Interactive chromatographic plotting with feature annotations.
10
+ - **2D Data Visualization**: Mass spectrometry data visualization with datashader for large datasets.
11
+ - **Feature Plotting**: Visualize detected features with retention time and m/z information.
12
+ - **Interactive Dashboards**: Create interactive panels for data exploration and analysis.
13
+ - **Multi-Sample Plotting**: Comparative visualizations across multiple samples.
14
+ - **Export Capabilities**: Save plots in various formats (HTML, PNG, SVG).
15
+
16
+ Dependencies:
17
+ - `holoviews`: For high-level data visualization and interactive plots.
18
+ - `datashader`: For rendering large datasets efficiently.
19
+ - `panel`: For creating interactive web applications and dashboards.
20
+ - `bokeh`: For low-level plotting control and customization.
21
+ - `polars` and `pandas`: For data manipulation and processing.
22
+ - `numpy`: For numerical computations.
23
+
24
+ Functions:
25
+ - `plot_chrom()`: Generate chromatograms with feature overlays.
26
+ - `plot_2d()`: Create 2D mass spectrometry data visualizations.
27
+ - `plot_features()`: Visualize detected features in retention time vs m/z space.
28
+ - Various utility functions for plot styling and configuration.
29
+
30
+ Supported Plot Types:
31
+ - Chromatograms
32
+ - Total Ion Chromatograms (TIC)
33
+ - Base Peak Chromatograms (BPC)
34
+ - 2D intensity maps (RT vs m/z)
35
+ - Feature scatter plots
36
+ - Interactive dashboards
37
+
38
+ See Also:
39
+ - `parameters._plot_parameters`: For plot-specific parameter configuration.
40
+ - `single.py`: For applying plotting methods to ddafile objects.
41
+ - `study.py`: For study-level visualization functions.
42
+
43
+ """
44
+
45
+ import os
46
+
47
+ import datashader as ds
48
+ import holoviews as hv
49
+ import holoviews.operation.datashader as hd
50
+ import numpy as np
51
+ import pandas as pd
52
+ import panel
53
+ import polars as pl
54
+
55
+ from bokeh.models import HoverTool
56
+ from holoviews import dim
57
+ from holoviews.plotting.util import process_cmap
58
+ from matplotlib.colors import rgb2hex
59
+
60
+ # Parameters removed - using hardcoded defaults
61
+
62
+
63
+ hv.extension("bokeh")
64
+
65
+
66
+ def _is_notebook_environment():
67
+ """
68
+ Detect if code is running in a notebook environment (Jupyter, JupyterLab, or Marimo).
69
+
70
+ Returns:
71
+ bool: True if running in a notebook, False otherwise
72
+ """
73
+ try:
74
+ # Check for Jupyter/JupyterLab
75
+ from IPython import get_ipython
76
+ if get_ipython() is not None:
77
+ # Check if we're in a notebook context
78
+ shell = get_ipython().__class__.__name__
79
+ if shell in ['ZMQInteractiveShell', 'Shell']: # Jupyter notebook/lab
80
+ return True
81
+
82
+ # Check for Marimo
83
+ import sys
84
+ if 'marimo' in sys.modules:
85
+ return True
86
+
87
+ # Additional check for notebook environments
88
+ if hasattr(__builtins__, '__IPYTHON__') or hasattr(__builtins__, '_ih'):
89
+ return True
90
+
91
+ except ImportError:
92
+ pass
93
+
94
+ return False
95
+
96
+
97
def _display_plot(plot_object, layout=None):
    """
    Display a plot object in the appropriate way based on the environment.

    Args:
        plot_object: The plot object to display (holoviews overlay, etc.)
        layout: Optional panel layout object. Only used outside notebooks;
            when omitted, ``plot_object`` is wrapped in a ``panel.Column``.

    Returns:
        ``plot_object`` if a notebook environment is detected (the caller is
        expected to let the notebook render the returned value), None otherwise.
    """
    if _is_notebook_environment():
        # Nothing is rendered here; the object is simply handed back.
        return plot_object

    # Outside a notebook: open the layout through panel's show().
    if layout is None:
        layout = panel.Column(plot_object)
    layout.show()
    return None
131
+
132
+
133
def plot_chrom(
    self,
    feature_uid=None,
    filename=None,
    rt_tol=10,
    rt_tol_factor_plot=1,
    mz_tol=0.0005,
    mz_tol_factor_plot=1,
    link_x=False,
):
    """
    Plot chromatograms for one or more features using MS1 data and feature metadata.

    For every selected feature the MS1 data in `ms1_df` is restricted to the
    feature's rt/mz boundaries (widened by the plot tolerances), intensities are
    summed per retention time, and the trace is drawn as a HoloViews curve with
    dashed vertical lines at the feature's `rt_start` and `rt_end`.

    Parameters:
        feature_uid (int or list of int, optional):
            Feature identifier(s) for chromatogram generation. If None, chromatograms
            for all features in `features_df` are plotted.
        filename (str, optional):
            Output file path. If ending with `.html`, saves as interactive HTML;
            otherwise, saves as PNG. If not provided, the plot is displayed.
        rt_tol (float, default=10):
            Retention time tolerance; multiplied by `rt_tol_factor_plot` and added
            on both sides of the feature's rt boundaries.
        rt_tol_factor_plot (float, default=1):
            Multiplier applied to `rt_tol`.
        mz_tol (float, default=0.0005):
            m/z tolerance; multiplied by `mz_tol_factor_plot` and added on both
            sides of the feature's m/z boundaries.
        mz_tol_factor_plot (float, default=1):
            Multiplier applied to `mz_tol`.
        link_x (bool, default=False):
            If True, the layout is built with `shared_axes=True`.

    Returns:
        None when the plot is saved to `filename` or nothing could be plotted;
        otherwise whatever `_display_plot` returns.

    Notes:
        - `features_df` is converted with `to_pandas()` when it offers that method,
          so both Polars and pandas feature tables are accepted.
        - `ms1_df` is filtered with Polars expressions.
    """
    feats = self.features_df
    if feats is None or self.ms1_df is None:
        print("No feature or MS1 data available.")
        return None

    # Row selection below is written against pandas; convert a Polars table once.
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()

    if feature_uid is not None:
        if isinstance(feature_uid, (int, np.integer)):
            wanted = [feature_uid]
        else:
            wanted = list(feature_uid)
        feats = feats[feats["feature_uid"].isin(wanted)]

    mz_tol_plot = mz_tol * mz_tol_factor_plot
    rt_tol_plot = rt_tol * rt_tol_factor_plot

    needed = ["feature_uid", "rt_start", "rt_end", "mz", "mz_start", "mz_end"]
    chrom_plots = []
    for uid, rt_start, rt_end, mz, mz_start, mz_end in feats[needed].itertuples(
        index=False,
        name=None,
    ):
        # Restrict MS1 data to the (widened) rt/mz box of this feature.
        chrom_df = self.ms1_df.filter(
            pl.col("rt") >= rt_start - rt_tol_plot,
            pl.col("rt") <= rt_end + rt_tol_plot,
            pl.col("mz") >= mz_start - mz_tol_plot,
            pl.col("mz") <= mz_end + mz_tol_plot,
        )
        if chrom_df.is_empty():
            print("No MS1 data found in the specified window.")
            continue

        # Sum all points sharing the same rt into a single intensity value.
        yname = f"inty_{uid}"
        chrom_df = (
            chrom_df.to_pandas()
            .groupby("rt")
            .agg({"inty": "sum"})
            .reset_index()
            .rename(columns={"inty": yname})
        )

        chrom = hv.Curve(chrom_df, kdims=["rt"], vdims=[yname]).opts(
            title=f"Chromatogram for feature {uid}, mz = {mz:.4f}",
            xlabel="Retention time (s)",
            ylabel="Intensity",
            width=1000,
            tools=["hover"],
            height=250,
            axiswise=True,
            color="black",
        )

        # Mark the feature's retention time boundaries.
        for boundary in (rt_start, rt_end):
            chrom = chrom * hv.VLine(boundary).opts(
                color="blue",
                line_width=1,
                line_dash="dashed",
                axiswise=True,
            )

        chrom_plots.append(chrom)

    if not chrom_plots:
        print("No chromatograms to plot.")
        return None

    # Keep the HoloViews layout separate from the panel wrapper: hv.save and the
    # notebook display need the HoloViews object, layout.save needs the panel one.
    hv_layout = hv.Layout(chrom_plots).opts(shared_axes=bool(link_x)).cols(1)
    layout = panel.Column(hv_layout)

    if filename is None:
        # Check if we're in a notebook environment and display appropriately
        return _display_plot(hv_layout, layout)

    if filename.endswith(".html"):
        layout.save(filename, embed=True)
    else:
        hv.save(hv_layout, filename, fmt="png")
    return None
273
+
274
+
275
def plot_2d(
    self,
    filename=None,
    show_features=True,
    show_only_features_with_ms2=False,
    show_isotopes=False,
    show_ms2=False,
    title=None,
    cmap=None,
    marker="circle",
    markersize=10,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    mz_range=None,
    rt_range=None,
):
    """
    Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
    of feature and MS2 scan information.

    Parameters:
        filename (str, optional):
            Path to save the plot. If provided and ends with ".html", the plot is saved as an
            interactive HTML file; otherwise, it is saved as a PNG image.
        show_features (bool, default True):
            Whether to overlay detected features on the plot.
        show_only_features_with_ms2 (bool, default False):
            If True, features without MS2 scans are left out of the overlay.
        show_isotopes (bool, default False):
            Whether to overlay features with `iso > 0`. Only evaluated when features
            are shown.
        show_ms2 (bool, default False):
            Whether to overlay MS2 scans taken from `scans_df`.
        title (str, optional):
            Title of the plot.
        cmap (str, optional):
            Colormap for the rasterized background. Defaults to "iridescent_r";
            "grey" is translated to "Greys256".
        marker (str, default 'circle'):
            Marker type used for feature points.
        markersize (int, default 10):
            Size of the markers. Replaced by 2 when saving to a non-HTML file.
        raster_dynamic (bool, default True):
            Whether to use dynamic rasterization. Forced to False when saving to a
            non-HTML file.
        raster_max_px (int, default 8):
            `max_px` passed to dynspread.
        raster_threshold (float, default 0.8):
            `threshold` passed to dynspread.
        mz_range (tuple, optional):
            (min, max) m/z window, inclusive, applied to MS1 data and features.
        rt_range (tuple, optional):
            (min, max) retention time window, inclusive, applied to MS1 data and features.

    Behavior:
        - Returns early (logging an error) when `self.ms1_df` is None or no MS1 point
          survives the filters.
        - Filters MS1 data with Polars (inty >= 1, optional ranges), then converts it to
          pandas for HoloViews/datashader.
        - Locks the pan/zoom bounds of both axes to the extent of the plotted MS1 data.
        - Overlays MS2 scans, features with/without MS2 and isotopes as requested.

    Returns:
        None when the plot is saved or nothing could be plotted; otherwise whatever
        `_display_plot` returns.
    """
    if self.ms1_df is None:
        self.logger.error("No MS1 data available.")
        return None

    if cmap is None:
        cmap = "iridescent_r"
    elif cmap == "grey":
        cmap = "Greys256"

    def _within_ranges(frame):
        # Restrict a pandas frame with "mz"/"rt" columns to the requested windows.
        if mz_range is not None:
            frame = frame[(frame["mz"] >= mz_range[0]) & (frame["mz"] <= mz_range[1])]
        if rt_range is not None:
            frame = frame[(frame["rt"] >= rt_range[0]) & (frame["rt"] <= rt_range[1])]
        return frame

    # ms1_df is a Polars frame: filter with expressions, not boolean-mask indexing.
    ms1 = self.ms1_df.select(["rt", "mz", "inty"]).filter(pl.col("inty") >= 1)
    if mz_range is not None:
        ms1 = ms1.filter((pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]))
    if rt_range is not None:
        ms1 = ms1.filter((pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]))
    if ms1.is_empty():
        self.logger.error("No MS1 data available.")
        return None

    minrt, maxrt = ms1["rt"].min(), ms1["rt"].max()
    minmz, maxmz = ms1["mz"].min(), ms1["mz"].max()

    def new_bounds_hook(plot, elem):
        # Prevent panning/zooming beyond the extent of the plotted data.
        plot.state.x_range.bounds = minrt, maxrt
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        ms1.to_pandas(),
        kdims=["rt", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    color_1 = "forestgreen"
    color_2 = "darkorange"
    size_1 = size_2 = markersize
    if filename is not None and not filename.endswith(".html"):
        # Static image export: small markers, no dynamic re-rasterization.
        size_1 = size_2 = 2
        raster_dynamic = False

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Retention time (s)",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )
    raster = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    feature_points_1 = None  # features with MS2
    feature_points_2 = None  # features without MS2
    feature_points_3 = None  # orphan MS2 scans
    feature_points_4 = None  # linked MS2 scans
    feature_points_iso = None

    if self.features_df is not None and show_features:
        source = self.features_df
        # Work on a private pandas copy; the ms2_scans column is rewritten below.
        all_feats = source.to_pandas() if hasattr(source, "to_pandas") else source.copy()
        # Keep only the first MS2 scan id per feature. After a Polars -> pandas
        # conversion list cells may arrive as arrays rather than Python lists.
        all_feats["ms2_scans"] = all_feats["ms2_scans"].apply(
            lambda x: x[0] if isinstance(x, (list, tuple, np.ndarray)) and len(x) > 0 else x,
        )
        all_feats = _within_ranges(all_feats)

        # keep only iso==0, i.e. the main
        feats = all_feats[all_feats["iso"] == 0]

        feature_points_1 = hv.Points(
            feats[feats["ms2_scans"].notnull()],
            kdims=["rt", "mz"],
            vdims=[
                "feature_uid",
                "inty",
                "quality",
                "rt_delta",
                "ms2_scans",
                "chrom_coherence",
                "chrom_prominence_scaled",
            ],
            label="Features with MS2 data",
        ).options(
            color=color_1,
            marker=marker,
            size=size_1,
            tools=["hover"],
        )

        feature_points_2 = hv.Points(
            feats[feats["ms2_scans"].isnull()],
            kdims=["rt", "mz"],
            vdims=[
                "feature_uid",
                "inty",
                "quality",
                "rt_delta",
                "chrom_coherence",
                "chrom_prominence_scaled",
            ],
            label="Features without MS2 data",
        ).options(
            color="red",
            size=size_2,
            marker=marker,
            tools=["hover"],
        )

        if show_isotopes:
            # Isotopes are taken from the same range-filtered table as the features.
            feature_points_iso = hv.Points(
                all_feats[all_feats["iso"] > 0],
                kdims=["rt", "mz"],
                vdims=[
                    "feature_uid",
                    "inty",
                    "quality",
                    "rt_delta",
                    "iso",
                    "iso_of",
                    "chrom_coherence",
                    "chrom_prominence_scaled",
                ],
                label="Isotopes",
            ).options(
                color="violet",
                marker=marker,
                size=size_1,
                tools=["hover"],
            )

    if show_ms2 and self.scans_df is not None:
        ms2_scans = self.scans_df.filter(pl.col("ms_level") == 2)

        # MS2 scans that are not linked to a feature
        ms2_orphan = ms2_scans.filter(pl.col("feature_uid") < 0)
        if len(ms2_orphan) > 0:
            feature_points_3 = hv.Points(
                ms2_orphan.to_pandas(),
                kdims=["rt", "prec_mz"],
                vdims=["index", "inty_tot", "bl"],
                label="Orphan MS2 scans",
            ).options(
                color=color_2,
                marker="x",
                size=size_2,
                tools=["hover"],
            )

        ms2_linked = ms2_scans.filter(pl.col("feature_uid") >= 0)
        if len(ms2_linked) > 0:
            feature_points_4 = hv.Points(
                ms2_linked.to_pandas(),
                kdims=["rt", "prec_mz"],
                vdims=["index", "inty_tot", "bl"],
                label="Linked MS2 scans",
            ).options(
                color=color_1,
                marker="x",
                size=size_2,
                tools=["hover"],
            )

    # Stack the layers; later layers are drawn on top of earlier ones.
    overlay = raster
    if feature_points_4 is not None:
        overlay = overlay * feature_points_4
    if feature_points_3 is not None:
        overlay = overlay * feature_points_3
    if feature_points_1 is not None:
        overlay = overlay * feature_points_1
    if not show_only_features_with_ms2 and feature_points_2 is not None:
        overlay = overlay * feature_points_2
    if feature_points_iso is not None:
        overlay = overlay * feature_points_iso

    if title is not None:
        overlay = overlay.opts(title=title)

    # Create a panel layout
    layout = panel.Column(overlay)

    if filename is None:
        # Check if we're in a notebook environment and display appropriately
        return _display_plot(overlay, layout)

    if filename.endswith(".html"):
        layout.save(filename, embed=True)
    else:
        hv.save(overlay, filename, fmt="png")
    return None
585
+
586
+
587
def plot_2d_oracle(
    self,
    oracle_folder=None,
    link_by_feature_uid=None,
    colorby="hg",
    filename=None,
    min_id_level=None,
    max_id_level=None,
    min_ms_level=None,
    title=None,
    cmap=None,
    markersize=10,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    mz_range=None,
    rt_range=None,
):
    """
    Plot a 2D overlay of MS1 survey scans and features annotated with oracle results.

    The oracle summary is read from "<oracle_folder>/diag/summary_by_feature.csv",
    linked to the features in `features_df`, and the features are drawn on top of the
    rasterized MS1 data, grouped by identification level and MS2 availability.

    Parameters:
        oracle_folder (str, optional): Folder containing "diag/summary_by_feature.csv".
            If None, the function returns without plotting.
        link_by_feature_uid (bool, optional): If truthy, oracle rows are linked through the
            "fid:<number>" tag in their `title` column. Otherwise each oracle row is linked
            to the feature closest in rt among those within 1 (rt) and 0.005 (m/z) of the
            row's `rt` / `precursor` values.
        colorby (str, optional): Column group used for coloring. 'class', 'hg', 'id_class'
            and 'id_hg' color by `id_class`; 'ion' / 'id_ion' by `id_ion`; 'id_evidence' /
            'ms2_evidence' by `id_evidence`. Any other value leaves all features black.
        filename (str, optional): Output file. ".html" saves the panel layout as HTML;
            anything else is saved as PNG. If None, the plot is displayed.
        min_id_level (int, optional): Keep only features with `id_level` >= this value.
        max_id_level (int, optional): Keep only features with `id_level` <= this value.
        min_ms_level (int, optional): Keep only features with `mslevel` >= this value.
        title (str, optional): Title of the plot.
        cmap (str, optional): Colormap of the rasterized background. None and "grey" map to
            'Greys256', "iridescent" maps to "iridescent_r".
        markersize (int, optional): Marker size for feature points. Default is 10.
        raster_dynamic (bool, optional): Enables dynamic rasterization. Forced to False when
            saving to a non-HTML file. Default is True.
        raster_max_px (int, optional): `max_px` passed to dynspread. Default is 8.
        raster_threshold (float, optional): `threshold` passed to dynspread. Default is 0.8.
        mz_range (tuple, optional): Inclusive m/z range for filtering MS1 data.
        rt_range (tuple, optional): Inclusive retention time range for filtering MS1 data.

    Returns:
        None when the plot is saved or a prerequisite is missing; otherwise whatever
        `_display_plot` returns.
    """
    if self.file_obj is None:
        print("Please load a file first.")
        return None
    if self.ms1_df is None:
        print("No MS1 data available.")
        return None
    # Nothing can be annotated without features or an oracle folder, so bail out
    # before the (expensive) raster is built.
    if self.features_df is None or oracle_folder is None:
        return None

    # try to read the annotation file as a csv file
    try:
        oracle_data = pd.read_csv(
            os.path.join(oracle_folder, "diag", "summary_by_feature.csv"),
        )
    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError covers pandas parser,
        # empty-data and decoding errors.
        print(f"Could not read {oracle_folder}/diag/summary_by_feature.csv")
        return None

    if cmap is None or cmap == "grey":
        cmap = "Greys256"
    elif cmap == "iridescent":
        cmap = "iridescent_r"

    # ---- MS1 background -------------------------------------------------
    spectradf = self.ms1_df.select(["rt", "mz", "inty"]).to_pandas()
    # remove any inty<1
    spectradf = spectradf[spectradf["inty"] >= 1]
    if mz_range is not None:
        spectradf = spectradf[(spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])]
    if rt_range is not None:
        spectradf = spectradf[(spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])]
    if len(spectradf) == 0:
        print("No MS1 data available.")
        return None

    minrt, maxrt = spectradf["rt"].min(), spectradf["rt"].max()
    minmz, maxmz = spectradf["mz"].min(), spectradf["mz"].max()

    def new_bounds_hook(plot, elem):
        # Prevent panning/zooming beyond the extent of the plotted data.
        plot.state.x_range.bounds = minrt, maxrt
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        spectradf,
        kdims=["rt", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    if filename is not None and not filename.endswith(".html"):
        # Static image export cannot re-rasterize on zoom.
        raster_dynamic = False

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Retention time (s)",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )
    raster = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    # ---- link oracle annotations to features ------------------------------
    source = self.features_df
    feats = source.to_pandas() if hasattr(source, "to_pandas") else source.copy()

    annotation_cols = [
        "title",
        "scan_idx",
        "mslevel",
        "hits",
        "id_level",
        "id_label",
        "id_ion",
        "id_class",
        "id_evidence",
        "score",
        "score2",
    ]
    if link_by_feature_uid:
        oracle_data = oracle_data[annotation_cols].copy()
        # extract feature_uid from title, which carries a "fid:<number>" tag
        oracle_data["feature_uid"] = oracle_data["title"].str.extract(
            r"fid:(\d+)",
            expand=False,
        )
    else:
        oracle_data = oracle_data[["precursor", "rt"] + annotation_cols].copy()
        # For every oracle row pick the feature closest in rt among those
        # within 1 (rt) and 0.005 (m/z) of the row.
        matched_uids = []
        for row in oracle_data.itertuples(index=False):
            close = (abs(feats["rt"] - row.rt) < 1) & (abs(feats["mz"] - row.precursor) < 0.005)
            if close.any():
                candidates = feats.loc[close]
                nearest = (candidates["rt"] - row.rt).abs().to_numpy().argmin()
                matched_uids.append(candidates["feature_uid"].iloc[nearest])
            else:
                matched_uids.append(None)
        oracle_data["feature_uid"] = matched_uids
        oracle_data = oracle_data.drop(columns=["precursor", "rt"])

    # Rows without a feature link cannot take part in the merge; dropping them
    # also keeps the merge key a clean integer column.
    oracle_data = oracle_data.dropna(subset=["feature_uid"]).copy()
    oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
    # One annotation per feature: keep the row with the highest id_level.
    oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
    oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")

    feats = feats.merge(oracle_data, how="left", on="feature_uid")

    # filter feats by id_level / ms level
    if min_id_level is not None:
        feats = feats[feats["id_level"] >= min_id_level]
    if max_id_level is not None:
        feats = feats[feats["id_level"] <= max_id_level]
    if min_ms_level is not None:
        feats = feats[feats["mslevel"] >= min_ms_level]

    # ---- colors -----------------------------------------------------------
    feats["color"] = "black"
    color_column = None
    cvalues = None
    if colorby in ["class", "hg", "id_class", "id_hg"]:
        color_column = "id_class"
        # replace nans in feats['id_class'] with 'mix'
        feats["id_class"] = feats["id_class"].fillna("mix")
        # order the classes by their reversed spelling
        cvalues = sorted(feats["id_class"].unique(), key=lambda s: s[::-1])
    elif colorby in ["ion", "id_ion"]:
        color_column = "id_ion"
        cvalues = list(feats["id_ion"].dropna().unique())
    elif colorby in ["id_evidence", "ms2_evidence"]:
        color_column = "id_evidence"
        cvalues = list(feats["id_evidence"].dropna().unique())

    if cvalues:
        num_colors = len(cvalues)
        cm = process_cmap("rainbow", ncolors=num_colors, provider="colorcet")
        colors = [
            rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
            for i in range(num_colors)
        ]
        # Rows whose value has no assigned color stay black.
        feats["color"] = feats[color_column].map(dict(zip(cvalues, colors))).fillna("black")

    # replace NaN with 0 in id_level
    feats["id_level"] = feats["id_level"].fillna(0)

    # ---- feature layers ---------------------------------------------------
    def _layer(frame, vdims, label, color, marker_shape, fill_alpha):
        # Build one hover-enabled point layer; None when there is nothing to draw.
        if len(frame) == 0:
            return None
        return hv.Points(
            frame,
            kdims=["rt", "mz"],
            vdims=vdims,
            label=label,
        ).options(
            color=color,
            marker=marker_shape,
            size=markersize,
            fill_alpha=fill_alpha,
            tools=["hover"],
        )

    has_ms2 = feats["ms2_scans"].notnull()
    id_level = feats["id_level"]
    ms1_id_vdims = [
        "inty",
        "feature_uid",
        "id_level",
        "id_label",
        "id_ion",
        "id_class",
        "color",
    ]

    layers = [
        _layer(
            feats[id_level == 2],
            [
                "inty",
                "feature_uid",
                "id_level",
                "id_class",
                "id_label",
                "id_ion",
                "id_evidence",
                "score",
                "score2",
                "color",
            ],
            "ID by MS2",
            "color",
            "circle",
            1.0,
        ),
        _layer(feats[has_ms2 & (id_level == 1)], ms1_id_vdims, "ID by MS1, with MS2", "color", "circle", 0.0),
        _layer(feats[~has_ms2 & (id_level == 1)], ms1_id_vdims, "ID by MS1, no MS2", "color", "diamond", 0.0),
        _layer(feats[has_ms2 & (id_level < 1)], ["inty", "feature_uid"], "No ID, with MS2", "gray", "circle", 0.0),
        _layer(feats[~has_ms2 & (id_level < 1)], ["inty", "feature_uid"], "No ID, no MS2", "gray", "diamond", 0.0),
    ]

    overlay = raster
    for layer in layers:
        if layer is not None:
            overlay = overlay * layer

    if title is not None:
        overlay = overlay.opts(title=title)

    # Create a panel layout
    layout = panel.Column(overlay)

    if filename is None:
        # Check if we're in a notebook environment and display appropriately
        return _display_plot(overlay, layout)

    if filename.endswith(".html"):
        layout.save(filename, embed=True)
    else:
        hv.save(overlay, filename, fmt="png")
    return None
994
+
995
+
996
def plot_ms2_eic(
    self,
    feature_uid=None,
    rt_tol=5,
    mz_tol=0.05,
    link_x=True,
    n=20,
    deisotope=True,
    centroid=True,
    filename=None,
):
    """
    Plot extracted ion chromatograms (EICs) for a feature's precursor and its top MS2 fragments.

    Parameters:
        feature_uid: The feature unique identifier. Must be present in ``self.features_df``;
            if None or unknown, a message is printed and nothing is plotted.
        rt_tol (float, optional): Amount subtracted from the feature's ``rt_start`` and added to
            its ``rt_end`` to define the plotted retention-time window. Default is 5.
        mz_tol (float, optional): The m/z tolerance passed to ``_spec_to_mat`` when extracting the
            precursor and fragment intensities. Default is 0.05.
        link_x (bool, optional): If True, the subplots share their axes (``shared_axes`` layout
            option); if False, axis sharing is disabled. Default is True.
        n (int, optional): The number of most intense MS2 fragment m/z values to plot. Default is 20.
        deisotope (bool, optional): Passed to ``_spec_to_mat`` for the fragment traces. Default is True.
        centroid (bool, optional): Passed to ``_spec_to_mat`` for the fragment traces. Default is True.
        filename (str, optional): If given, the plot is saved instead of displayed. Names ending in
            ".html" are saved through Panel; anything else is saved with ``hv.save(..., fmt="png")``.

    Returns:
        None when the plot is saved or when an early exit is taken (missing feature, no MS2 data,
        no scans in range); otherwise whatever ``_display_plot`` returns.

    Notes:
        - Fragments are taken from the first spectrum in the feature's ``ms2_specs`` entry.
        - The precursor trace is built from MS1 scans inside the retention-time window.
        - For every MS1 scan in the window, the MS2 scan of the same cycle whose ``prec_mz`` is
          closest to the feature m/z (within +/- 5) supplies the fragment intensities.
        - The layout is arranged in a grid of 4 columns.
    """
    if feature_uid is None:
        print("Please provide a feature id.")
        return None

    feats = self.features_df
    # Same guard as plot_feature_stats: the row lookups below use pandas semantics.
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()

    feature = feats[feats["feature_uid"] == feature_uid]
    if len(feature) == 0:
        print("Feature id not found in features_df.")
        # Without this return the .values[0] lookups below raise IndexError.
        return None

    ms2_specs = feature["ms2_specs"].values[0]
    if ms2_specs is None or len(ms2_specs) == 0:
        print("No MS2 data found for this feature.")
        return None

    # m/z of the n most intense fragments of the first MS2 spectrum
    top_fragments = ms2_specs[0].pandalize().sort_values(by="inty", ascending=False).head(n)
    top_mzs = top_fragments["mz"].values.tolist()

    # Scalar precursor m/z; the original passed the whole pandas Series into polars expressions.
    feature_mz = float(feature["mz"].values[0])
    rt_start = feature["rt_start"].values[0] - rt_tol
    rt_end = feature["rt_end"].values[0] + rt_tol

    # MS1 scans inside the retention-time window
    ms1_scans = self.scans_df.filter(
        (pl.col("ms_level") == 1) & (pl.col("rt") > rt_start) & (pl.col("rt") < rt_end),
    )
    if len(ms1_scans) == 0:
        print(f"No scans found between {rt_start} and {rt_end}.")
        return None
    rts = ms1_scans["rt"].to_list()

    eic_prec = self._spec_to_mat(
        ms1_scans["scan_uid"].to_list(),
        mz_ref=feature["mz"].values.tolist(),
        mz_tol=mz_tol,
        deisotope=False,
        centroid=True,
    )
    # first (only) row of the matrix -> plain list
    eic_prec = eic_prec[0].tolist()

    # One MS2 scan per MS1 scan, in the same order as `rts`, so that all columns of the
    # data frame line up (Series.unique() does not guarantee order).
    prec_window = 5
    ms2_scan_uids = []
    for cycle in ms1_scans["cycle"].to_list():
        candidates = self.scans_df.filter(
            (pl.col("cycle") == cycle)
            & (pl.col("ms_level") == 2)
            & (pl.col("prec_mz") > feature_mz - prec_window)
            & (pl.col("prec_mz") < feature_mz + prec_window),
        )
        if len(candidates) == 0:
            print(
                f"No MS2 scans found for cycle {cycle} within +/-{prec_window} m/z "
                f"of precursor {feature_mz:.4f}.",
            )
            return None
        # scan whose precursor m/z is closest to the feature m/z
        closest = (candidates["prec_mz"] - feature_mz).abs().arg_min()
        ms2_scan_uids.append(candidates["scan_uid"][closest])

    eic_prod = self._spec_to_mat(
        ms2_scan_uids,
        mz_ref=top_mzs,
        mz_tol=mz_tol,
        deisotope=deisotope,
        centroid=centroid,
    )

    prec_name = f"prec {feature_mz:.3f}"
    eic_df = pd.DataFrame({"rt": rts, prec_name: eic_prec})
    # scan_uid is only used by the hover tooltips
    eic_df["scan_uid"] = ms2_scan_uids

    frag_names = [prec_name]
    for i, mz in enumerate(top_mzs):
        name = f"frag {mz:.3f}"
        frag_names.append(name)
        eic_df[name] = eic_prod[i]

    # one curve per trace (precursor first, then fragments)
    eic_plots: list[hv.Curve] = []
    for name in frag_names:
        eic = hv.Curve(eic_df, kdims=["rt"], vdims=[name, "scan_uid"]).opts(
            title=name,
            xlabel="RT (s)",
            ylabel=f"Inty_f{len(eic_plots)}",
            width=250,
            height=200,
            axiswise=True,
            color="black",
            tools=[HoverTool(tooltips=[("rt", "@rt"), ("scan_uid", "@scan_uid")])],
        )
        eic_plots.append(eic)

    # Set shared_axes explicitly so that link_x=False actually unlinks the subplots.
    layout = hv.Layout(eic_plots).cols(4).opts(shared_axes=bool(link_x))

    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
        return None

    # Display path: delegate environment handling to _display_plot
    layout_obj = panel.panel(layout)
    return _display_plot(layout, layout_obj)
1143
+
1144
+
1145
def plot_ms2_cycle(
    self,
    cycle=None,
    filename=None,
    title=None,
    cmap=None,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    centroid=True,
    deisotope=True,
):
    """
    Plot all MS2 spectra of one acquisition cycle as a rasterized precursor-m/z vs. fragment-m/z map.

    Parameters:
        cycle: Cycle number to plot. Must occur in ``self.scans_df["cycle"]``.
        filename (str, optional): If given, the plot is saved instead of displayed. Names ending in
            ".html" are saved through Panel; anything else is saved with ``hv.save(..., fmt="png")``.
        title (str, optional): Plot title.
        cmap (str, optional): Colormap name handed to ``process_cmap``. None selects
            "iridescent_r"; "grey" is mapped to "Greys256".
        raster_dynamic (bool, optional): Passed as ``dynamic`` to ``hd.rasterize``.
        raster_max_px (int, optional): Passed as ``max_px`` to ``hd.dynspread``.
        raster_threshold (float, optional): Passed as ``threshold`` to ``hd.dynspread``.
        centroid (bool, optional): Passed to ``get_spectrum``.
        deisotope (bool, optional): Passed to ``get_spectrum``.

    Returns:
        None when the plot is saved or when an early exit is taken; otherwise whatever
        ``_display_plot`` returns.
    """
    if self.file_obj is None:
        print("Please load a mzML file first.")
        return None

    if cycle is None:
        print("Please provide a cycle number.")
        return None

    if cycle not in self.scans_df["cycle"].unique():
        print("Cycle number not found in scans_df.")
        return None

    if cmap is None:
        cmap = "iridescent_r"
    elif cmap == "grey":
        cmap = "Greys256"

    # all MS2 scans of the requested cycle
    ms2_scans = self.scans_df.filter(
        (pl.col("cycle") == cycle) & (pl.col("ms_level") == 2),
    )

    # One long-format frame per spectrum (one row per peak). Building a DataFrame from a list
    # of dicts of arrays would instead give one row per scan with array-valued cells.
    frames = []
    for scan in ms2_scans.iter_rows(named=True):
        spec = self.get_spectrum(
            scan["scan_uid"],
            precursor_trim=None,
            centroid=centroid,
            deisotope=deisotope,
        )
        if spec.mz.size == 0:
            continue
        frames.append(
            pd.DataFrame(
                {
                    "prec_mz": scan["prec_mz"],  # scalar, broadcast to every peak
                    "mz": spec.mz,
                    "inty": spec.inty,
                },
            ),
        )

    if not frames:
        print("No MS2 data found for this cycle.")
        return None

    spectradf = pd.concat(frames, ignore_index=True)
    # drop peaks with intensity below 1 and keep only the plotted columns
    spectradf = spectradf.loc[spectradf["inty"] >= 1, ["prec_mz", "mz", "inty"]]
    if spectradf.empty:
        print("No MS2 data found for this cycle.")
        return None

    # axis limits: x is the precursor (Q1) m/z, y is the fragment m/z
    min_prec, max_prec = spectradf["prec_mz"].min(), spectradf["prec_mz"].max()
    min_mz, max_mz = spectradf["mz"].min(), spectradf["mz"].max()

    def new_bounds_hook(plot, elem):
        # `elem` is unused but kept so the hook can be called with two arguments.
        plot.state.x_range.bounds = min_prec, max_prec
        plot.state.y_range.bounds = min_mz, max_mz

    # NOTE(review): the label says "MS1 survey scans" although the data are MS2 peaks;
    # looks like a copy-paste leftover - kept unchanged, confirm before renaming.
    points = hv.Points(
        spectradf,
        kdims=["prec_mz", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Q1 m/z",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )

    overlay = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    if title is not None:
        overlay = overlay.opts(title=title)

    # Panel wrapper used for HTML export and display
    layout = panel.Column(overlay)

    if filename is not None:
        if filename.endswith(".html"):
            layout.save(filename, embed=True)
        else:
            hv.save(overlay, filename, fmt="png")
        return None

    # Display path: delegate environment handling to _display_plot
    return _display_plot(overlay, layout)
1364
+
1365
+
1366
def plot_ms2_q1(
    self,
    feature_uid=None,
    q1_width=10.0,
    mz_tol=0.01,
    link_x=True,
    n=20,
    deisotope=True,
    centroid=True,
    filename=None,
):
    """
    Plot the intensity of a feature's top MS2 fragments as a function of precursor (Q1) m/z.

    Parameters:
        feature_uid: The feature unique identifier. Must be present in ``self.features_df``;
            if None or unknown, a message is printed and nothing is plotted.
        q1_width (float, optional): Half-width of the ``scan_uid`` window taken around the MS2
            scan closest to the feature (``scan_uid - q1_width`` .. ``scan_uid + q1_width``).
            Default is 10.0.
        mz_tol (float, optional): The m/z tolerance passed to ``_spec_to_mat``. Default is 0.01.
        link_x (bool, optional): If True, the subplots share their axes (``shared_axes`` layout
            option); if False, axis sharing is disabled. Default is True.
        n (int, optional): The number of most intense MS2 fragment m/z values to plot. Default is 20.
        deisotope (bool, optional): Passed to ``_spec_to_mat``. Default is True.
        centroid (bool, optional): Passed to ``_spec_to_mat``. Default is True.
        filename (str, optional): If given, the plot is saved instead of displayed. Names ending in
            ".html" are saved through Panel; anything else is saved with ``hv.save(..., fmt="png")``.

    Returns:
        None when the plot is saved or when an early exit is taken; otherwise whatever
        ``_display_plot`` returns.
    """
    if feature_uid is None:
        print("Please provide a feature id.")
        return None

    feats = self.features_df
    # Same guard as plot_feature_stats: the row lookups below use pandas semantics.
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()

    feature = feats[feats["feature_uid"] == feature_uid]
    if len(feature) == 0:
        print("Feature id not found in features_df.")
        # Without this return the .values[0] lookups below raise IndexError.
        return None

    ms2_specs = feature["ms2_specs"].values[0]
    if ms2_specs is None or len(ms2_specs) == 0:
        print("No MS2 data found for this feature.")
        return None

    # n most intense fragments of the first MS2 spectrum
    ms2_specs_df = ms2_specs[0].pandalize().sort_values(by="inty", ascending=False).head(n)
    top_mzs = ms2_specs_df["mz"].values.tolist()

    # Scalar precursor m/z; the original passed the whole pandas Series into polars arithmetic.
    feature_mz = float(feature["mz"].values[0])

    # MS2 scans of the cycle closest to the feature's retention time
    feature_scan = self.select_closest_scan(feature["rt"].values[0])
    cycle = feature_scan["cycle"][0]
    cycle_ms2 = self.scans_df.filter(
        (pl.col("cycle") == cycle) & (pl.col("ms_level") == 2),
    )
    closest = (
        (cycle_ms2["prec_mz"] - feature_mz).abs().arg_min() if len(cycle_ms2) > 0 else None
    )
    if closest is None:
        print(f"No MS2 scans found for cycle {cycle}.")
        return None
    center_uid = cycle_ms2["scan_uid"][closest]

    # scans whose scan_uid lies within q1_width of the selected MS2 scan
    # NOTE(review): this window is not restricted to ms_level == 2 - confirm that is intended.
    window = self.scans_df.filter(
        (pl.col("scan_uid") >= center_uid - q1_width)
        & (pl.col("scan_uid") <= center_uid + q1_width),
    )
    scan_uids = window["scan_uid"].to_list()
    q1s = window["prec_mz"].to_list()

    q1_prod = self._spec_to_mat(
        scan_uids,
        mz_ref=top_mzs,
        mz_tol=mz_tol,
        deisotope=deisotope,
        centroid=centroid,
    )
    q1_df = pd.DataFrame({"q1": q1s})

    has_q1_ratio = "q1_ratio" in ms2_specs_df.columns
    frag_names = []
    for i, mz in enumerate(top_mzs):
        name = f"frag {mz:.3f}"
        if has_q1_ratio:
            # ms2_specs_df is in the same order as top_mzs, so position i matches
            q1_ratio = ms2_specs_df["q1_ratio"].values[i]
            name += f" q1r: {q1_ratio:.2f}"
        frag_names.append(name)
        q1_df[name] = q1_prod[i]
    # scan_uid is only used by the hover tooltips
    q1_df["scan_uid"] = scan_uids

    # one curve per fragment
    eic_plots: list[hv.Curve] = []
    for name in frag_names:
        eic = hv.Curve(q1_df, kdims=["q1"], vdims=[name, "scan_uid"]).opts(
            title=name,
            xlabel="Q1 (m/z)",
            ylabel=f"Inty_f{len(eic_plots)}",
            width=250,
            height=200,
            axiswise=True,
            color="black",
            tools=[HoverTool(tooltips=[("Q1", "@q1"), ("scan_uid", "@scan_uid")])],
        )
        eic_plots.append(eic)

    # Set shared_axes explicitly so that link_x=False actually unlinks the subplots.
    layout = hv.Layout(eic_plots).cols(4).opts(shared_axes=bool(link_x))

    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
        return None

    # Display path: delegate environment handling to _display_plot
    layout_obj = panel.panel(layout)
    return _display_plot(layout, layout_obj)
1468
+
1469
+
1470
def plot_dda_stats(
    self,
    filename=None,
):
    """
    Generate one scatter plot per DDA statistic, stacked vertically.

    The data come from ``self.get_dda_stats()``. Only the columns "scan_uid", "cycle", "rt" and
    the metrics in ``cols_to_plot`` are kept, and every value below 0 is replaced by a missing
    value. Each plot uses "cycle" as the x-axis and one metric as the y-axis; the hover tooltips
    show "scan_uid", "cycle", "rt" and all metrics.

    Parameters:
        filename (str, optional): If given, the plot is saved instead of displayed. Names ending in
            ".html" are saved through Panel; anything else is saved with ``hv.save(..., fmt="png")``.

    Returns:
        None when the plot is saved; otherwise whatever ``_display_plot`` returns.
    """
    cols_to_plot = [
        "inty_tot",
        "bl",
        "ms2_n",
        "time_cycle",
        "time_ms1_to_ms1",
        "time_ms1_to_ms2",
        "time_ms2_to_ms2",
        "time_ms2_to_ms1",
    ]

    stats = self.get_dda_stats()
    # NOTE(review): the masking below uses pandas semantics; convert if a polars-like frame
    # is returned - confirm the actual return type of get_dda_stats.
    if hasattr(stats, "to_pandas"):
        stats = stats.to_pandas()

    # Keep the hover columns plus the metrics. Copy explicitly so the assignment below
    # operates on an independent frame rather than on a derived selection.
    stats = stats[["scan_uid", "cycle", "rt", *cols_to_plot]].copy()
    # negative values are replaced by missing values
    stats[stats < 0] = None

    # hover tooltips shared by all plots
    common_tooltips = [
        ("scan_uid", "@scan_uid"),
        ("cycle", "@cycle"),
        ("rt", "@rt"),
    ] + [(c, f"@{c}") for c in cols_to_plot]

    scatter_plots = []
    for col in cols_to_plot:
        hover = HoverTool(tooltips=common_tooltips)
        # the plotted metric comes first; the remaining columns are carried along for hover
        scatter = hv.Scatter(
            stats,
            kdims="cycle",
            vdims=[col, "scan_uid", "rt"] + [c for c in cols_to_plot if c != col],
        ).opts(
            title=col,
            xlabel="Cycle",
            ylabel=col,
            height=250,
            width=800,
            tools=[hover],
            size=3,
        )
        scatter_plots.append(scatter)

    layout = hv.Layout(scatter_plots).cols(1)

    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
        return None

    # Display path: delegate environment handling to _display_plot
    layout_obj = panel.panel(layout)
    return _display_plot(layout, layout_obj)
1545
+
1546
+
1547
def plot_feature_stats(
    self,
    filename=None,
):
    """
    Generate overlaid distribution plots for selected feature metrics.

    For every metric two distributions are drawn: features whose ``ms2_scans`` entry is not
    null ("With MS2") and features whose entry is null ("Without MS2"). Metrics include m/z and
    m/z width, log10 intensity, log10 quality, retention time, chromatographic scores, the number
    of peaks and log10 summed intensity of the first MS2 spectrum, and the MS2-to-MS1 ratio.

    Parameters:
        filename (str, optional): If given, the plot is saved instead of displayed. Names ending in
            ".html" are saved through Panel; anything else is saved with ``hv.save(..., fmt="png")``.

    Returns:
        None when the plot is saved; otherwise whatever ``_display_plot`` returns.
    """

    def _first_spectrum(specs):
        # First spectrum of an ms2_specs cell, or None for missing/empty/non-sequence cells.
        if specs is None:
            return None
        try:
            return specs[0] if len(specs) > 0 else None
        except TypeError:
            return None

    def _log10_positive(values):
        # log10 of strictly positive values; everything else becomes NaN (no log of <= 0).
        return np.log10(values.where(values > 0))

    # Work on an independent pandas copy of features_df
    feats = self.features_df
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()
    else:
        feats = feats.copy()

    # m/z width of each feature
    feats["mz_delta"] = feats["mz_end"] - feats["mz_start"]

    # number of peaks and summed intensity of the first MS2 spectrum (0 when there is none)
    first_specs = feats["ms2_specs"].apply(_first_spectrum)
    feats["MS2peaks"] = first_specs.apply(lambda s: len(s) if s is not None else 0)
    feats["MS2int"] = first_specs.apply(lambda s: sum(s.inty) if s is not None else 0)

    # ratio is computed on the linear scale, before the log transforms below
    feats["MS2toMS1"] = feats["MS2int"] / feats["inty"]

    feats["inty"] = _log10_positive(feats["inty"])
    feats["quality"] = _log10_positive(feats["quality"])
    feats["MS2int"] = _log10_positive(feats["MS2int"])

    # split by presence of linked MS2 scans
    has_ms2 = feats["ms2_scans"].notnull()
    feats_with_MS2 = feats[has_ms2]
    feats_without_MS2 = feats[~has_ms2]

    # chrom_heights / chrom_heights_scaled were disabled in the original ("was bugging")
    cols_to_plot = [
        "mz",
        "mz_delta",
        "inty",
        "quality",
        "rt",
        "rt_delta",
        "chrom_coherence",
        "chrom_prominence",
        "chrom_prominence_scaled",
        "MS2peaks",
        "MS2int",
        "MS2toMS1",
    ]

    density_plots = []
    for col in cols_to_plot:
        # non-null values of both groups
        data_with = feats_with_MS2[col].dropna().values
        data_without = feats_without_MS2[col].dropna().values

        dist_with = hv.Distribution(data_with, label="With MS2").opts(
            color="red",
            alpha=0.6,
        )
        dist_without = hv.Distribution(data_without, label="Without MS2").opts(
            color="blue",
            alpha=0.6,
        )

        overlay = (dist_with * dist_without).opts(
            title=col,
            show_legend=True,
            tools=["hover"],
        )
        density_plots.append(overlay)

    # three columns, independent axes per metric
    layout = hv.Layout(density_plots).cols(3).opts(shared_axes=False)

    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
        return None

    # Display path: delegate environment handling to _display_plot
    layout_obj = panel.panel(layout)
    return _display_plot(layout, layout_obj)
1663
+
1664
+
1665
+ def plot_tic(
1666
+ self,
1667
+ title=None,
1668
+ filename=None,
1669
+ ):
1670
+ # get all ms_level ==1 scans from sefl.scans_df
1671
+ scans = self.scans_df.filter(pl.col("ms_level") == 1)
1672
+ # select rt, scan_uid and inty_tot, convert to pandas
1673
+ data = scans[["rt", "scan_uid", "inty_tot"]].to_pandas()
1674
+ # sort by rt
1675
+ data = data.sort_values("rt")
1676
+
1677
+ # plot using hv.Curve
1678
+ tic = hv.Curve(data, kdims=["rt"], vdims=["inty_tot"])
1679
+ tic.opts(
1680
+ title=title,
1681
+ xlabel="Retention Time (min)",
1682
+ ylabel="TIC",
1683
+ height=250,
1684
+ width=100,
1685
+ )