masster 0.2.4__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (55)
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1610 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +391 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -719
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1398 -886
  41. masster/study/helpers.py +1650 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1201 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +149 -134
  48. masster/study/study.py +606 -522
  49. masster/study/study5_schema.json +247 -241
  50. {masster-0.2.4.dist-info → masster-0.3.0.dist-info}/METADATA +15 -10
  51. masster-0.3.0.dist-info/RECORD +59 -0
  52. {masster-0.2.4.dist-info → masster-0.3.0.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.4.dist-info/RECORD +0 -50
  54. {masster-0.2.4.dist-info → masster-0.3.0.dist-info}/WHEEL +0 -0
  55. {masster-0.2.4.dist-info → masster-0.3.0.dist-info}/entry_points.txt +0 -0
masster/sample/plot.py CHANGED
@@ -1,1622 +1,1610 @@
1
- """
2
- _plots.py
3
-
4
- This module provides visualization functions for mass spectrometry data analysis.
5
- It contains plotting utilities for extracted ion chromatograms (EICs), 2D data maps,
6
- feature visualizations, and interactive dashboards using modern visualization libraries.
7
-
8
- Key Features:
9
- - **Extracted Ion Chromatograms (EICs)**: Interactive chromatographic plotting with feature annotations.
10
- - **2D Data Visualization**: Mass spectrometry data visualization with datashader for large datasets.
11
- - **Feature Plotting**: Visualize detected features with retention time and m/z information.
12
- - **Interactive Dashboards**: Create interactive panels for data exploration and analysis.
13
- - **Multi-Sample Plotting**: Comparative visualizations across multiple samples.
14
- - **Export Capabilities**: Save plots in various formats (HTML, PNG, SVG).
15
-
16
- Dependencies:
17
- - `holoviews`: For high-level data visualization and interactive plots.
18
- - `datashader`: For rendering large datasets efficiently.
19
- - `panel`: For creating interactive web applications and dashboards.
20
- - `bokeh`: For low-level plotting control and customization.
21
- - `polars` and `pandas`: For data manipulation and processing.
22
- - `numpy`: For numerical computations.
23
-
24
- Functions:
25
- - `plot_eic()`: Generate extracted ion chromatograms with feature overlays.
26
- - `plot_2d()`: Create 2D mass spectrometry data visualizations.
27
- - `plot_features()`: Visualize detected features in retention time vs m/z space.
28
- - Various utility functions for plot styling and configuration.
29
-
30
- Supported Plot Types:
31
- - Extracted Ion Chromatograms (EIC)
32
- - Total Ion Chromatograms (TIC)
33
- - Base Peak Chromatograms (BPC)
34
- - 2D intensity maps (RT vs m/z)
35
- - Feature scatter plots
36
- - Interactive dashboards
37
-
38
- See Also:
39
- - `parameters._plot_parameters`: For plot-specific parameter configuration.
40
- - `single.py`: For applying plotting methods to ddafile objects.
41
- - `study.py`: For study-level visualization functions.
42
-
43
- """
44
-
45
- import os
46
-
47
- import datashader as ds
48
- import holoviews as hv
49
- import holoviews.operation.datashader as hd
50
- import numpy as np
51
- import pandas as pd
52
- import panel
53
- import polars as pl
54
-
55
- from bokeh.models import HoverTool
56
- from holoviews import dim
57
- from holoviews.plotting.util import process_cmap
58
- from matplotlib.colors import rgb2hex
59
-
60
- # Parameters removed - using hardcoded defaults
61
-
62
-
63
- hv.extension("bokeh")
64
-
65
-
66
def plot_eic(
    self,
    feature_uid=None,
    filename=None,
    rt_tol=10,
    rt_tol_factor_plot=1,
    mz_tol=0.0005,
    mz_tol_factor_plot=1,
    link_x=False,
):
    """
    Plot Extracted Ion Chromatograms (EICs) for one or more features.

    For every selected feature the MS1 points in ``self.ms1_df`` that fall inside
    the feature's rt/mz box (widened by the plotting tolerances) are summed per
    retention time and drawn as a curve. Dashed vertical lines mark the feature's
    ``rt_start`` and ``rt_end``.

    Parameters:
        feature_uid (int or iterable of int, optional):
            Feature identifier(s) to plot. If None, all rows of ``features_df`` are plotted.
        filename (str, optional):
            Output file path. If ending with `.html`, saves an interactive HTML file;
            otherwise saves a PNG. If None, the plot is shown interactively.
        rt_tol (float, default=10):
            Retention time tolerance added on both sides of ``rt_start``/``rt_end``.
        rt_tol_factor_plot (float, default=1):
            Multiplier applied to ``rt_tol`` before filtering.
        mz_tol (float, default=0.0005):
            m/z tolerance added on both sides of ``mz_start``/``mz_end``.
        mz_tol_factor_plot (float, default=1):
            Multiplier applied to ``mz_tol`` before filtering.
        link_x (bool, default=False):
            If True, the subplots share their axes; otherwise every subplot is independent.

    Returns:
        None

    Notes:
        - ``ms1_df`` is filtered with Polars expressions, so it must be a Polars DataFrame.
        - ``features_df`` may be a Polars or a pandas DataFrame; it is normalised to
          pandas in the same way the other plotting functions of this module do.
        - Features without any MS1 point in their window are skipped with a message.
    """
    if self.features_df is None or self.ms1_df is None:
        print("No feature or MS1 data available.")
        return

    # Only these columns are read below; selecting them first keeps the
    # pandas conversion cheap and avoids converting unrelated columns.
    needed = ["feature_uid", "rt_start", "rt_end", "mz", "mz_start", "mz_end"]
    feats = self.features_df
    if hasattr(feats, "to_pandas"):
        feats = feats.select(needed).to_pandas()
    else:
        feats = feats[needed]

    if feature_uid is not None:
        # Accept a single id (including numpy integers) or any iterable of ids.
        if isinstance(feature_uid, (int, np.integer)):
            wanted = [feature_uid]
        else:
            wanted = list(feature_uid)
        feats = feats[feats["feature_uid"].isin(wanted)]

    mz_tol_plot = mz_tol * mz_tol_factor_plot
    rt_tol_plot = rt_tol * rt_tol_factor_plot

    eic_plots = []
    for row in feats.itertuples(index=False):
        uid = row.feature_uid

        # Restrict the MS1 point cloud to the (widened) feature box.
        eic_df = self.ms1_df.filter(
            pl.col("rt") >= row.rt_start - rt_tol_plot,
            pl.col("rt") <= row.rt_end + rt_tol_plot,
        )
        eic_df = eic_df.filter(
            pl.col("mz") >= row.mz_start - mz_tol_plot,
            pl.col("mz") <= row.mz_end + mz_tol_plot,
        )

        if eic_df.is_empty():
            print("No MS1 data found in the specified window.")
            continue

        # Sum all intensities that share a retention time.
        eic_df = eic_df.to_pandas()
        eic_df = eic_df.groupby("rt").agg({"inty": "sum"}).reset_index()
        # A per-feature value dimension name keeps the y axes independent.
        yname = f"inty_{uid}"
        eic_df = eic_df.rename(columns={"inty": yname})

        eic = hv.Curve(eic_df, kdims=["rt"], vdims=[yname]).opts(
            title=f"EIC for feature {uid}, mz = {row.mz:.4f}",
            xlabel="Retention time (s)",
            ylabel="Intensity",
            width=1000,
            tools=["hover"],
            height=250,
            axiswise=True,
            color="black",
        )

        # Mark the feature's retention time boundaries.
        for boundary in (row.rt_start, row.rt_end):
            eic = eic * hv.VLine(boundary).opts(
                color="blue",
                line_width=1,
                line_dash="dashed",
                axiswise=True,
            )

        eic_plots.append(eic)

    if not eic_plots:
        print("No EICs to plot.")
        return

    hv_layout = hv.Layout(eic_plots).opts(shared_axes=bool(link_x)).cols(1)

    if filename is None:
        panel.Column(hv_layout).show()
    elif filename.endswith(".html"):
        panel.Column(hv_layout).save(filename, embed=True)
    else:
        # hv.save is given the HoloViews object (as plot_2d does), not the Panel wrapper.
        hv.save(hv_layout, filename, fmt="png")
208
-
209
-
210
def plot_2d(
    self,
    filename=None,
    show_features=True,
    show_only_features_with_ms2=False,
    show_isotopes=False,
    show_ms2=False,
    title=None,
    cmap=None,
    marker='circle',
    markersize=10,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    mz_range=None,
    rt_range=None,
):
    """
    Plot a two-dimensional (rt vs. m/z) view of the MS1 data with optional overlays
    of features, isotopes and MS2 scans.

    Parameters:
        filename (str, optional):
            Path to save the plot. If it ends with ".html" an interactive HTML file is
            written; any other name is saved as PNG. If None the plot is shown.
        show_features (bool, default True):
            Whether to overlay the features from ``self.features_df``.
        show_only_features_with_ms2 (bool, default False):
            If True, features without MS2 scans are left out of the overlay.
        show_isotopes (bool, default False):
            Whether to overlay features with ``iso > 0``. Only evaluated together
            with ``show_features``.
        show_ms2 (bool, default False):
            Whether to overlay the MS2 scans from ``self.scans_df``.
        title (str, optional):
            Title of the plot.
        cmap (str, optional):
            Colormap for the rasterized background. None selects "iridescent_r";
            "grey" is translated to "Greys256".
        marker (str, default 'circle'):
            Marker used for the feature and isotope points.
        markersize (int, default 10):
            Marker size; replaced by 2 when saving to a non-HTML file.
        raster_dynamic (bool, default True):
            Whether the background is rasterized dynamically. Forced to False when
            saving to a non-HTML file.
        raster_max_px (int, default 8):
            ``max_px`` passed to dynspread.
        raster_threshold (float, default 0.8):
            ``threshold`` passed to dynspread.
        mz_range (sequence of two floats, optional):
            Inclusive m/z window applied to the MS1 data and the feature overlays.
        rt_range (sequence of two floats, optional):
            Inclusive retention time window applied to the MS1 data and the feature overlays.

    Behavior:
        - Returns early (after logging an error) when ``self.ms1_df`` is None or when
          no MS1 point with ``inty >= 1`` remains inside the requested window.
        - The MS1 points are filtered with Polars and converted to pandas before
          being handed to HoloViews.
        - The background is a max-intensity raster with logarithmic colour scaling;
          axis bounds are pinned to the extent of the plotted MS1 data.

    Returns:
        None
    """

    if self.ms1_df is None:
        self.logger.error("No MS1 data available.")
        return

    if cmap is None:
        cmap = "iridescent_r"
    elif cmap == "grey":
        cmap = "Greys256"

    # Filter with Polars expressions (boolean-mask indexing is not portable
    # across Polars versions), then convert once for plotting.
    spectra = self.ms1_df.select(["rt", "mz", "inty"]).filter(pl.col("inty") >= 1)
    if mz_range is not None:
        spectra = spectra.filter(
            (pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]),
        )
    if rt_range is not None:
        spectra = spectra.filter(
            (pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]),
        )
    if spectra.is_empty():
        # Without data the axis bounds below would be undefined.
        self.logger.error("No MS1 data found in the specified window.")
        return
    spectradf = spectra.to_pandas()

    maxrt = spectradf["rt"].max()
    minrt = spectradf["rt"].min()
    maxmz = spectradf["mz"].max()
    minmz = spectradf["mz"].min()

    def new_bounds_hook(plot, elem):
        # Prevent panning/zooming beyond the extent of the plotted data.
        plot.state.x_range.bounds = minrt, maxrt
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        spectradf,
        kdims=["rt", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    size_1 = size_2 = markersize
    color_1 = "forestgreen"
    color_2 = "darkorange"
    if filename is not None and not filename.endswith(".html"):
        # Static image export: small markers and no dynamic rasterization.
        size_1 = size_2 = 2
        raster_dynamic = False

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Retention time (s)",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )

    raster = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    feature_points_1 = None
    feature_points_2 = None
    feature_points_3 = None
    feature_points_4 = None
    feature_points_iso = None

    def first_scan(value):
        # Keep only the first MS2 scan of a feature. List columns arrive as
        # numpy arrays after a Polars -> pandas conversion, so accept both.
        if isinstance(value, (list, tuple, np.ndarray)):
            return value[0] if len(value) > 0 else None
        return value

    if self.features_df is not None and show_features:
        feats = self.features_df
        # Work on a pandas copy so the stored frame is never modified.
        if hasattr(feats, "to_pandas"):
            feats = feats.to_pandas()
        else:
            feats = feats.copy()
        feats["ms2_scans"] = feats["ms2_scans"].apply(first_scan)
        if mz_range is not None:
            feats = feats[(feats["mz"] >= mz_range[0]) & (feats["mz"] <= mz_range[1])]
        if rt_range is not None:
            feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]

        # Split off the isotopes before narrowing to the main (iso == 0) features,
        # so they honour the same rt/mz window.
        isotopes = feats[feats["iso"] > 0]
        feats = feats[feats["iso"] == 0]

        with_ms2 = feats[feats["ms2_scans"].notnull()]
        feature_points_1 = hv.Points(
            with_ms2,
            kdims=["rt", "mz"],
            vdims=[
                "feature_uid",
                "inty",
                "quality",
                "rt_delta",
                "ms2_scans",
                "chrom_coherence",
                "chrom_prominence_scaled",
            ],
            label="Features with MS2 data",
        ).options(
            color=color_1,
            marker=marker,
            size=size_1,
            tools=["hover"],
        )

        without_ms2 = feats[feats["ms2_scans"].isnull()]
        feature_points_2 = hv.Points(
            without_ms2,
            kdims=["rt", "mz"],
            vdims=[
                "feature_uid",
                "inty",
                "quality",
                "rt_delta",
                "chrom_coherence",
                "chrom_prominence_scaled",
            ],
            label="Features without MS2 data",
        ).options(
            color="red",
            size=size_2,
            marker=marker,
            tools=["hover"],
        )

        if show_isotopes:
            feature_points_iso = hv.Points(
                isotopes,
                kdims=["rt", "mz"],
                vdims=[
                    "feature_uid",
                    "inty",
                    "quality",
                    "rt_delta",
                    "iso",
                    "iso_of",
                    "chrom_coherence",
                    "chrom_prominence_scaled",
                ],
                label="Isotopes",
            ).options(
                color="violet",
                marker=marker,
                size=size_1,
                tools=["hover"],
            )

    if show_ms2 and self.scans_df is not None:
        ms2_scans = self.scans_df.filter(pl.col("ms_level") == 2)

        # MS2 scans with a negative feature_uid are treated as not linked to a feature.
        ms2_orphan = ms2_scans.filter(pl.col("feature_uid") < 0)
        if len(ms2_orphan) > 0:
            feature_points_3 = hv.Points(
                ms2_orphan.to_pandas(),
                kdims=["rt", "prec_mz"],
                vdims=["index", "inty_tot", "bl"],
                label="Orphan MS2 scans",
            ).options(
                color=color_2,
                marker="x",
                size=size_2,
                tools=["hover"],
            )

        ms2_linked = ms2_scans.filter(pl.col("feature_uid") >= 0)
        if len(ms2_linked) > 0:
            feature_points_4 = hv.Points(
                ms2_linked.to_pandas(),
                kdims=["rt", "prec_mz"],
                vdims=["index", "inty_tot", "bl"],
                label="Linked MS2 scans",
            ).options(
                color=color_1,
                marker="x",
                size=size_2,
                tools=["hover"],
            )

    # Stack the layers; later layers are drawn on top of earlier ones.
    overlay = raster
    if feature_points_4 is not None:
        overlay = overlay * feature_points_4
    if feature_points_3 is not None:
        overlay = overlay * feature_points_3
    if feature_points_1 is not None:
        overlay = overlay * feature_points_1
    if not show_only_features_with_ms2 and feature_points_2 is not None:
        overlay = overlay * feature_points_2
    if feature_points_iso is not None:
        overlay = overlay * feature_points_iso

    if title is not None:
        overlay = overlay.opts(title=title)

    layout = panel.Column(overlay)

    if filename is None:
        layout.show()
    elif filename.endswith(".html"):
        layout.save(filename, embed=True)
    else:
        hv.save(overlay, filename, fmt="png")
521
-
522
-
523
- def plot_2d_oracle(
524
- self,
525
- oracle_folder=None,
526
- link_by_feature_uid=None,
527
- colorby='hg',
528
- filename=None,
529
- min_id_level=None,
530
- max_id_level=None,
531
- min_ms_level=None,
532
- title=None,
533
- cmap=None,
534
- markersize=10,
535
- raster_dynamic=True,
536
- raster_max_px=8,
537
- raster_threshold=0.8,
538
- mz_range=None,
539
- rt_range=None,
540
- ):
541
- """
542
- Plot a 2D overlay visualization of MS1 survey scans and feature annotations, including oracle annotation data if provided.
543
-
544
- This function reads the primary mass spectrometry data, applies filtering, processes oracle annotation data (if provided),
545
- and produces an interactive plot combining various data layers. The visualization includes rasterized MS1 data and feature
546
- points colored by annotation.
547
-
548
- Parameters:
549
- self: The object instance containing MS1 and feature data.
550
- oracle_folder (str, optional): Path to the oracle folder containing the annotation file
551
- (expected at "<oracle_folder>/diag/summary_by_feature.csv"). If None, oracle data is not used.
552
- link_by_feature_uid (bool, optional): Whether to link features by their IDs in the overlay.
553
- colorby (str, optional): Parameter that determines the color assignment for annotated features.
554
- Expected values include 'hg', 'class', 'id_class', or 'id_hg'. Default is 'hg'.
555
- filename (str, optional): Name of the file where the plot should be saved. If provided and ends with
556
- ".html", the panel layout is saved as an interactive HTML file; otherwise, the output is saved as a PNG.
557
- min_id_level (int, optional): Minimum identification level for oracle annotations to include.
558
- max_id_level (int, optional): Maximum identification level for oracle annotations to include.
559
- min_ms_level (int, optional): Minimum MS level for features to include.
560
- title (str, optional): Title to be displayed on the resulting plot. Default is None.
561
- cmap (str, optional): Colormap to be used for the rasterized plot. Acceptable values include None, "grey",
562
- "iridescent", or other valid colormap names. Default is None. When None, 'Greys256' is used.
563
- markersize (int, optional): Marker size for feature points in the overlay. Default is 10.
564
- raster_dynamic (bool, optional): If True, enables dynamic rasterization of the overlay. If filename is provided
565
- and does not end with ".html", raster_dynamic is set to False. Default is True.
566
- raster_max_px (int, optional): Maximum pixel size for dynamic rasterization. Default is 8.
567
- raster_threshold (float, optional): Threshold for dynamic raster spread. Default is 0.8.
568
- mz_range (tuple, optional): m/z range for filtering MS1 data.
569
- rt_range (tuple, optional): Retention time range for filtering MS1 data.
570
-
571
- Returns:
572
- None
573
-
574
- The function either displays the interactive panel layout or saves the visualization to a file based on
575
- the provided filename. If the primary file object or feature data is missing, the function prints an
576
- informative message and returns without plotting.
577
- """
578
-
579
- if self.file_obj is None:
580
- print("Please load a file first.")
581
- return
582
-
583
- if cmap is None or cmap == "grey":
584
- cmap = "Greys256"
585
- elif cmap == "iridescent":
586
- cmap = "iridescent_r"
587
-
588
- # get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
589
- spectradf = self.ms1_df.to_pandas()
590
-
591
- # remove any inty<1
592
- spectradf = spectradf[spectradf["inty"] >= 1]
593
- # keep only rt, mz, and inty
594
- spectradf = spectradf[["rt", "mz", "inty"]]
595
- if mz_range is not None:
596
- spectradf = spectradf[
597
- (spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])
598
- ]
599
- if rt_range is not None:
600
- spectradf = spectradf[
601
- (spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])
602
- ]
603
-
604
- maxrt = spectradf["rt"].max()
605
- minrt = spectradf["rt"].min()
606
- maxmz = spectradf["mz"].max()
607
- minmz = spectradf["mz"].min()
608
-
609
- def new_bounds_hook(plot, elem):
610
- x_range = plot.state.x_range
611
- y_range = plot.state.y_range
612
- x_range.bounds = minrt, maxrt
613
- y_range.bounds = minmz, maxmz
614
-
615
- points = hv.Points(
616
- spectradf,
617
- kdims=["rt", "mz"],
618
- vdims=["inty"],
619
- label="MS1 survey scans",
620
- ).opts(
621
- fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
622
- color=np.log(dim("inty")),
623
- colorbar=True,
624
- cmap="Magma",
625
- tools=["hover"],
626
- )
627
-
628
- if filename is not None:
629
- dyn = False
630
- if not filename.endswith(".html"):
631
- raster_dynamic = False
632
-
633
- dyn = raster_dynamic
634
- raster = hd.rasterize(
635
- points,
636
- aggregator=ds.max("inty"),
637
- interpolation="bilinear",
638
- dynamic=dyn, # alpha=10, min_alpha=0,
639
- ).opts(
640
- active_tools=["box_zoom"],
641
- cmap=process_cmap(cmap, provider="bokeh"), # blues
642
- tools=["hover"],
643
- hooks=[new_bounds_hook],
644
- width=1000,
645
- height=1000,
646
- cnorm="log",
647
- xlabel="Retention time (s)",
648
- ylabel="m/z",
649
- colorbar=True,
650
- colorbar_position="right",
651
- axiswise=True,
652
- )
653
- raster = hd.dynspread(
654
- raster,
655
- threshold=raster_threshold,
656
- how="add",
657
- shape="square",
658
- max_px=raster_max_px,
659
- )
660
-
661
- if self.features_df is None:
662
- return
663
- feats = self.features_df.clone()
664
-
665
- # Convert to pandas for oracle operations that require pandas functionality
666
- if hasattr(feats, "to_pandas"):
667
- feats = feats.to_pandas()
668
-
669
- # check if annotationfile is not None
670
- if oracle_folder is None:
671
- return
672
- # try to read the annotationfile as a csv file and add it to feats
673
- try:
674
- oracle_data = pd.read_csv(
675
- os.path.join(oracle_folder, "diag", "summary_by_feature.csv"),
676
- )
677
- except:
678
- print(f"Could not read {oracle_folder}/diag/summary_by_feature.csv")
679
- return
680
-
681
- if link_by_feature_uid:
682
- # scan_idx slaw_id slaw_ms2_id mz rt level formula ion species name rarity lib_id hg mod lib score score2 score_db score_db_data ms2_tic ms2_evidence ms2_matched_n ms2_missed_n ms2_matched ms2_missed ms2_top1
683
- cols_to_keep = [
684
- "title",
685
- "scan_idx",
686
- "mslevel",
687
- "hits",
688
- "id_level",
689
- "id_label",
690
- "id_ion",
691
- "id_class",
692
- "id_evidence",
693
- "score",
694
- "score2",
695
- ]
696
- oracle_data = oracle_data[cols_to_keep]
697
- # extract feature_uid from title. It begins with "fid:XYZ;"
698
- oracle_data["feature_uid"] = oracle_data["title"].str.extract(r"fid:(\d+)")
699
- oracle_data["feature_uid"] = oracle_data["feature_uid"].astype(int)
700
- # sort by id_level, remove duplicate feature_uid, keep the first one
701
- oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
702
- oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")
703
- else:
704
- cols_to_keep = [
705
- "precursor",
706
- "rt",
707
- "title",
708
- "scan_idx",
709
- "mslevel",
710
- "hits",
711
- "id_level",
712
- "id_label",
713
- "id_ion",
714
- "id_class",
715
- "id_evidence",
716
- "score",
717
- "score2",
718
- ]
719
- # link
720
- oracle_data = oracle_data[cols_to_keep]
721
- oracle_data["feature_uid"] = None
722
- # iterate over the rows and find the feature_uid in feats by looking at the closest rt and mz
723
- for i, row in oracle_data.iterrows():
724
- candidates = feats[
725
- (abs(feats["rt"] - row["rt"]) < 1)
726
- & (abs(feats["mz"] - row["precursor"]) < 0.005)
727
- ].copy()
728
- if len(candidates) > 0:
729
- # sort by delta rt
730
- candidates["delta_rt"] = abs(candidates["rt"] - row["rt"])
731
- candidates = candidates.sort_values(by=["delta_rt"])
732
- oracle_data.at[i, "feature_uid"] = candidates["feature_uid"].values[0]
733
- # remove precursor and rt columns
734
- oracle_data = oracle_data.drop(columns=["precursor", "rt"])
735
-
736
- feats = feats.merge(oracle_data, how="left", on="feature_uid")
737
-
738
- # filter feats by id_level
739
- if min_id_level is not None:
740
- feats = feats[(feats["id_level"] >= min_id_level)]
741
- if max_id_level is not None:
742
- feats = feats[(feats["id_level"] <= max_id_level)]
743
- if min_ms_level is not None:
744
- feats = feats[(feats["mslevel"] >= min_ms_level)]
745
-
746
- feats["color"] = "black"
747
-
748
- cvalues = None
749
- if colorby in ["class", "hg", "id_class", "id_hg"]:
750
- # replace nans in feats['id_class'] with 'mix'
751
- feats["id_class"] = feats["id_class"].fillna("mix")
752
- cvalues = feats["id_class"].unique()
753
- # sort alphabetically
754
- cvalues = sorted(cvalues)
755
- # flip the strings left to right
756
- fcvalues = [cvalues[i][::-1] for i in range(len(cvalues))]
757
- # sort in alphabetical order the flipped strings and return the index
758
- idx = np.argsort(fcvalues)
759
- # apply to cvalues
760
- cvalues = [cvalues[i] for i in idx]
761
- elif colorby in ["ion", "id_ion"]:
762
- cvalues = feats["id_ion"].unique()
763
- elif colorby in ["id_evidence", "ms2_evidence"]:
764
- cvalues = feats["id_evidence"].unique()
765
-
766
- if cvalues is not None:
767
- num_colors = len(cvalues)
768
- cmap = "rainbow"
769
- cmap_provider = "colorcet"
770
- cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
771
- colors = [
772
- rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))])
773
- if num_colors > 1
774
- else rgb2hex(cm[0])
775
- for i in range(num_colors)
776
- ]
777
- # assign color to each row based on id_class. If id_class is null, assign 'black'
778
- feats["color"] = "black"
779
-
780
- for i, c in enumerate(cvalues):
781
- if colorby in ["class", "hg", "id_class", "id_hg"]:
782
- feats.loc[feats["id_class"] == c, "color"] = colors[i]
783
- elif colorby in ["ion", "id_ion"]:
784
- feats.loc[feats["id_ion"] == c, "color"] = colors[i]
785
- elif colorby in ["id_evidence", "ms2_evidence"]:
786
- feats.loc[feats["id_evidence"] == c, "color"] = colors[i]
787
-
788
- # replace NaN with 0 in id_level
789
- feats["id_level"] = feats["id_level"].fillna(0)
790
- # feature_points_1 are all features with column ms2_scans not null
791
- feature_points_1 = None
792
- feat_df = feats.copy()
793
- feat_df = feat_df[feat_df["id_level"] == 2]
794
-
795
- feature_points_1 = hv.Points(
796
- feat_df,
797
- kdims=["rt", "mz"],
798
- vdims=[
799
- "inty",
800
- "feature_uid",
801
- "id_level",
802
- "id_class",
803
- "id_label",
804
- "id_ion",
805
- "id_evidence",
806
- "score",
807
- "score2",
808
- "color",
809
- ],
810
- label="ID by MS2",
811
- ).options(
812
- color="color",
813
- marker="circle",
814
- size=markersize,
815
- fill_alpha=1.0,
816
- tools=["hover"],
817
- )
818
-
819
- # feature_points_2 are all features that have ms2_scans not null and id_level ==1
820
- feature_points_2 = None
821
- feat_df = feats.copy()
822
- feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] == 1)]
823
- if len(feat_df) > 0:
824
- feature_points_2 = hv.Points(
825
- feat_df,
826
- kdims=["rt", "mz"],
827
- vdims=[
828
- "inty",
829
- "feature_uid",
830
- "id_level",
831
- "id_label",
832
- "id_ion",
833
- "id_class",
834
- "color",
835
- ],
836
- label="ID by MS1, with MS2",
837
- ).options(
838
- color="color",
839
- marker="circle",
840
- size=markersize,
841
- fill_alpha=0.0,
842
- tools=["hover"],
843
- )
844
-
845
- # feature_points_3 are all features that have ms2_scans null and id_level ==1
846
- feature_points_3 = None
847
- feat_df = feats.copy()
848
- feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] == 1)]
849
- if len(feat_df) > 0:
850
- feature_points_3 = hv.Points(
851
- feat_df,
852
- kdims=["rt", "mz"],
853
- vdims=[
854
- "inty",
855
- "feature_uid",
856
- "id_level",
857
- "id_label",
858
- "id_ion",
859
- "id_class",
860
- "color",
861
- ],
862
- label="ID by MS1, no MS2",
863
- ).options(
864
- color="color",
865
- marker="diamond",
866
- size=markersize,
867
- fill_alpha=0.0,
868
- tools=["hover"],
869
- )
870
-
871
- # feature_points_4 are all features that have ms2_scans null and id_level ==0
872
- feature_points_4 = None
873
- feat_df = feats.copy()
874
- feat_df = feat_df[(feat_df["ms2_scans"].notnull()) & (feat_df["id_level"] < 1)]
875
- if len(feat_df) > 0:
876
- feature_points_4 = hv.Points(
877
- feat_df,
878
- kdims=["rt", "mz"],
879
- vdims=["inty", "feature_uid"],
880
- label="No ID, with MS2",
881
- ).options(
882
- color="gray",
883
- marker="circle",
884
- size=markersize,
885
- fill_alpha=0.0,
886
- tools=["hover"],
887
- )
888
-
889
- # feature_points_4 are all features that have ms2_scans null and id_level ==0
890
- feature_points_5 = None
891
- feat_df = feats.copy()
892
- feat_df = feat_df[(feat_df["ms2_scans"].isnull()) & (feat_df["id_level"] < 1)]
893
- if len(feat_df) > 0:
894
- feature_points_5 = hv.Points(
895
- feat_df,
896
- kdims=["rt", "mz"],
897
- vdims=["inty", "feature_uid"],
898
- label="No ID, no MS2",
899
- ).options(
900
- color="gray",
901
- marker="diamond",
902
- fill_alpha=0.0,
903
- size=markersize,
904
- tools=["hover"],
905
- )
906
-
907
- overlay = raster
908
-
909
- if feature_points_1 is not None:
910
- overlay = overlay * feature_points_1
911
- if feature_points_2 is not None:
912
- overlay = overlay * feature_points_2
913
- if feature_points_3 is not None:
914
- overlay = overlay * feature_points_3
915
- if feature_points_4 is not None:
916
- overlay = overlay * feature_points_4
917
- # if not show_only_features_with_ms2:
918
- if feature_points_5 is not None:
919
- overlay = overlay * feature_points_5
920
-
921
- if title is not None:
922
- overlay = overlay.opts(title=title)
923
-
924
- # Create a panel layout
925
- layout = panel.Column(overlay)
926
-
927
- if filename is not None:
928
- # if filename includes .html, save the panel layout to an HTML file
929
- if filename.endswith(".html"):
930
- layout.save(filename, embed=True)
931
- else:
932
- # save the panel layout as a png
933
- hv.save(overlay, filename, fmt="png")
934
- else:
935
- # Display the panel layout
936
- layout.show()
937
-
938
-
939
def plot_ms2_eic(
    self,
    feature_uid=None,
    rt_tol=5,
    mz_tol=0.05,
    link_x=True,
    n=20,
    deisotope=True,
    centroid=True,
    filename=None,
):
    """
    Plot the extracted ion chromatograms (EICs) of a feature's precursor and of its top n MS2 fragments.

    Parameters:
        feature_uid: Unique identifier of the feature. Must be present in ``features_df``;
            if None or unknown, a message is printed and nothing is plotted.
        rt_tol (float, optional): Retention time tolerance subtracted from the feature's
            ``rt_start`` and added to its ``rt_end``. Default is 5.
        mz_tol (float, optional): m/z tolerance passed to ``_spec_to_mat`` when extracting the
            precursor and fragment traces. Default is 0.05.
        link_x (bool, optional): If True, ``shared_axes=True`` is set on the layout. Default is True.
        n (int, optional): Number of most intense MS2 fragments to plot. Default is 20.
        deisotope (bool, optional): Forwarded to ``_spec_to_mat`` for the fragment traces. Default is True.
        centroid (bool, optional): Forwarded to ``_spec_to_mat`` for the fragment traces. Default is True.
        filename (str, optional): If it ends with ".html" the layout is saved as HTML through Panel,
            any other value is saved as PNG through HoloViews. If None, the layout is shown.

    Returns:
        None

    Notes:
        - The fragment m/z values are taken from the first entry of the feature's ``ms2_specs``.
        - For every cycle of the selected MS1 scans, the MS2 scan whose ``prec_mz`` is closest to the
          feature m/z (within a fixed +/- 5 window) supplies the fragment intensities.
        - Plots are arranged in a grid of 4 columns.
    """
    # plots the EIC for a given feature id including the EIC of the top n MS2 fragments

    if feature_uid is None:
        print("Please provide a feature id.")
        return
    # check if feature_uid is in features_df
    if feature_uid not in self.features_df["feature_uid"].values:
        print("Feature id not found in features_df.")
        # without this return the lookups below fail on an empty selection
        return

    feature = self.features_df[self.features_df["feature_uid"] == feature_uid]
    ms2_specs = feature["ms2_specs"].values[0]
    if ms2_specs is None or len(ms2_specs) == 0:
        print("No MS2 data found for this feature.")
        return

    # m/z of the top n fragments (by intensity) of the first MS2 spectrum
    top_fragments = ms2_specs[0].pandalize().sort_values(by="inty", ascending=False).head(n)
    top_mzs = top_fragments["mz"].values.tolist()

    # use a plain scalar for the feature m/z; a Series must not leak into polars expressions
    feature_mz = float(feature["mz"].values[0])
    rt_start = feature["rt_start"].values[0] - rt_tol
    rt_end = feature["rt_end"].values[0] + rt_tol

    # MS1 scans inside the (extended) retention time window of the feature
    ms1_scans = (
        self.scans_df.filter(pl.col("ms_level") == 1)
        .filter(pl.col("rt") > rt_start)
        .filter(pl.col("rt") < rt_end)
    )
    if len(ms1_scans) == 0:
        print(f"No scans found between {rt_start} and {rt_end}.")
        return
    rts = ms1_scans["rt"].to_list()

    eic_prec = self._spec_to_mat(
        ms1_scans["scan_uid"].to_list(),
        mz_ref=[feature_mz],
        mz_tol=mz_tol,
        deisotope=False,
        centroid=True,
    )
    # convert eic_prec from matrix to list
    eic_prec = eic_prec[0].tolist()

    # keep the cycles in scan order so that the MS2 scans line up with `rts`
    cycles = ms1_scans["cycle"].unique(maintain_order=True)
    scan_uids = []
    for cycle in cycles:
        ms2_scans = (
            self.scans_df.filter(pl.col("cycle") == cycle)
            .filter(pl.col("ms_level") == 2)
            .filter(pl.col("prec_mz") > feature_mz - 5)
            .filter(pl.col("prec_mz") < feature_mz + 5)
        )
        if len(ms2_scans) == 0:
            # NOTE(review): the message mentions mz_tol, but the precursor window is the fixed +/- 5 above
            print(
                f"No scans found for cycle {cycle} and mz {feature_mz}. Increase mz_tol tolerance.",
            )
            return
        # MS2 scan of this cycle whose precursor m/z is closest to the feature m/z
        closest = ms2_scans[(ms2_scans["prec_mz"] - feature_mz).abs().arg_sort()[:1]]
        scan_uids.append(closest["scan_uid"][0])

    eic_prod = self._spec_to_mat(
        scan_uids,
        mz_ref=top_mzs,
        mz_tol=mz_tol,
        deisotope=deisotope,
        centroid=centroid,
    )

    prec_name = f"prec {feature_mz:.3f}"
    eic_df = pd.DataFrame({"rt": rts, prec_name: eic_prec})
    # add scan_uid to eic_df for the tooltips
    eic_df["scan_uid"] = scan_uids

    frag_names = [prec_name]
    for i, mz in enumerate(top_mzs):
        name = f"frag {mz:.3f}"
        frag_names.append(name)
        eic_df[name] = eic_prod[i]

    # one curve per trace (precursor first, then the fragments)
    eic_plots = []
    for name in frag_names:
        eic = hv.Curve(eic_df, kdims=["rt"], vdims=[name, "scan_uid"]).opts(
            title=name,
            xlabel="RT (s)",
            ylabel=f"Inty_f{len(eic_plots)}",
            width=250,
            height=200,
            axiswise=True,
            color="black",
            tools=[HoverTool(tooltips=[("rt", "@rt"), ("scan_uid", "@scan_uid")])],
        )
        eic_plots.append(eic)

    layout = hv.Layout(eic_plots).cols(4)
    if link_x:
        layout = layout.opts(shared_axes=True)

    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
    else:
        panel.panel(layout).show()
1084
-
1085
-
1086
def plot_ms2_cycle(
    self,
    cycle=None,
    filename=None,
    title=None,
    cmap=None,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    centroid=True,
    deisotope=True,
):
    """
    Plot all MS2 spectra of one cycle as a rasterized map of precursor m/z versus fragment m/z.

    Parameters:
        cycle: Cycle number to plot. Must be present in ``scans_df["cycle"]``.
        filename (str, optional): If it ends with ".html" the layout is saved as HTML through Panel,
            any other value is saved as PNG through HoloViews. If None, the layout is shown.
        title (str, optional): Plot title; only applied when not None.
        cmap (str, optional): Colormap name. None selects "iridescent_r"; "grey" is mapped to "Greys256".
        raster_dynamic (bool, optional): Passed as ``dynamic`` to ``rasterize``. Default is True.
        raster_max_px (int, optional): Passed as ``max_px`` to ``dynspread``. Default is 8.
        raster_threshold (float, optional): Passed as ``threshold`` to ``dynspread``. Default is 0.8.
        centroid (bool, optional): Forwarded to ``get_spectrum``. Default is True.
        deisotope (bool, optional): Forwarded to ``get_spectrum``. Default is True.

    Returns:
        None
    """
    if self.file_obj is None:
        print("Please load a mzML file first.")
        return

    if cycle is None:
        print("Please provide a cycle number.")
        return

    if cycle not in self.scans_df["cycle"].unique():
        print("Cycle number not found in scans_df.")
        return

    if cmap is None:
        cmap = "iridescent_r"
    elif cmap == "grey":
        cmap = "Greys256"

    # all MS2 scans of the requested cycle
    scans = self.scans_df.filter(pl.col("cycle") == cycle)
    scans = scans.filter(pl.col("ms_level") == 2)

    # one long-format frame per spectrum: a row for every (prec_mz, mz, inty) peak
    frames = []
    for scan in scans.iter_rows(named=True):
        spec = self.get_spectrum(
            scan["scan_uid"],
            precursor_trim=None,
            centroid=centroid,
            deisotope=deisotope,
        )
        if spec.mz.size == 0:
            continue
        frames.append(
            pd.DataFrame(
                {
                    "prec_mz": [scan["prec_mz"]] * spec.mz.size,
                    "mz": spec.mz,
                    "inty": spec.inty,
                },
            ),
        )

    if not frames:
        print("No MS2 data found for this cycle.")
        return

    # concatenating per-scan frames yields one row per peak; building a frame directly
    # from the list of dicts would store whole arrays in single cells instead
    spectradf = pd.concat(frames, ignore_index=True)

    # remove any inty<1
    spectradf = spectradf[spectradf["inty"] >= 1]
    if spectradf.empty:
        print("No MS2 data found for this cycle.")
        return
    spectradf = spectradf[["prec_mz", "mz", "inty"]]
    max_prec = spectradf["prec_mz"].max()
    min_prec = spectradf["prec_mz"].min()
    maxmz = spectradf["mz"].max()
    minmz = spectradf["mz"].min()

    # the hook signature requires `elem`, although it is not used here
    def new_bounds_hook(plot, elem):
        # restrict panning/zooming to the extent of the data
        plot.state.x_range.bounds = min_prec, max_prec
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        spectradf,
        kdims=["prec_mz", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Q1 m/z",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )

    overlay = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    if title is not None:
        overlay = overlay.opts(title=title)

    # Create a panel layout
    layout = panel.Column(overlay)

    if filename is not None:
        # if filename includes .html, save the panel layout to an HTML file
        if filename.endswith(".html"):
            layout.save(filename, embed=True)
        else:
            # save the overlay as a png
            hv.save(overlay, filename, fmt="png")
    else:
        # Display the panel layout
        layout.show()
1305
-
1306
-
1307
def plot_ms2_q1(
    self,
    feature_uid=None,
    q1_width=10.0,
    mz_tol=0.01,
    link_x=True,
    n=20,
    deisotope=True,
    centroid=True,
    filename=None,
):
    """
    Plot the intensity of a feature's top n MS2 fragments as a function of the precursor (Q1) m/z.

    Parameters:
        feature_uid: Unique identifier of the feature. Must be present in ``features_df``;
            if None or unknown, a message is printed and nothing is plotted.
        q1_width (float, optional): Half-width, in ``scan_uid`` units, of the scan window taken
            around the MS2 scan closest to the feature. Default is 10.0.
        mz_tol (float, optional): m/z tolerance passed to ``_spec_to_mat``. Default is 0.01.
        link_x (bool, optional): If True, ``shared_axes=True`` is set on the layout. Default is True.
        n (int, optional): Number of most intense MS2 fragments to plot. Default is 20.
        deisotope (bool, optional): Forwarded to ``_spec_to_mat``. Default is True.
        centroid (bool, optional): Forwarded to ``_spec_to_mat``. Default is True.
        filename (str, optional): If it ends with ".html" the layout is saved as HTML through Panel,
            any other value is saved as PNG through HoloViews. If None, the layout is shown.

    Returns:
        None
    """
    if feature_uid is None:
        print("Please provide a feature id.")
        return
    # check if feature_uid is in features_df
    if feature_uid not in self.features_df["feature_uid"].values:
        print("Feature id not found in features_df.")
        # without this return the lookups below fail on an empty selection
        return

    feature = self.features_df[self.features_df["feature_uid"] == feature_uid]
    ms2_specs = feature["ms2_specs"].values[0]
    if ms2_specs is None or len(ms2_specs) == 0:
        print("No MS2 data found for this feature.")
        return

    # m/z of the top n fragments (by intensity) of the first MS2 spectrum
    ms2_specs_df = ms2_specs[0].pandalize()
    ms2_specs_df = ms2_specs_df.sort_values(by="inty", ascending=False).head(n)
    top_mzs = ms2_specs_df["mz"].values.tolist()

    # use a plain scalar for the feature m/z; a Series must not leak into polars arithmetic
    feature_mz = float(feature["mz"].values[0])

    # MS2 scans of the cycle closest to the feature's retention time
    feature_scan = self.find_closest_scan(feature["rt"].values[0])
    cycle = feature_scan["cycle"].values[0]
    cycle_scans = self.scans_df.filter(pl.col("cycle") == cycle)
    cycle_scans = cycle_scans.filter(pl.col("ms_level") == 2)
    if len(cycle_scans) == 0:
        print(f"No MS2 scans found in cycle {cycle}.")
        return

    # scan of that cycle whose 'prec_mz' is the closest to the feature m/z
    closest = cycle_scans[(cycle_scans["prec_mz"] - feature_mz).abs().arg_sort()[:1]]
    scan_uid = closest["scan_uid"][0]

    # take the scans with scan_uid within +/- q1_width of that scan
    scans = self.scans_df.filter(pl.col("scan_uid") >= scan_uid - q1_width)
    scans = scans.filter(pl.col("scan_uid") <= scan_uid + q1_width)
    scan_uids = scans["scan_uid"].to_list()
    q1s = scans["prec_mz"].to_list()

    q1_prod = self._spec_to_mat(
        scan_uids,
        mz_ref=top_mzs,
        mz_tol=mz_tol,
        deisotope=deisotope,
        centroid=centroid,
    )
    q1_df = pd.DataFrame({"q1": q1s})

    has_q1_ratio = "q1_ratio" in ms2_specs_df.columns
    frag_names = []
    for i, mz in enumerate(top_mzs):
        name = f"frag {mz:.3f}"
        # if q1_ratio exists, add it to the name
        if has_q1_ratio:
            q1_ratio = ms2_specs_df["q1_ratio"].values[i]
            name += f" q1r: {q1_ratio:.2f}"
        frag_names.append(name)
        q1_df[name] = q1_prod[i]
    # add scan_uid to q1_df for the tooltips
    q1_df["scan_uid"] = scan_uids

    # one curve per fragment
    eic_plots = []
    for name in frag_names:
        eic = hv.Curve(q1_df, kdims=["q1"], vdims=[name, "scan_uid"]).opts(
            title=name,
            xlabel="Q1 (m/z)",
            ylabel=f"Inty_f{len(eic_plots)}",
            width=250,
            height=200,
            axiswise=True,
            color="black",
            tools=[HoverTool(tooltips=[("Q1", "@q1"), ("scan_uid", "@scan_uid")])],
        )
        eic_plots.append(eic)

    layout = hv.Layout(eic_plots).cols(4)
    if link_x:
        layout = layout.opts(shared_axes=True)

    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
    else:
        panel.panel(layout).show()
1409
-
1410
-
1411
def plot_dda_stats(
    self,
    filename=None,
):
    """
    Draw one scatter plot per DDA statistic, stacked in a single column.

    The statistics come from ``get_dda_stats``. Only the identifier columns
    ("scan_uid", "cycle", "rt") and the plotted metrics are kept, and every value
    below 0 is replaced by None. Each plot uses "cycle" on the x-axis and one metric
    on the y-axis; all plots share the same hover tooltips, which list the identifier
    columns and every metric.

    Parameters:
        filename (str, optional): If it ends with ".html" the layout is saved as HTML through Panel,
            any other value is saved as PNG through HoloViews. If None, the layout is shown.
    """
    id_columns = ["scan_uid", "cycle", "rt"]
    metrics = [
        "inty_tot",
        "bl",
        "ms2_n",
        "time_cycle",
        "time_ms1_to_ms1",
        "time_ms1_to_ms2",
        "time_ms2_to_ms2",
        "time_ms2_to_ms1",
    ]

    stats = self.get_dda_stats()
    # keep 'scan_uid', 'cycle' and 'rt' for hover along with the metrics
    stats = stats[[*id_columns, *metrics]]
    # set any value < 0 to None
    stats[stats < 0] = None

    # the same tooltips are used by every plot
    tooltips = [(column, f"@{column}") for column in (*id_columns, *metrics)]

    scatters = []
    for metric in metrics:
        remaining = [other for other in metrics if other != metric]
        scatters.append(
            hv.Scatter(
                stats,
                kdims="cycle",
                vdims=[metric, "scan_uid", "rt", *remaining],
            ).opts(
                title=metric,
                xlabel="Cycle",
                ylabel=metric,
                height=250,
                width=800,
                tools=[HoverTool(tooltips=tooltips)],
                size=3,
            ),
        )

    layout = hv.Layout(scatters).cols(1)
    if filename is None:
        panel.panel(layout).show()
    elif filename.endswith(".html"):
        panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
    else:
        hv.save(layout, filename, fmt="png")
1484
-
1485
-
1486
def plot_feature_stats(
    self,
    filename=None,
):
    """
    Generate overlaid distribution plots for selected feature metrics.

    Distributions are drawn separately for features with and without MS2 data
    (``ms2_scans`` not null / null). Intensity, quality and summed MS2 intensity are
    log10-transformed; non-positive values become NaN and are dropped from the plots.

    Parameters:
        filename (str, optional): If it ends with ".html" the layout is saved as HTML through Panel,
            any other value is saved as PNG through HoloViews. If None, the layout is shown.

    Returns:
        None
    """

    def first_spectrum(specs):
        # first MS2 spectrum of a feature, or None when there is none (missing or empty list)
        if specs is None or not hasattr(specs, "__len__") or len(specs) == 0:
            return None
        return specs[0]

    def log10_positive(values):
        # log10 of the positive entries; everything else becomes NaN without evaluating log10 on it
        return np.log10(values.where(values > 0))

    # Work on a copy of features_df
    feats = self.features_df.clone()
    # Convert to pandas for operations that require pandas functionality
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()

    # Compute m/z delta for each feature
    feats["mz_delta"] = feats["mz_end"] - feats["mz_start"]

    # Number of peaks and summed intensity of the first MS2 spectrum (0 when there is none)
    first_specs = feats["ms2_specs"].apply(first_spectrum)
    feats["MS2peaks"] = first_specs.apply(lambda spec: len(spec) if spec is not None else 0)
    feats["MS2int"] = first_specs.apply(lambda spec: sum(spec.inty) if spec is not None else 0)

    # Ratio of MS2 to MS1 intensities, computed before the log transformation of both
    feats["MS2toMS1"] = feats["MS2int"] / feats["inty"]
    for column in ("inty", "quality", "MS2int"):
        feats[column] = log10_positive(feats[column])

    # Separate features based on presence of MS2 data
    feats_with_MS2 = feats[feats["ms2_scans"].notnull()]
    feats_without_MS2 = feats[feats["ms2_scans"].isnull()]

    # Define the metrics to plot
    cols_to_plot = [
        "mz",
        "mz_delta",
        "inty",
        "quality",
        "rt",
        "rt_delta",
        "chrom_coherence",
        "chrom_prominence",
        "chrom_prominence_scaled",
        "MS2peaks",
        "MS2int",
        "MS2toMS1",
    ]

    density_plots = []
    # Create overlaid distribution plots for each metric
    for col in cols_to_plot:
        # Extract non-null values from both groups
        data_with = feats_with_MS2[col].dropna().values
        data_without = feats_without_MS2[col].dropna().values

        dist_with = hv.Distribution(data_with, label="With MS2").opts(
            color="red",
            alpha=0.6,
        )
        dist_without = hv.Distribution(data_without, label="Without MS2").opts(
            color="blue",
            alpha=0.6,
        )

        # Overlay the distributions with a legend and hover tool enabled
        overlay = (dist_with * dist_without).opts(
            title=col,
            show_legend=True,
            tools=["hover"],
        )
        density_plots.append(overlay)

    # Arrange the plots in a layout with three columns
    layout = hv.Layout(density_plots).cols(3).opts(shared_axes=False)

    # Save or display the layout based on the filename parameter
    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
    else:
        panel.panel(layout).show()
1600
-
1601
-
1602
def plot_tic(
    self,
    title=None,
    filename=None,
):
    """
    Plot the total ion chromatogram (TIC) built from the MS1 scans in ``scans_df``.

    The ``inty_tot`` value of every scan with ``ms_level == 1`` is plotted against ``rt``.

    Parameters:
        title (str, optional): Plot title; only applied when not None.
        filename (str, optional): If it ends with ".html" the plot is saved as HTML through Panel,
            any other value is saved as PNG through HoloViews. If None, the plot is shown.

    Returns:
        None
    """
    # get all ms_level == 1 scans from self.scans_df
    scans = self.scans_df.filter(pl.col("ms_level") == 1)
    # select rt, scan_uid and inty_tot, convert to pandas and order by rt
    data = scans[["rt", "scan_uid", "inty_tot"]].to_pandas()
    data = data.sort_values("rt")

    # NOTE(review): the axis label says minutes while sibling plots label rt in seconds, and
    # width=100 is very narrow compared with the other plots - confirm both values
    tic = hv.Curve(data, kdims=["rt"], vdims=["inty_tot"]).opts(
        xlabel="Retention Time (min)",
        ylabel="TIC",
        height=250,
        width=100,
    )
    if title is not None:
        tic = tic.opts(title=title)

    # save or display the plot; without this step the curve was built and then discarded
    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(tic).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(tic, filename, fmt="png")
    else:
        panel.panel(tic).show()
1
+ """
2
+ plot.py
3
+
4
+ This module provides visualization functions for mass spectrometry data analysis.
5
+ It contains plotting utilities for extracted ion chromatograms (EICs), 2D data maps,
6
+ feature visualizations, and interactive dashboards using modern visualization libraries.
7
+
8
+ Key Features:
9
+ - **Extracted Ion Chromatograms (EICs)**: Interactive chromatographic plotting with feature annotations.
10
+ - **2D Data Visualization**: Mass spectrometry data visualization with datashader for large datasets.
11
+ - **Feature Plotting**: Visualize detected features with retention time and m/z information.
12
+ - **Interactive Dashboards**: Create interactive panels for data exploration and analysis.
13
+ - **Multi-Sample Plotting**: Comparative visualizations across multiple samples.
14
+ - **Export Capabilities**: Save plots in various formats (HTML, PNG, SVG).
15
+
16
+ Dependencies:
17
+ - `holoviews`: For high-level data visualization and interactive plots.
18
+ - `datashader`: For rendering large datasets efficiently.
19
+ - `panel`: For creating interactive web applications and dashboards.
20
+ - `bokeh`: For low-level plotting control and customization.
21
+ - `polars` and `pandas`: For data manipulation and processing.
22
+ - `numpy`: For numerical computations.
23
+
24
+ Functions:
25
+ - `plot_eic()`: Generate extracted ion chromatograms with feature overlays.
26
+ - `plot_2d()`: Create 2D mass spectrometry data visualizations.
27
+ - `plot_features()`: Visualize detected features in retention time vs m/z space.
28
+ - Various utility functions for plot styling and configuration.
29
+
30
+ Supported Plot Types:
31
+ - Extracted Ion Chromatograms (EIC)
32
+ - Total Ion Chromatograms (TIC)
33
+ - Base Peak Chromatograms (BPC)
34
+ - 2D intensity maps (RT vs m/z)
35
+ - Feature scatter plots
36
+ - Interactive dashboards
37
+
38
+ See Also:
39
+ - `parameters._plot_parameters`: For plot-specific parameter configuration.
40
+ - `single.py`: For applying plotting methods to ddafile objects.
41
+ - `study.py`: For study-level visualization functions.
42
+
43
+ """
44
+
45
+ import os
46
+
47
+ import datashader as ds
48
+ import holoviews as hv
49
+ import holoviews.operation.datashader as hd
50
+ import numpy as np
51
+ import pandas as pd
52
+ import panel
53
+ import polars as pl
54
+
55
+ from bokeh.models import HoverTool
56
+ from holoviews import dim
57
+ from holoviews.plotting.util import process_cmap
58
+ from matplotlib.colors import rgb2hex
59
+
60
+ # Parameters removed - using hardcoded defaults
61
+
62
+
63
+ hv.extension("bokeh")
64
+
65
+
66
def plot_eic(
    self,
    feature_uid=None,
    filename=None,
    rt_tol=10,
    rt_tol_factor_plot=1,
    mz_tol=0.0005,
    mz_tol_factor_plot=1,
    link_x=False,
):
    """
    Plot Extracted Ion Chromatograms (EICs) for one or more features.

    For every selected feature the MS1 points in ``self.ms1_df`` that fall inside the
    feature's rt/mz window (widened by the tolerances below) are summed per retention
    time and drawn as one curve. The feature's ``rt_start`` and ``rt_end`` are marked
    with dashed vertical lines. The curves are stacked in a single column.

    Parameters:
        feature_uid (int or list of int, optional):
            Feature identifier(s) to plot. If None, every feature in ``features_df`` is plotted.
        filename (str, optional):
            Output file path. If ending with `.html`, saves as interactive HTML; otherwise, saves as PNG.
            If not provided, displays the plot interactively.
        rt_tol (float, default=10):
            Retention time tolerance (in seconds).
        rt_tol_factor_plot (float, default=1):
            Multiplier applied to ``rt_tol``; ``rt_tol * rt_tol_factor_plot`` is the margin added
            on both sides of ``[rt_start, rt_end]``.
        mz_tol (float, default=0.0005):
            m/z tolerance.
        mz_tol_factor_plot (float, default=1):
            Multiplier applied to ``mz_tol``; ``mz_tol * mz_tol_factor_plot`` is the margin added
            on both sides of ``[mz_start, mz_end]``.
        link_x (bool, default=False):
            If True, the subplots are laid out with shared axes.

    Returns:
        None

    Notes:
        - ``features_df`` may be a Polars or a pandas DataFrame; ``ms1_df`` is filtered with
          Polars expressions.
        - Features without any MS1 point in their window are reported and skipped.
    """
    if self.features_df is None or self.ms1_df is None:
        print("No features or MS1 data available.")
        return

    # Work on a pandas copy so the row selection below does not depend on which
    # DataFrame flavour features_df happens to be.
    feats = self.features_df
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()
    if feature_uid is not None:
        wanted = [feature_uid] if np.isscalar(feature_uid) else list(feature_uid)
        feats = feats[feats["feature_uid"].isin(wanted)]

    mz_tol_plot = mz_tol * mz_tol_factor_plot
    rt_tol_plot = rt_tol * rt_tol_factor_plot

    window_cols = ["feature_uid", "rt_start", "rt_end", "mz", "mz_start", "mz_end"]
    eic_plots = []
    for uid, rt_start, rt_end, mz, mz_start, mz_end in feats[window_cols].itertuples(
        index=False,
        name=None,
    ):
        eic_df = self.ms1_df.filter(
            pl.col("rt") >= rt_start - rt_tol_plot,
            pl.col("rt") <= rt_end + rt_tol_plot,
            pl.col("mz") >= mz_start - mz_tol_plot,
            pl.col("mz") <= mz_end + mz_tol_plot,
        )
        if eic_df.is_empty():
            print("No MS1 data found in the specified window.")
            continue

        # One intensity value per retention time: sum all points sharing the same rt.
        yname = f"inty_{uid}"
        eic_df = (
            eic_df.to_pandas()
            .groupby("rt")
            .agg({"inty": "sum"})
            .reset_index()
            .rename(columns={"inty": yname})
        )

        # axiswise=True keeps each subplot's axes independent unless link_x is requested.
        eic = hv.Curve(eic_df, kdims=["rt"], vdims=[yname]).opts(
            title=f"EIC for feature {uid}, mz = {mz:.4f}",
            xlabel="Retention time (s)",
            ylabel="Intensity",
            width=1000,
            tools=["hover"],
            height=250,
            axiswise=True,
            color="black",
        )
        # Mark the feature's retention-time boundaries.
        for boundary in (rt_start, rt_end):
            eic = eic * hv.VLine(boundary).opts(
                color="blue",
                line_width=1,
                line_dash="dashed",
                axiswise=True,
            )
        eic_plots.append(eic)

    if not eic_plots:
        print("No EIC could be generated for the requested features.")
        return

    hv_layout = hv.Layout(eic_plots).opts(shared_axes=bool(link_x)).cols(1)

    if filename is None:
        panel.Column(hv_layout).show()
    elif filename.endswith(".html"):
        panel.Column(hv_layout).save(filename, embed=True)
    else:
        # hv.save renders HoloViews objects, so hand it the layout itself rather
        # than the Panel wrapper.
        hv.save(hv_layout, filename, fmt="png")
206
+
207
+
208
def _first_ms2_scan(value):
    """Return the first entry of a list-like ``ms2_scans`` cell, or the cell itself otherwise."""
    # Polars list columns arrive as NumPy arrays after to_pandas(), so accept those too.
    if isinstance(value, (list, tuple, np.ndarray)):
        return value[0] if len(value) > 0 else None
    return value


def plot_2d(
    self,
    filename=None,
    show_features=True,
    show_only_features_with_ms2=False,
    show_isotopes=False,
    show_ms2=False,
    title=None,
    cmap=None,
    marker="circle",
    markersize=10,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    mz_range=None,
    rt_range=None,
):
    """
    Plot a two-dimensional (rt vs m/z) view of the MS1 data with optional feature and MS2 overlays.

    The MS1 points in ``self.ms1_df`` are rasterised with datashader and shown as a
    log-normalised intensity image. Feature, isotope and MS2 scan positions can be
    drawn on top as point layers.

    Parameters:
        filename (str, optional):
            Path to save the plot. If provided and ends with ".html", the plot is saved as an
            interactive HTML file; otherwise, it is saved as a PNG image. If None, the plot is shown.
        show_features (bool, default True):
            Whether to overlay detected features (``iso == 0``) on the plot.
        show_only_features_with_ms2 (bool, default False):
            If True, features whose ``ms2_scans`` entry is null are not drawn.
        show_isotopes (bool, default False):
            Whether to additionally overlay features with ``iso > 0``. Only evaluated when
            features are shown.
        show_ms2 (bool, default False):
            Whether to overlay MS2 scans from ``self.scans_df`` at (rt, prec_mz).
        title (str, optional):
            Title of the plot.
        cmap (str, optional):
            Colormap for the rasterised background. Defaults to "iridescent_r"; "grey" is
            translated to "Greys256".
        marker (str, default 'circle'):
            Marker type used for feature and isotope points.
        markersize (int, default 10):
            Marker size for the point layers. PNG export uses a fixed size of 2 instead.
        raster_dynamic (bool, default True):
            Whether to use dynamic rasterization. Forced to False for PNG export.
        raster_max_px (int, default 8):
            ``max_px`` passed to dynspread.
        raster_threshold (float, default 0.8):
            ``threshold`` passed to dynspread.
        mz_range (tuple, optional):
            ``(min, max)`` m/z window applied to the MS1 data and to every overlay.
        rt_range (tuple, optional):
            ``(min, max)`` retention time window applied to the MS1 data and to every overlay.

    Behavior:
        - Returns early (with a logged error) when ``self.ms1_df`` is None or when no MS1
          point survives the intensity (inty >= 1) and range filters.
        - Filters the MS1 data in Polars, then converts it to pandas for plotting.
        - Restricts panning/zooming to the rt/mz extent of the plotted MS1 data via a hook.

    Returns:
        None

    Side Effects:
        - Either shows the plot interactively or writes the output to a file.
    """
    if self.ms1_df is None:
        self.logger.error("No MS1 data available.")
        return

    if cmap is None:
        cmap = "iridescent_r"
    elif cmap == "grey":
        cmap = "Greys256"

    # Filter with Polars expressions: boolean-mask indexing (df[mask]) is not
    # supported on Polars DataFrames.
    spectradf = self.ms1_df.select(["rt", "mz", "inty"]).filter(pl.col("inty") >= 1)
    if mz_range is not None:
        spectradf = spectradf.filter(
            (pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]),
        )
    if rt_range is not None:
        spectradf = spectradf.filter(
            (pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]),
        )
    if spectradf.is_empty():
        self.logger.error("No MS1 data left after filtering.")
        return
    spectradf = spectradf.to_pandas()

    maxrt = spectradf["rt"].max()
    minrt = spectradf["rt"].min()
    maxmz = spectradf["mz"].max()
    minmz = spectradf["mz"].min()

    def new_bounds_hook(plot, elem):
        # Keep the Bokeh ranges inside the extent of the plotted MS1 data.
        plot.state.x_range.bounds = minrt, maxrt
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        spectradf,
        kdims=["rt", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    size_1 = size_2 = markersize
    color_1 = "forestgreen"
    color_2 = "darkorange"
    if filename is not None and not filename.endswith(".html"):
        # Static PNG export: small markers and a one-off (non-dynamic) raster.
        size_1 = size_2 = 2
        raster_dynamic = False

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Retention time (s)",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )
    raster = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    feature_points_1 = None  # features with MS2 data
    feature_points_2 = None  # features without MS2 data
    feature_points_3 = None  # orphan MS2 scans
    feature_points_4 = None  # MS2 scans linked to a feature
    feature_points_iso = None  # isotope features

    if self.features_df is not None and show_features:
        feats = self.features_df
        # to_pandas() already yields a fresh frame; copy pandas input so the
        # column rewrite below never touches the sample's own data.
        feats = feats.to_pandas() if hasattr(feats, "to_pandas") else feats.copy()
        # Keep only the first linked MS2 scan so the hover shows a scalar.
        feats["ms2_scans"] = feats["ms2_scans"].apply(_first_ms2_scan)
        if mz_range is not None:
            feats = feats[(feats["mz"] >= mz_range[0]) & (feats["mz"] <= mz_range[1])]
        if rt_range is not None:
            feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]

        main_feats = feats[feats["iso"] == 0]
        has_ms2 = main_feats["ms2_scans"].notnull()

        feature_points_1 = hv.Points(
            main_feats[has_ms2],
            kdims=["rt", "mz"],
            vdims=[
                "feature_uid",
                "inty",
                "quality",
                "rt_delta",
                "ms2_scans",
                "chrom_coherence",
                "chrom_prominence_scaled",
            ],
            label="Features with MS2 data",
        ).options(
            color=color_1,
            marker=marker,
            size=size_1,
            tools=["hover"],
        )

        if not show_only_features_with_ms2:
            feature_points_2 = hv.Points(
                main_feats[~has_ms2],
                kdims=["rt", "mz"],
                vdims=[
                    "feature_uid",
                    "inty",
                    "quality",
                    "rt_delta",
                    "chrom_coherence",
                    "chrom_prominence_scaled",
                ],
                label="Features without MS2 data",
            ).options(
                color="red",
                size=size_2,
                marker=marker,
                tools=["hover"],
            )

        if show_isotopes:
            # Taken from the range-filtered frame so isotopes respect mz_range/rt_range too.
            feature_points_iso = hv.Points(
                feats[feats["iso"] > 0],
                kdims=["rt", "mz"],
                vdims=[
                    "feature_uid",
                    "inty",
                    "quality",
                    "rt_delta",
                    "iso",
                    "iso_of",
                    "chrom_coherence",
                    "chrom_prominence_scaled",
                ],
                label="Isotopes",
            ).options(
                color="violet",
                marker=marker,
                size=size_1,
                tools=["hover"],
            )

    if show_ms2 and self.scans_df is not None:
        ms2_scans = self.scans_df.filter(pl.col("ms_level") == 2)
        if mz_range is not None:
            ms2_scans = ms2_scans.filter(
                (pl.col("prec_mz") >= mz_range[0]) & (pl.col("prec_mz") <= mz_range[1]),
            )
        if rt_range is not None:
            ms2_scans = ms2_scans.filter(
                (pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]),
            )

        # A negative feature_uid marks an MS2 scan that is not linked to a feature.
        ms2_orphan = ms2_scans.filter(pl.col("feature_uid") < 0)
        if len(ms2_orphan) > 0:
            feature_points_3 = hv.Points(
                ms2_orphan.to_pandas(),
                kdims=["rt", "prec_mz"],
                vdims=["index", "inty_tot", "bl"],
                label="Orphan MS2 scans",
            ).options(
                color=color_2,
                marker="x",
                size=size_2,
                tools=["hover"],
            )

        ms2_linked = ms2_scans.filter(pl.col("feature_uid") >= 0)
        if len(ms2_linked) > 0:
            feature_points_4 = hv.Points(
                ms2_linked.to_pandas(),
                kdims=["rt", "prec_mz"],
                vdims=["index", "inty_tot", "bl"],
                label="Linked MS2 scans",
            ).options(
                color=color_1,
                marker="x",
                size=size_2,
                tools=["hover"],
            )

    # Later layers are drawn on top: MS2 scans first, then features, then isotopes.
    overlay = raster
    for layer in (
        feature_points_4,
        feature_points_3,
        feature_points_1,
        feature_points_2,
        feature_points_iso,
    ):
        if layer is not None:
            overlay = overlay * layer

    if title is not None:
        overlay = overlay.opts(title=title)

    layout = panel.Column(overlay)

    if filename is None:
        layout.show()
    elif filename.endswith(".html"):
        layout.save(filename, embed=True)
    else:
        hv.save(overlay, filename, fmt="png")
518
+
519
+
520
def _link_oracle_annotations(oracle_data, feats, link_by_feature_uid):
    """Return the oracle rows that could be linked to a feature, one row per ``feature_uid``."""
    annotation_cols = [
        "title",
        "scan_idx",
        "mslevel",
        "hits",
        "id_level",
        "id_label",
        "id_ion",
        "id_class",
        "id_evidence",
        "score",
        "score2",
    ]
    if link_by_feature_uid:
        linked = oracle_data[annotation_cols].copy()
        # The title carries the feature id as "fid:XYZ"; rows without it cannot be linked.
        fid = linked["title"].astype(str).str.extract(r"fid:(\d+)", expand=False)
        linked = linked[fid.notna()].copy()
        linked["feature_uid"] = fid.dropna().astype(int)
    else:
        linked = oracle_data[["precursor", "rt", *annotation_cols]].copy()
        feat_rt = feats["rt"].to_numpy(dtype=float)
        feat_mz = feats["mz"].to_numpy(dtype=float)
        feat_uid = feats["feature_uid"].to_numpy()
        matched_rows = []
        matched_uids = []
        # For every annotation pick the feature within 1 s and 0.005 m/z that is closest in rt.
        for pos, (rt, precursor) in enumerate(zip(linked["rt"], linked["precursor"])):
            delta_rt = np.abs(feat_rt - rt)
            hits = np.flatnonzero((delta_rt < 1) & (np.abs(feat_mz - precursor) < 0.005))
            if hits.size:
                matched_rows.append(pos)
                matched_uids.append(feat_uid[hits[np.argmin(delta_rt[hits])]])
        # Unmatched rows are dropped so the merge key keeps the features' dtype.
        linked = (
            linked.iloc[matched_rows]
            .drop(columns=["precursor", "rt"])
            .assign(feature_uid=np.asarray(matched_uids, dtype=feat_uid.dtype))
        )

    # Keep the annotation with the highest id_level for every feature.
    linked = linked.sort_values(by=["id_level"], ascending=False)
    return linked.drop_duplicates(subset=["feature_uid"], keep="first")


def plot_2d_oracle(
    self,
    oracle_folder=None,
    link_by_feature_uid=None,
    colorby="hg",
    filename=None,
    min_id_level=None,
    max_id_level=None,
    min_ms_level=None,
    title=None,
    cmap=None,
    markersize=10,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    mz_range=None,
    rt_range=None,
):
    """
    Plot a 2D overlay of the MS1 data and the features, coloured by oracle annotations.

    The MS1 points are rasterised as in ``plot_2d``. The features are merged with the oracle
    summary table and drawn in up to five layers, depending on their ``id_level`` and on
    whether they have MS2 scans.

    Parameters:
        oracle_folder (str, optional): Path to the oracle folder containing the annotation file
            (expected at "<oracle_folder>/diag/summary_by_feature.csv"). If None, or if the file
            cannot be read, a message is printed and nothing is plotted.
        link_by_feature_uid (bool, optional): If truthy, annotations are linked through the
            "fid:<n>" tag in their title. Otherwise each annotation is linked to the feature
            closest in rt within 1 s and 0.005 m/z.
        colorby (str, optional): Column used to colour annotated features: 'class', 'hg',
            'id_class', 'id_hg' (id_class); 'ion', 'id_ion' (id_ion); 'id_evidence',
            'ms2_evidence' (id_evidence). Any other value leaves all features black. Default is 'hg'.
        filename (str, optional): Output file. If it ends with ".html", the panel layout is saved
            as an interactive HTML file; otherwise the output is saved as a PNG. If None, the
            plot is shown.
        min_id_level (int, optional): Minimum id_level of features to include.
        max_id_level (int, optional): Maximum id_level of features to include.
        min_ms_level (int, optional): Minimum mslevel of features to include.
        title (str, optional): Title of the plot. Default is None.
        cmap (str, optional): Colormap of the rasterised MS1 data. None and "grey" select
            'Greys256'; "iridescent" selects 'iridescent_r'.
        markersize (int, optional): Marker size for feature points. Default is 10.
        raster_dynamic (bool, optional): If True, enables dynamic rasterization. Forced to False
            when saving to a non-HTML file. Default is True.
        raster_max_px (int, optional): ``max_px`` passed to dynspread. Default is 8.
        raster_threshold (float, optional): ``threshold`` passed to dynspread. Default is 0.8.
        mz_range (tuple, optional): ``(min, max)`` m/z window applied to MS1 data and features.
        rt_range (tuple, optional): ``(min, max)`` retention time window applied to MS1 data
            and features.

    Returns:
        None

    If the file object, the MS1 data, the feature table or the oracle summary is missing, the
    function prints a message and returns without plotting.
    """
    # Cheap precondition checks come first so nothing is rasterised for a plot
    # that cannot be completed.
    if self.file_obj is None:
        print("Please load a file first.")
        return
    if self.ms1_df is None:
        print("No MS1 data available.")
        return
    if self.features_df is None:
        print("No features available.")
        return
    if oracle_folder is None:
        print("No oracle folder provided.")
        return
    try:
        oracle_data = pd.read_csv(
            os.path.join(oracle_folder, "diag", "summary_by_feature.csv"),
        )
    except (OSError, ValueError):
        # ValueError covers pandas' parser, empty-file and decoding errors.
        print(f"Could not read {oracle_folder}/diag/summary_by_feature.csv")
        return

    if cmap is None or cmap == "grey":
        cmap = "Greys256"
    elif cmap == "iridescent":
        cmap = "iridescent_r"

    # Filter in Polars first so only the points that are plotted get converted to pandas.
    spectradf = self.ms1_df.select(["rt", "mz", "inty"]).filter(pl.col("inty") >= 1)
    if mz_range is not None:
        spectradf = spectradf.filter(
            (pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]),
        )
    if rt_range is not None:
        spectradf = spectradf.filter(
            (pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]),
        )
    if spectradf.is_empty():
        print("No MS1 data left after filtering.")
        return
    spectradf = spectradf.to_pandas()

    maxrt = spectradf["rt"].max()
    minrt = spectradf["rt"].min()
    maxmz = spectradf["mz"].max()
    minmz = spectradf["mz"].min()

    def new_bounds_hook(plot, elem):
        # Keep the Bokeh ranges inside the extent of the plotted MS1 data.
        plot.state.x_range.bounds = minrt, maxrt
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        spectradf,
        kdims=["rt", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    if filename is not None and not filename.endswith(".html"):
        raster_dynamic = False

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Retention time (s)",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )
    raster = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    feats = self.features_df
    feats = feats.to_pandas() if hasattr(feats, "to_pandas") else feats.copy()

    annotations = _link_oracle_annotations(oracle_data, feats, link_by_feature_uid)
    feats = feats.merge(annotations, how="left", on="feature_uid")

    if mz_range is not None:
        feats = feats[(feats["mz"] >= mz_range[0]) & (feats["mz"] <= mz_range[1])]
    if rt_range is not None:
        feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
    if min_id_level is not None:
        feats = feats[feats["id_level"] >= min_id_level]
    if max_id_level is not None:
        feats = feats[feats["id_level"] <= max_id_level]
    if min_ms_level is not None:
        feats = feats[feats["mslevel"] >= min_ms_level]
    feats = feats.copy()

    # Resolve which annotation column drives the colours.
    color_column = None
    cvalues = None
    if colorby in ["class", "hg", "id_class", "id_hg"]:
        color_column = "id_class"
        # Features without a class are grouped under 'mix'.
        feats["id_class"] = feats["id_class"].fillna("mix")
        # Order classes by their reversed spelling, i.e. by suffix.
        cvalues = sorted(sorted(feats["id_class"].unique()), key=lambda name: name[::-1])
    elif colorby in ["ion", "id_ion"]:
        color_column = "id_ion"
        cvalues = feats["id_ion"].unique()
    elif colorby in ["id_evidence", "ms2_evidence"]:
        color_column = "id_evidence"
        cvalues = feats["id_evidence"].unique()

    # Anything not matched below stays black.
    feats["color"] = "black"
    if cvalues is not None and len(cvalues) > 0:
        num_colors = len(cvalues)
        palette = process_cmap("rainbow", ncolors=num_colors, provider="colorcet")
        if num_colors > 1:
            # Spread the categories evenly over the whole palette.
            step = (len(palette) - 1) / (num_colors - 1)
            colors = [rgb2hex(palette[int(i * step)]) for i in range(num_colors)]
        else:
            colors = [rgb2hex(palette[0])]
        for value, color in zip(cvalues, colors):
            feats.loc[feats[color_column] == value, "color"] = color

    # Features without any annotation count as id_level 0.
    feats["id_level"] = feats["id_level"].fillna(0)
    has_ms2 = feats["ms2_scans"].notnull()

    def make_layer(frame, vdims, label, color, marker, fill_alpha):
        """Build one hover-enabled point layer, or None when there is nothing to draw."""
        if len(frame) == 0:
            return None
        return hv.Points(
            frame,
            kdims=["rt", "mz"],
            vdims=vdims,
            label=label,
        ).options(
            color=color,
            marker=marker,
            size=markersize,
            fill_alpha=fill_alpha,
            tools=["hover"],
        )

    ms1_id_vdims = [
        "inty",
        "feature_uid",
        "id_level",
        "id_label",
        "id_ion",
        "id_class",
        "color",
    ]
    layers = [
        # id_level == 2: filled circles
        make_layer(
            feats[feats["id_level"] == 2],
            [
                "inty",
                "feature_uid",
                "id_level",
                "id_class",
                "id_label",
                "id_ion",
                "id_evidence",
                "score",
                "score2",
                "color",
            ],
            "ID by MS2",
            "color",
            "circle",
            1.0,
        ),
        # id_level == 1 with MS2 scans: open circles
        make_layer(
            feats[has_ms2 & (feats["id_level"] == 1)],
            ms1_id_vdims,
            "ID by MS1, with MS2",
            "color",
            "circle",
            0.0,
        ),
        # id_level == 1 without MS2 scans: open diamonds
        make_layer(
            feats[~has_ms2 & (feats["id_level"] == 1)],
            ms1_id_vdims,
            "ID by MS1, no MS2",
            "color",
            "diamond",
            0.0,
        ),
        # id_level < 1 with MS2 scans: grey open circles
        make_layer(
            feats[has_ms2 & (feats["id_level"] < 1)],
            ["inty", "feature_uid"],
            "No ID, with MS2",
            "gray",
            "circle",
            0.0,
        ),
        # id_level < 1 without MS2 scans: grey open diamonds
        make_layer(
            feats[~has_ms2 & (feats["id_level"] < 1)],
            ["inty", "feature_uid"],
            "No ID, no MS2",
            "gray",
            "diamond",
            0.0,
        ),
    ]

    overlay = raster
    for layer in layers:
        if layer is not None:
            overlay = overlay * layer

    if title is not None:
        overlay = overlay.opts(title=title)

    layout = panel.Column(overlay)

    if filename is None:
        layout.show()
    elif filename.endswith(".html"):
        layout.save(filename, embed=True)
    else:
        hv.save(overlay, filename, fmt="png")
927
+
928
+
929
+ def plot_ms2_eic(
930
+ self,
931
+ feature_uid=None,
932
+ rt_tol=5,
933
+ mz_tol=0.05,
934
+ link_x=True,
935
+ n=20,
936
+ deisotope=True,
937
+ centroid=True,
938
+ filename=None,
939
+ ):
940
+ """
941
+ Plots the Extracted Ion Chromatograms (EIC) for the precursor and top n MS2 fragment ions of a given feature.
942
+ Parameters:
943
+ feature_uid: The feature unique identifier. Must be present in the features dataframe; if None, a message is printed.
944
+ rt_tol (float, optional): The retention time tolerance (in seconds) to extend the feature's rt start and end values. Default is 5.
945
+ mz_tol (float, optional): The m/z tolerance used when filtering the precursor and fragment ion intensities. Default is 0.05.
946
+ link_x (bool, optional): If True, the x-axis (retention time) of all subplots is linked. Default is True.
947
+ n (int, optional): The number of top MS2 fragment m/z values to consider for plotting. Default is 20.
948
+ deisotope (bool, optional): Flag that determines whether deisotoping should be applied to the MS2 fragments. Default is True.
949
+ centroid (bool, optional): Flag that controls whether centroiding is applied to the MS2 data. Default is True.
950
+ filename (str, optional): If provided, the function saves the plot to the specified file. Supports .html for interactive plots or other formats (e.g., png).
951
+ If None, the plot is displayed instead of being saved.
952
+ Returns:
953
+ None
954
+ Notes:
955
+ - The function first verifies the existence of the provided feature id and its associated MS2 spectrum.
956
+ - It retrieves the top n fragments by intensity from the MS2 spectrum and computes the EIC for both the precursor ion and the fragments.
957
+ - A helper method (_spec_to_mat) is used to convert spectral data into intensity matrices.
958
+ - The resulting plots include hover tools to display the retention time and scan identifier.
959
+ - The layout is arranged in a grid (4 columns by default) and may have linked x-axes based on the link_x parameter.
960
+ """
961
+ # plots the EIC for a given feature id inlcusind the EIC of the top n MS2 fragments
962
+
963
+ if feature_uid is None:
964
+ print("Please provide a feature id.")
965
+ return
966
+ # check if feature_uid is in features_df
967
+ if feature_uid not in self.features_df["feature_uid"].values:
968
+ print("Feature id not found in features_df.")
969
+
970
+ feature = self.features_df[self.features_df["feature_uid"] == feature_uid]
971
+ # get top n fragments
972
+ ms2_specs = feature["ms2_specs"].values[0]
973
+ if ms2_specs is None:
974
+ print("No MS2 data found for this feature.")
975
+ return
976
+
977
+ if len(ms2_specs) == 0:
978
+ print("No MS2 data found for this feature.")
979
+ return
980
+ # get the MS2 spectrum
981
+ # get the mz of the top n fragments
982
+ ms2_specs_df = ms2_specs[0].pandalize()
983
+ ms2_specs_df = ms2_specs_df.sort_values(by="inty", ascending=False)
984
+ ms2_specs_df = ms2_specs_df.head(n)
985
+ top_mzs = ms2_specs_df["mz"].values.tolist()
986
+
987
+ # find rt_start and rt_end of the feature_uid
988
+ rt_start = feature["rt_start"].values[0] - rt_tol
989
+ rt_end = feature["rt_end"].values[0] + rt_tol
990
+ # get the cycle at rt_start and the cycle at rt_end from the closest scan with ms_level == 1
991
+ scans = self.scans_df.filter(pl.col("ms_level") == 1)
992
+ scans = scans.filter(pl.col("rt") > rt_start)
993
+ scans = scans.filter(pl.col("rt") < rt_end)
994
+ rts = scans["rt"].to_list()
995
+ if len(scans) == 0:
996
+ print(f"No scans found between {rt_start} and {rt_end}.")
997
+ return
998
+ scan_uids = scans["scan_uid"].to_list()
999
+ eic_prec = self._spec_to_mat(
1000
+ scan_uids,
1001
+ mz_ref=feature["mz"].values.tolist(),
1002
+ mz_tol=mz_tol,
1003
+ deisotope=False,
1004
+ centroid=True,
1005
+ )
1006
+ # convert eic_prec from matrix to list
1007
+ eic_prec = eic_prec[0].tolist()
1008
+
1009
+ # get all unique cycles from scans
1010
+ cycles = scans["cycle"].unique()
1011
+ scan_uids = []
1012
+ # iterate over all cycles and get the scan_uid of scan with ms_level == 2 and closest precursor_mz to spec.precursor_mz
1013
+ for cycle in cycles:
1014
+ scans = self.scans_df.filter(pl.col("cycle") == cycle)
1015
+ scans = scans.filter(pl.col("ms_level") == 2)
1016
+ scans = scans.filter(pl.col("prec_mz") > feature["mz"] - 5)
1017
+ scans = scans.filter(pl.col("prec_mz") < feature["mz"] + 5)
1018
+ if len(scans) == 0:
1019
+ print(
1020
+ f"No scans found for cycle {cycle} and mz {feature['mz']}. Increase mz_tol tolerance.",
1021
+ )
1022
+ return
1023
+ # get the scan with the closest precursor_mz to feature['mz']
1024
+ scan = scans[(scans["prec_mz"] - feature["mz"]).abs().arg_sort()[:1]]
1025
+ scan_uids.append(scan["scan_uid"][0])
1026
+ eic_prod = self._spec_to_mat(
1027
+ scan_uids,
1028
+ mz_ref=top_mzs,
1029
+ mz_tol=mz_tol,
1030
+ deisotope=deisotope,
1031
+ centroid=centroid,
1032
+ )
1033
+
1034
+ prec_name = f"prec {feature['mz'].values[0]:.3f}"
1035
+ eic_df = pd.DataFrame({"rt": rts, prec_name: eic_prec})
1036
+ # add scan_uid to eic_df for the tooltips
1037
+ eic_df["scan_uid"] = scan_uids
1038
+
1039
+ frag_names = [prec_name]
1040
+ for i, mz in enumerate(top_mzs):
1041
+ # add column to eic_df
1042
+ name = f"frag {mz:.3f}"
1043
+ frag_names.append(name)
1044
+ eic_df[name] = eic_prod[i]
1045
+
1046
+ # create a plot for all columns in eic_df
1047
+ eic_plots: list[hv.Curve] = []
1048
+ for name in frag_names:
1049
+ eic = hv.Curve(eic_df, kdims=["rt"], vdims=[name, "scan_uid"]).opts(
1050
+ title=name,
1051
+ xlabel="RT (s)",
1052
+ ylabel=f"Inty_f{len(eic_plots)}",
1053
+ width=250,
1054
+ height=200,
1055
+ axiswise=True,
1056
+ color="black",
1057
+ tools=[HoverTool(tooltips=[("rt", "@rt"), ("scan_uid", "@scan_uid")])],
1058
+ )
1059
+ eic_plots.append(eic)
1060
+
1061
+ # add as
1062
+
1063
+ layout = hv.Layout(eic_plots).cols(4)
1064
+ if link_x:
1065
+ layout = layout.opts(shared_axes=True)
1066
+
1067
+ if filename is not None:
1068
+ if filename.endswith(".html"):
1069
+ panel.panel(layout).save(filename, embed=True) # type: ignore[attr-defined]
1070
+ else:
1071
+ hv.save(layout, filename, fmt="png")
1072
+ else:
1073
+ panel.panel(layout).show()
1074
+
1075
+
1076
def plot_ms2_cycle(
    self,
    cycle=None,
    filename=None,
    title=None,
    cmap=None,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    centroid=True,
    deisotope=True,
):
    """
    Plot all MS2 spectra acquired in one cycle as a rasterized 2D map.

    Every MS2 scan of the requested cycle is read via `get_spectrum` and its peaks are
    placed on a map with the scan's precursor (Q1) m/z on the x-axis and the fragment m/z
    on the y-axis, coloured by intensity on a log scale.

    Parameters:
        cycle (int, optional): Cycle number to plot. Must be present in `scans_df["cycle"]`.
        filename (str, optional): If it ends with ".html" the panel layout is saved as HTML;
            any other name is saved through `hv.save` with `fmt="png"`.
            If None, the plot is displayed instead of being saved.
        title (str, optional): Title applied to the overlay.
        cmap (str, optional): Colormap name. None selects "iridescent_r"; "grey" is mapped
            to "Greys256".
        raster_dynamic (bool): Forwarded to `hd.rasterize(dynamic=...)`.
        raster_max_px (int): Forwarded to `hd.dynspread(max_px=...)`.
        raster_threshold (float): Forwarded to `hd.dynspread(threshold=...)`.
        centroid (bool): Forwarded to `get_spectrum`.
        deisotope (bool): Forwarded to `get_spectrum`.

    Returns:
        None
    """
    if self.file_obj is None:
        print("Please load a mzML file first.")
        return

    if cycle is None:
        print("Please provide a cycle number.")
        return

    if cycle not in self.scans_df["cycle"].unique():
        print("Cycle number not found in scans_df.")
        return

    if cmap is None:
        cmap = "iridescent_r"
    elif cmap == "grey":
        cmap = "Greys256"

    # all MS2 scans that belong to the requested cycle
    ms2_scans = self.scans_df.filter(
        (pl.col("cycle") == cycle) & (pl.col("ms_level") == 2),
    )

    # One long-format frame per scan (one row per peak). Building a single DataFrame
    # from a list of dicts of arrays would instead yield one row per scan with
    # array-valued cells, which cannot be filtered or plotted per peak.
    frames = []
    for scan in ms2_scans.iter_rows(named=True):
        spec = self.get_spectrum(
            scan["scan_uid"],
            precursor_trim=None,
            centroid=centroid,
            deisotope=deisotope,
        )
        if spec.mz.size == 0:
            continue
        frames.append(
            pd.DataFrame(
                {
                    "prec_mz": scan["prec_mz"],  # scalar, broadcast to every peak
                    "mz": spec.mz,
                    "inty": spec.inty,
                },
            ),
        )

    if not frames:
        print("No MS2 data found for this cycle.")
        return

    spectradf = pd.concat(frames, ignore_index=True)
    # remove any inty<1 (the colour scale is logarithmic)
    spectradf = spectradf.loc[spectradf["inty"] >= 1, ["prec_mz", "mz", "inty"]]
    if spectradf.empty:
        print("No MS2 data found for this cycle.")
        return

    min_q1 = spectradf["prec_mz"].min()
    max_q1 = spectradf["prec_mz"].max()
    min_mz = spectradf["mz"].min()
    max_mz = spectradf["mz"].max()

    def new_bounds_hook(plot, _element):
        # Hooks receive (plot, element); only the plot is needed to clamp
        # panning/zooming to the extent of the data.
        plot.state.x_range.bounds = min_q1, max_q1
        plot.state.y_range.bounds = min_mz, max_mz

    points = hv.Points(
        spectradf,
        kdims=["prec_mz", "mz"],
        vdims=["inty"],
        label="MS2 scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Q1 m/z",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )

    overlay = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    if title is not None:
        overlay = overlay.opts(title=title)

    # Create a panel layout
    layout = panel.Column(overlay)

    if filename is None:
        # Display the panel layout
        layout.show()
    elif filename.endswith(".html"):
        layout.save(filename, embed=True)
    else:
        # any other extension is rendered as a png
        hv.save(overlay, filename, fmt="png")
1295
+
1296
+
1297
def plot_ms2_q1(
    self,
    feature_uid=None,
    q1_width=10.0,
    mz_tol=0.01,
    link_x=True,
    n=20,
    deisotope=True,
    centroid=True,
    filename=None,
):
    """
    Plot the intensity of the top n MS2 fragments of a feature as a function of Q1 m/z.

    The first MS2 spectrum of the feature supplies the n most intense fragment m/z values.
    The MS2 scan closest in precursor m/z to the feature (within the cycle closest to the
    feature's retention time) is used as anchor, and all scans whose `scan_uid` lies within
    `q1_width` of that anchor are converted to fragment traces with `_spec_to_mat`.

    Parameters:
        feature_uid (int, optional): Feature to plot. Must exist in `features_df`.
        q1_width (float): Half-width of the `scan_uid` window around the anchor scan.
        mz_tol (float): m/z tolerance forwarded to `_spec_to_mat`.
        link_x (bool): If True, the layout is created with `shared_axes=True`.
        n (int): Number of most intense fragments to plot.
        deisotope (bool): Forwarded to `_spec_to_mat`.
        centroid (bool): Forwarded to `_spec_to_mat`.
        filename (str, optional): If it ends with ".html" the layout is saved as HTML;
            any other name is saved through `hv.save` with `fmt="png"`.
            If None, the plot is displayed instead of being saved.

    Returns:
        None
    """
    if feature_uid is None:
        print("Please provide a feature id.")
        return

    features = self.features_df
    # The lookups below use the pandas API; convert when the table offers to_pandas
    # (same capability check as used in plot_feature_stats).
    if hasattr(features, "to_pandas"):
        features = features.to_pandas()

    # check if feature_uid is in features_df
    if feature_uid not in features["feature_uid"].values:
        print("Feature id not found in features_df.")
        # without this return the .values[0] lookups below raise IndexError
        return

    feature = features[features["feature_uid"] == feature_uid]
    ms2_specs = feature["ms2_specs"].values[0]
    if ms2_specs is None or len(ms2_specs) == 0:
        print("No MS2 data found for this feature.")
        return

    # get the mz of the top n fragments of the first MS2 spectrum
    ms2_specs_df = ms2_specs[0].pandalize()
    ms2_specs_df = ms2_specs_df.sort_values(by="inty", ascending=False).head(n)
    top_mzs = ms2_specs_df["mz"].values.tolist()

    # Use the scalar m/z: subtracting the one-row pandas Series from a polars
    # column is not a well-defined operation.
    feature_mz = float(feature["mz"].values[0])

    # MS2 scans of the cycle closest to the feature's retention time
    feature_scan = self.select_closest_scan(feature["rt"].values[0])
    cycle = feature_scan["cycle"][0]
    cycle_ms2 = self.scans_df.filter(
        (pl.col("cycle") == cycle) & (pl.col("ms_level") == 2),
    )
    if len(cycle_ms2) == 0:
        print(f"No MS2 scans found in cycle {cycle}.")
        return

    # anchor: the scan in the cycle whose 'prec_mz' is closest to the feature m/z
    closest_idx = (cycle_ms2["prec_mz"] - feature_mz).abs().arg_min()
    scan_uid = cycle_ms2["scan_uid"][closest_idx]

    # get q1_width scans before and after the anchor scan_uid
    # NOTE(review): the window is not restricted to ms_level == 2, so any MS1 scan
    # falling inside it is included as well - confirm this is intended.
    window = self.scans_df.filter(
        (pl.col("scan_uid") >= scan_uid - q1_width) & (pl.col("scan_uid") <= scan_uid + q1_width),
    )
    scan_uids = window["scan_uid"].to_list()
    q1s = window["prec_mz"].to_list()

    q1_prod = self._spec_to_mat(
        scan_uids,
        mz_ref=top_mzs,
        mz_tol=mz_tol,
        deisotope=deisotope,
        centroid=centroid,
    )
    q1_df = pd.DataFrame({"q1": q1s})

    has_q1_ratio = "q1_ratio" in ms2_specs_df.columns
    frag_names = []
    for i, mz in enumerate(top_mzs):
        name = f"frag {mz:.3f}"
        # if q1_ratio exists, add it to the name
        if has_q1_ratio:
            q1_ratio = ms2_specs_df["q1_ratio"].values[i]
            name += f" q1r: {q1_ratio:.2f}"
        frag_names.append(name)
        q1_df[name] = q1_prod[i]
    # add scan_uid to q1_df for the tooltips
    q1_df["scan_uid"] = scan_uids

    # one small curve per fragment trace
    q1_plots = []
    for name in frag_names:
        curve = hv.Curve(q1_df, kdims=["q1"], vdims=[name, "scan_uid"]).opts(
            title=name,
            xlabel="Q1 (m/z)",
            ylabel=f"Inty_f{len(q1_plots)}",
            width=250,
            height=200,
            axiswise=True,
            color="black",
            tools=[HoverTool(tooltips=[("Q1", "@q1"), ("scan_uid", "@scan_uid")])],
        )
        q1_plots.append(curve)

    layout = hv.Layout(q1_plots).cols(4)
    if link_x:
        layout = layout.opts(shared_axes=True)

    if filename is None:
        panel.panel(layout).show()
    elif filename.endswith(".html"):
        panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
    else:
        hv.save(layout, filename, fmt="png")
1397
+
1398
+
1399
def plot_dda_stats(
    self,
    filename=None,
):
    """
    Generates scatter plots for DDA statistics.

    The statistics are retrieved with `get_dda_stats`, reduced to the columns needed for
    plotting and hovering, and every value below 0 is replaced by a missing value. One
    scatter plot is created per metric in `cols_to_plot`, with "cycle" on the x-axis and the
    metric on the y-axis. All plots share the same hover tooltips: "scan_uid", "cycle", "rt"
    and every metric value. The plots are stacked in a single column.

    Parameters:
        filename (str, optional): The path and filename where the plot should be saved. If the
            filename ends with ".html", the plot is saved as an HTML file; otherwise, it is
            saved as a PNG image. If not provided, the plot is displayed interactively.

    Returns:
        None

    Notes:
        - The data is expected to include the columns 'scan_uid', 'cycle', 'rt', and the
          metrics listed in `cols_to_plot`.
        - Assumes `get_dda_stats` returns a pandas DataFrame (boolean-frame masking is used).
    """
    cols_to_plot = [
        "inty_tot",
        "bl",
        "ms2_n",
        "time_cycle",
        "time_ms1_to_ms1",
        "time_ms1_to_ms2",
        "time_ms2_to_ms2",
        "time_ms2_to_ms1",
    ]
    stats = self.get_dda_stats()
    # Keep 'scan_uid', 'cycle' and 'rt' for hover along with the columns to plot
    stats = stats[["scan_uid", "cycle", "rt", *cols_to_plot]]
    # Blank out any value < 0. mask() returns a new frame, which avoids assigning
    # through a boolean mask on a column-sliced frame (chained assignment).
    stats = stats.mask(stats < 0)

    # Common hover tooltips for all plots, including all cols_to_plot
    common_tooltips = [
        ("scan_uid", "@scan_uid"),
        ("cycle", "@cycle"),
        ("rt", "@rt"),
    ] + [(c, f"@{c}") for c in cols_to_plot]

    scatter_plots = []
    for col in cols_to_plot:
        # every plot gets its own HoverTool instance
        hover = HoverTool(tooltips=common_tooltips)
        # the plotted metric comes first in vdims; the rest is carried along for hover
        other_cols = [c for c in cols_to_plot if c != col]
        scatter = hv.Scatter(
            stats,
            kdims="cycle",
            vdims=[col, "scan_uid", "rt"] + other_cols,
        ).opts(
            title=col,
            xlabel="Cycle",
            ylabel=col,
            height=250,
            width=800,
            tools=[hover],
            size=3,
        )
        scatter_plots.append(scatter)

    layout = hv.Layout(scatter_plots).cols(1)
    if filename is None:
        panel.panel(layout).show()
    elif filename.endswith(".html"):
        panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
    else:
        hv.save(layout, filename, fmt="png")
1472
+
1473
+
1474
def plot_feature_stats(
    self,
    filename=None,
):
    """
    Generates overlaid distribution plots for selected feature metrics.

    The distributions are created separately for features with and without MS2 data
    (split on whether `ms2_scans` is null). Metrics include m/z and m/z width, intensity,
    quality, retention time, chromatographic coherence/prominence, number of MS2 peaks,
    summed MS2 intensity and the MS2-to-MS1 intensity ratio. Intensity, quality and summed
    MS2 intensity are shown on a log10 scale; non-positive values become missing.

    Parameters:
        filename (str, optional): The output filename. If the filename ends with ".html",
                                  the plot is saved as an interactive HTML file; otherwise,
                                  if provided, the plot is saved as a PNG image. If not provided,
                                  the interactive plot is displayed.

    Returns:
        None
    """

    def _first_spec(specs):
        # First MS2 spectrum of a feature, or None when there is none
        # (missing entry or empty list).
        if specs is None or len(specs) == 0:
            return None
        return specs[0]

    def _log10_positive(values):
        # log10 of strictly positive values; everything else becomes NaN.
        # Masking before the log avoids the RuntimeWarnings that np.where triggers
        # by evaluating log10 on the non-positive entries as well.
        return np.log10(values.where(values > 0))

    # Work on a pandas copy of features_df. to_pandas already yields an independent
    # frame; a table that is already pandas is copied explicitly.
    feats = self.features_df
    feats = feats.to_pandas() if hasattr(feats, "to_pandas") else feats.copy()

    # Compute m/z delta for each feature
    feats["mz_delta"] = feats["mz_end"] - feats["mz_start"]

    first_specs = feats["ms2_specs"].apply(_first_spec)
    # Number of peaks in the first MS2 spectrum
    feats["MS2peaks"] = first_specs.apply(lambda s: len(s) if s is not None else 0)
    # Sum of intensities in the first MS2 spectrum
    feats["MS2int"] = first_specs.apply(lambda s: sum(s.inty) if s is not None else 0)

    # Ratio of MS2 to MS1 intensities (computed on the linear scale). A zero MS1
    # intensity yields +/-inf, which dropna() would keep, so turn it into NaN.
    feats["MS2toMS1"] = (feats["MS2int"] / feats["inty"]).replace(
        [np.inf, -np.inf],
        np.nan,
    )

    # log10 transformation of intensity, quality, and MS2int
    # NOTE: the chrom_heights metrics are intentionally left out (reported as buggy).
    for col in ("inty", "quality", "MS2int"):
        feats[col] = _log10_positive(feats[col])

    # Separate features based on presence of MS2 data
    has_ms2 = feats["ms2_scans"].notnull()
    feats_with_MS2 = feats[has_ms2]
    feats_without_MS2 = feats[~has_ms2]

    # Define the metrics to plot
    cols_to_plot = [
        "mz",
        "mz_delta",
        "inty",
        "quality",
        "rt",
        "rt_delta",
        "chrom_coherence",
        "chrom_prominence",
        "chrom_prominence_scaled",
        "MS2peaks",
        "MS2int",
        "MS2toMS1",
    ]

    density_plots = []
    # Create overlaid distribution plots for each metric
    for col in cols_to_plot:
        # Extract non-null values from both groups
        data_with = feats_with_MS2[col].dropna().values
        data_without = feats_without_MS2[col].dropna().values

        dist_with = hv.Distribution(data_with, label="With MS2").opts(
            color="red",
            alpha=0.6,
        )
        dist_without = hv.Distribution(data_without, label="Without MS2").opts(
            color="blue",
            alpha=0.6,
        )

        # Overlay the distributions with a legend and hover tool enabled
        overlay = (dist_with * dist_without).opts(
            title=col,
            show_legend=True,
            tools=["hover"],
        )
        density_plots.append(overlay)

    # Arrange the plots in a layout with three columns
    layout = hv.Layout(density_plots).cols(3).opts(shared_axes=False)

    # Save or display the layout based on the filename parameter
    if filename is None:
        panel.panel(layout).show()
    elif filename.endswith(".html"):
        panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
    else:
        hv.save(layout, filename, fmt="png")
1588
+
1589
+
1590
+ def plot_tic(
1591
+ self,
1592
+ title=None,
1593
+ filename=None,
1594
+ ):
1595
+ # get all ms_level == 1 scans from self.scans_df
1596
+ scans = self.scans_df.filter(pl.col("ms_level") == 1)
1597
+ # select rt, scan_uid and inty_tot, convert to pandas
1598
+ data = scans[["rt", "scan_uid", "inty_tot"]].to_pandas()
1599
+ # sort by rt
1600
+ data = data.sort_values("rt")
1601
+
1602
+ # plot using hv.Curve
1603
+ tic = hv.Curve(data, kdims=["rt"], vdims=["inty_tot"])
1604
+ tic.opts(
1605
+ title=title,
1606
+ xlabel="Retention Time (min)",
1607
+ ylabel="TIC",
1608
+ height=250,
1609
+ width=100,
1610
+ )