masster 0.2.5__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +27 -27
- masster/_version.py +17 -17
- masster/chromatogram.py +497 -503
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
- masster/logger.py +318 -244
- masster/sample/__init__.py +9 -9
- masster/sample/defaults/__init__.py +15 -15
- masster/sample/defaults/find_adducts_def.py +325 -325
- masster/sample/defaults/find_features_def.py +366 -366
- masster/sample/defaults/find_ms2_def.py +285 -285
- masster/sample/defaults/get_spectrum_def.py +314 -318
- masster/sample/defaults/sample_def.py +374 -378
- masster/sample/h5.py +1321 -1297
- masster/sample/helpers.py +833 -364
- masster/sample/lib.py +762 -0
- masster/sample/load.py +1220 -1187
- masster/sample/parameters.py +131 -131
- masster/sample/plot.py +1610 -1622
- masster/sample/processing.py +1402 -1416
- masster/sample/quant.py +209 -0
- masster/sample/sample.py +391 -387
- masster/sample/sample5_schema.json +181 -181
- masster/sample/save.py +737 -736
- masster/sample/sciex.py +1213 -0
- masster/spectrum.py +1287 -1319
- masster/study/__init__.py +9 -9
- masster/study/defaults/__init__.py +21 -19
- masster/study/defaults/align_def.py +267 -267
- masster/study/defaults/export_def.py +41 -40
- masster/study/defaults/fill_chrom_def.py +264 -264
- masster/study/defaults/fill_def.py +260 -0
- masster/study/defaults/find_consensus_def.py +256 -256
- masster/study/defaults/find_ms2_def.py +163 -163
- masster/study/defaults/integrate_chrom_def.py +225 -225
- masster/study/defaults/integrate_def.py +221 -0
- masster/study/defaults/merge_def.py +256 -0
- masster/study/defaults/study_def.py +272 -269
- masster/study/export.py +674 -287
- masster/study/h5.py +1398 -886
- masster/study/helpers.py +1650 -433
- masster/study/helpers_optimized.py +317 -0
- masster/study/load.py +1201 -1078
- masster/study/parameters.py +99 -99
- masster/study/plot.py +632 -645
- masster/study/processing.py +1057 -1046
- masster/study/save.py +149 -134
- masster/study/study.py +606 -522
- masster/study/study5_schema.json +247 -241
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/METADATA +15 -10
- masster-0.3.0.dist-info/RECORD +59 -0
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/licenses/LICENSE +661 -661
- masster-0.2.5.dist-info/RECORD +0 -50
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/WHEEL +0 -0
- {masster-0.2.5.dist-info → masster-0.3.0.dist-info}/entry_points.txt +0 -0
masster/sample/plot.py
CHANGED
|
@@ -1,1622 +1,1610 @@
|
|
|
1
|
-
"""
|
|
2
|
-
_plots.py
|
|
3
|
-
|
|
4
|
-
This module provides visualization functions for mass spectrometry data analysis.
|
|
5
|
-
It contains plotting utilities for extracted ion chromatograms (EICs), 2D data maps,
|
|
6
|
-
feature visualizations, and interactive dashboards using modern visualization libraries.
|
|
7
|
-
|
|
8
|
-
Key Features:
|
|
9
|
-
- **Extracted Ion Chromatograms (EICs)**: Interactive chromatographic plotting with feature annotations.
|
|
10
|
-
- **2D Data Visualization**: Mass spectrometry data visualization with datashader for large datasets.
|
|
11
|
-
- **Feature Plotting**: Visualize detected features with retention time and m/z information.
|
|
12
|
-
- **Interactive Dashboards**: Create interactive panels for data exploration and analysis.
|
|
13
|
-
- **Multi-Sample Plotting**: Comparative visualizations across multiple samples.
|
|
14
|
-
- **Export Capabilities**: Save plots in various formats (HTML, PNG, SVG).
|
|
15
|
-
|
|
16
|
-
Dependencies:
|
|
17
|
-
- `holoviews`: For high-level data visualization and interactive plots.
|
|
18
|
-
- `datashader`: For rendering large datasets efficiently.
|
|
19
|
-
- `panel`: For creating interactive web applications and dashboards.
|
|
20
|
-
- `bokeh`: For low-level plotting control and customization.
|
|
21
|
-
- `polars` and `pandas`: For data manipulation and processing.
|
|
22
|
-
- `numpy`: For numerical computations.
|
|
23
|
-
|
|
24
|
-
Functions:
|
|
25
|
-
- `plot_eic()`: Generate extracted ion chromatograms with feature overlays.
|
|
26
|
-
- `plot_2d()`: Create 2D mass spectrometry data visualizations.
|
|
27
|
-
- `plot_features()`: Visualize detected features in retention time vs m/z space.
|
|
28
|
-
- Various utility functions for plot styling and configuration.
|
|
29
|
-
|
|
30
|
-
Supported Plot Types:
|
|
31
|
-
- Extracted Ion Chromatograms (EIC)
|
|
32
|
-
- Total Ion Chromatograms (TIC)
|
|
33
|
-
- Base Peak Chromatograms (BPC)
|
|
34
|
-
- 2D intensity maps (RT vs m/z)
|
|
35
|
-
- Feature scatter plots
|
|
36
|
-
- Interactive dashboards
|
|
37
|
-
|
|
38
|
-
See Also:
|
|
39
|
-
- `parameters._plot_parameters`: For plot-specific parameter configuration.
|
|
40
|
-
- `single.py`: For applying plotting methods to ddafile objects.
|
|
41
|
-
- `study.py`: For study-level visualization functions.
|
|
42
|
-
|
|
43
|
-
"""
|
|
44
|
-
|
|
45
|
-
import os
|
|
46
|
-
|
|
47
|
-
import datashader as ds
|
|
48
|
-
import holoviews as hv
|
|
49
|
-
import holoviews.operation.datashader as hd
|
|
50
|
-
import numpy as np
|
|
51
|
-
import pandas as pd
|
|
52
|
-
import panel
|
|
53
|
-
import polars as pl
|
|
54
|
-
|
|
55
|
-
from bokeh.models import HoverTool
|
|
56
|
-
from holoviews import dim
|
|
57
|
-
from holoviews.plotting.util import process_cmap
|
|
58
|
-
from matplotlib.colors import rgb2hex
|
|
59
|
-
|
|
60
|
-
# Parameters removed - using hardcoded defaults
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
hv.extension("bokeh")
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def plot_eic(
|
|
67
|
-
self,
|
|
68
|
-
feature_uid=None,
|
|
69
|
-
filename=None,
|
|
70
|
-
rt_tol=10,
|
|
71
|
-
rt_tol_factor_plot=1,
|
|
72
|
-
mz_tol=0.0005,
|
|
73
|
-
mz_tol_factor_plot=1,
|
|
74
|
-
link_x=False,
|
|
75
|
-
):
|
|
76
|
-
"""
|
|
77
|
-
Plot Extracted Ion Chromatograms (EICs) for one or more features using MS1 data and feature metadata.
|
|
78
|
-
|
|
79
|
-
This function filters MS1 data based on retention time (rt) and mass-to-charge ratio (mz) windows
|
|
80
|
-
derived from feature information in `features_df`. It then generates interactive EIC plots using
|
|
81
|
-
HoloViews, with feature retention time windows annotated. Plots can be displayed interactively or
|
|
82
|
-
saved to a file.
|
|
83
|
-
|
|
84
|
-
Parameters:
|
|
85
|
-
feature_uid (int or list of int, optional):
|
|
86
|
-
Feature identifier(s) for EIC generation. If None, EICs for all features in `features_df` are plotted.
|
|
87
|
-
filename (str, optional):
|
|
88
|
-
Output file path. If ending with `.html`, saves as interactive HTML; otherwise, saves as PNG.
|
|
89
|
-
If not provided, displays the plot interactively.
|
|
90
|
-
rt_tol (float, default=10):
|
|
91
|
-
Retention time tolerance (in seconds) added to feature boundaries for MS1 data filtering.
|
|
92
|
-
rt_tol_factor_plot (float, default=1):
|
|
93
|
-
Retention time tolerance factor.
|
|
94
|
-
mz_tol (float, default=0.0005):
|
|
95
|
-
m/z tolerance added to feature boundaries for MS1 data filtering.
|
|
96
|
-
mz_tol_factor_plot (float, default=1):
|
|
97
|
-
m/z time tolerance factor.
|
|
98
|
-
link_x (bool, default=True):
|
|
99
|
-
If True, links the x-axes (retention time) across all EIC subplots.
|
|
100
|
-
|
|
101
|
-
Returns:
|
|
102
|
-
None
|
|
103
|
-
|
|
104
|
-
Notes:
|
|
105
|
-
- Uses `features_df` for feature metadata and `ms1_df` (Polars DataFrame) for MS1 data.
|
|
106
|
-
- Aggregates MS1 intensities by retention time.
|
|
107
|
-
- Utilizes HoloViews for visualization and Panel for layout/display.
|
|
108
|
-
"""
|
|
109
|
-
# plots the EIC for a given feature id
|
|
110
|
-
# If rt or mz are not provided, they are extracted from features_df using the supplied feature id (feature_uid)
|
|
111
|
-
|
|
112
|
-
feature_uids = feature_uid
|
|
113
|
-
# if feature_uids is None, plot all features
|
|
114
|
-
if feature_uids is None:
|
|
115
|
-
feats = self.features_df.clone()
|
|
116
|
-
else:
|
|
117
|
-
if isinstance(feature_uids, int):
|
|
118
|
-
feature_uids = [feature_uids]
|
|
119
|
-
# select only the features with feature_uid in feature_uids
|
|
120
|
-
feats = self.features_df[
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
#
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
)
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
#
|
|
156
|
-
eic_df = eic_df.
|
|
157
|
-
|
|
158
|
-
eic_df
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
layout = hv.Layout(eic_plots).opts(shared_axes=
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
Whether to overlay
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
-
|
|
265
|
-
|
|
266
|
-
-
|
|
267
|
-
|
|
268
|
-
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
cmap = "
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
#
|
|
289
|
-
spectradf =
|
|
290
|
-
#
|
|
291
|
-
spectradf = spectradf.
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
if
|
|
295
|
-
spectradf = spectradf[
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
)
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
if
|
|
377
|
-
feats = feats
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
"
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
],
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
],
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
"
|
|
438
|
-
"
|
|
439
|
-
"
|
|
440
|
-
"
|
|
441
|
-
"
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
)
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
if
|
|
503
|
-
overlay = overlay
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
#
|
|
589
|
-
spectradf =
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
)
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
)
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
#
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
"
|
|
685
|
-
"
|
|
686
|
-
"
|
|
687
|
-
"
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
]
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
"
|
|
706
|
-
"
|
|
707
|
-
"
|
|
708
|
-
"
|
|
709
|
-
"
|
|
710
|
-
"
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
if
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
cvalues =
|
|
753
|
-
|
|
754
|
-
cvalues =
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
if
|
|
912
|
-
overlay = overlay
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
):
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
""
|
|
971
|
-
#
|
|
972
|
-
|
|
973
|
-
if
|
|
974
|
-
print("
|
|
975
|
-
return
|
|
976
|
-
|
|
977
|
-
if
|
|
978
|
-
print("
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
# get top n fragments
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
# get the
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
#
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
)
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
)
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
# find features
|
|
1216
|
-
features_df = feats[feats['ms2_scans'].
|
|
1217
|
-
|
|
1218
|
-
features_df, kdims=["rt", "mz"], vdims=["feature_uid", "inty", "quality", "rt_delta"
|
|
1219
|
-
).options(
|
|
1220
|
-
color=
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
tools=["hover"],
|
|
1224
|
-
)
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
)
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
if
|
|
1280
|
-
overlay = overlay
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1306
|
-
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
#
|
|
1319
|
-
|
|
1320
|
-
if
|
|
1321
|
-
print("
|
|
1322
|
-
return
|
|
1323
|
-
|
|
1324
|
-
if
|
|
1325
|
-
print("
|
|
1326
|
-
|
|
1327
|
-
|
|
1328
|
-
# get top n fragments
|
|
1329
|
-
|
|
1330
|
-
|
|
1331
|
-
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1341
|
-
|
|
1342
|
-
|
|
1343
|
-
|
|
1344
|
-
|
|
1345
|
-
|
|
1346
|
-
|
|
1347
|
-
|
|
1348
|
-
|
|
1349
|
-
|
|
1350
|
-
|
|
1351
|
-
|
|
1352
|
-
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
|
|
1358
|
-
|
|
1359
|
-
|
|
1360
|
-
|
|
1361
|
-
|
|
1362
|
-
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
|
|
1366
|
-
q1_df
|
|
1367
|
-
|
|
1368
|
-
|
|
1369
|
-
for
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1378
|
-
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
|
|
1382
|
-
|
|
1383
|
-
|
|
1384
|
-
|
|
1385
|
-
|
|
1386
|
-
|
|
1387
|
-
|
|
1388
|
-
|
|
1389
|
-
|
|
1390
|
-
|
|
1391
|
-
|
|
1392
|
-
|
|
1393
|
-
|
|
1394
|
-
|
|
1395
|
-
|
|
1396
|
-
|
|
1397
|
-
|
|
1398
|
-
|
|
1399
|
-
|
|
1400
|
-
|
|
1401
|
-
|
|
1402
|
-
|
|
1403
|
-
|
|
1404
|
-
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
filename
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
|
|
1427
|
-
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
stats =
|
|
1436
|
-
|
|
1437
|
-
|
|
1438
|
-
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
"
|
|
1444
|
-
"
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1449
|
-
|
|
1450
|
-
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1507
|
-
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
)
|
|
1517
|
-
#
|
|
1518
|
-
|
|
1519
|
-
|
|
1520
|
-
|
|
1521
|
-
|
|
1522
|
-
|
|
1523
|
-
|
|
1524
|
-
|
|
1525
|
-
|
|
1526
|
-
#
|
|
1527
|
-
|
|
1528
|
-
|
|
1529
|
-
|
|
1530
|
-
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1539
|
-
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1543
|
-
|
|
1544
|
-
"
|
|
1545
|
-
"
|
|
1546
|
-
"
|
|
1547
|
-
|
|
1548
|
-
|
|
1549
|
-
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1554
|
-
|
|
1555
|
-
#
|
|
1556
|
-
|
|
1557
|
-
|
|
1558
|
-
|
|
1559
|
-
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
1569
|
-
|
|
1570
|
-
|
|
1571
|
-
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
|
|
1575
|
-
)
|
|
1576
|
-
|
|
1577
|
-
|
|
1578
|
-
|
|
1579
|
-
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1583
|
-
|
|
1584
|
-
|
|
1585
|
-
|
|
1586
|
-
|
|
1587
|
-
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
|
|
1592
|
-
|
|
1593
|
-
|
|
1594
|
-
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
1603
|
-
|
|
1604
|
-
|
|
1605
|
-
|
|
1606
|
-
)
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
|
|
1610
|
-
|
|
1611
|
-
# sort by rt
|
|
1612
|
-
data = data.sort_values("rt")
|
|
1613
|
-
|
|
1614
|
-
# plot using hv.Curve
|
|
1615
|
-
tic = hv.Curve(data, kdims=["rt"], vdims=["inty_tot"])
|
|
1616
|
-
tic.opts(
|
|
1617
|
-
title=title,
|
|
1618
|
-
xlabel="Retention Time (min)",
|
|
1619
|
-
ylabel="TIC",
|
|
1620
|
-
height=250,
|
|
1621
|
-
width=100,
|
|
1622
|
-
)
|
|
1
|
+
"""
|
|
2
|
+
_plots.py
|
|
3
|
+
|
|
4
|
+
This module provides visualization functions for mass spectrometry data analysis.
|
|
5
|
+
It contains plotting utilities for extracted ion chromatograms (EICs), 2D data maps,
|
|
6
|
+
feature visualizations, and interactive dashboards using modern visualization libraries.
|
|
7
|
+
|
|
8
|
+
Key Features:
|
|
9
|
+
- **Extracted Ion Chromatograms (EICs)**: Interactive chromatographic plotting with feature annotations.
|
|
10
|
+
- **2D Data Visualization**: Mass spectrometry data visualization with datashader for large datasets.
|
|
11
|
+
- **Feature Plotting**: Visualize detected features with retention time and m/z information.
|
|
12
|
+
- **Interactive Dashboards**: Create interactive panels for data exploration and analysis.
|
|
13
|
+
- **Multi-Sample Plotting**: Comparative visualizations across multiple samples.
|
|
14
|
+
- **Export Capabilities**: Save plots in various formats (HTML, PNG, SVG).
|
|
15
|
+
|
|
16
|
+
Dependencies:
|
|
17
|
+
- `holoviews`: For high-level data visualization and interactive plots.
|
|
18
|
+
- `datashader`: For rendering large datasets efficiently.
|
|
19
|
+
- `panel`: For creating interactive web applications and dashboards.
|
|
20
|
+
- `bokeh`: For low-level plotting control and customization.
|
|
21
|
+
- `polars` and `pandas`: For data manipulation and processing.
|
|
22
|
+
- `numpy`: For numerical computations.
|
|
23
|
+
|
|
24
|
+
Functions:
|
|
25
|
+
- `plot_eic()`: Generate extracted ion chromatograms with feature overlays.
|
|
26
|
+
- `plot_2d()`: Create 2D mass spectrometry data visualizations.
|
|
27
|
+
- `plot_features()`: Visualize detected features in retention time vs m/z space.
|
|
28
|
+
- Various utility functions for plot styling and configuration.
|
|
29
|
+
|
|
30
|
+
Supported Plot Types:
|
|
31
|
+
- Extracted Ion Chromatograms (EIC)
|
|
32
|
+
- Total Ion Chromatograms (TIC)
|
|
33
|
+
- Base Peak Chromatograms (BPC)
|
|
34
|
+
- 2D intensity maps (RT vs m/z)
|
|
35
|
+
- Feature scatter plots
|
|
36
|
+
- Interactive dashboards
|
|
37
|
+
|
|
38
|
+
See Also:
|
|
39
|
+
- `parameters._plot_parameters`: For plot-specific parameter configuration.
|
|
40
|
+
- `single.py`: For applying plotting methods to ddafile objects.
|
|
41
|
+
- `study.py`: For study-level visualization functions.
|
|
42
|
+
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
import os
|
|
46
|
+
|
|
47
|
+
import datashader as ds
|
|
48
|
+
import holoviews as hv
|
|
49
|
+
import holoviews.operation.datashader as hd
|
|
50
|
+
import numpy as np
|
|
51
|
+
import pandas as pd
|
|
52
|
+
import panel
|
|
53
|
+
import polars as pl
|
|
54
|
+
|
|
55
|
+
from bokeh.models import HoverTool
|
|
56
|
+
from holoviews import dim
|
|
57
|
+
from holoviews.plotting.util import process_cmap
|
|
58
|
+
from matplotlib.colors import rgb2hex
|
|
59
|
+
|
|
60
|
+
# Parameters removed - using hardcoded defaults
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
hv.extension("bokeh")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def plot_eic(
    self,
    feature_uid=None,
    filename=None,
    rt_tol=10,
    rt_tol_factor_plot=1,
    mz_tol=0.0005,
    mz_tol_factor_plot=1,
    link_x=False,
):
    """
    Plot Extracted Ion Chromatograms (EICs) for one or more features.

    For every selected feature the MS1 data in ``self.ms1_df`` is restricted to the
    feature's retention-time and m/z boundaries (widened by the plot tolerances),
    intensities are summed per retention time, and the resulting trace is drawn as
    a HoloViews curve. Dashed vertical lines mark the feature's ``rt_start`` and
    ``rt_end``. All curves are stacked in a single column.

    Parameters:
        feature_uid (int or iterable of int, optional):
            Feature identifier(s) to plot. If None, all features in `features_df` are plotted.
        filename (str, optional):
            Output file path. If ending with `.html`, saves as interactive HTML; otherwise, saves as PNG.
            If not provided, displays the plot interactively.
        rt_tol (float, default=10):
            Retention time tolerance (in seconds) added on both sides of the feature boundaries.
        rt_tol_factor_plot (float, default=1):
            Multiplier applied to `rt_tol`; the effective tolerance is `rt_tol * rt_tol_factor_plot`.
        mz_tol (float, default=0.0005):
            m/z tolerance added on both sides of the feature boundaries.
        mz_tol_factor_plot (float, default=1):
            Multiplier applied to `mz_tol`; the effective tolerance is `mz_tol * mz_tol_factor_plot`.
        link_x (bool, default=False):
            If True, the layout is created with shared axes; otherwise axes are independent.

    Returns:
        None

    Notes:
        - `features_df` may be a Polars or a pandas DataFrame; `ms1_df` must be a Polars DataFrame.
        - Features without any MS1 point in their window are skipped with a message.
        - Nothing is displayed or saved when no EIC could be generated.
    """
    if self.features_df is None or self.ms1_df is None:
        print("No features or MS1 data available.")
        return

    # Only these columns are needed; selecting them first keeps the conversion cheap.
    needed = ["feature_uid", "rt_start", "rt_end", "mz", "mz_start", "mz_end"]
    feats = self.features_df
    if hasattr(feats, "to_pandas"):
        # Polars frame: the row-wise access below relies on pandas.
        feats = feats.select(needed).to_pandas()
    else:
        feats = feats[needed]

    if feature_uid is not None:
        if isinstance(feature_uid, (int, np.integer)):
            wanted = [feature_uid]
        else:
            wanted = list(feature_uid)
        feats = feats[feats["feature_uid"].isin(wanted)]

    mz_tol_plot = mz_tol * mz_tol_factor_plot
    rt_tol_plot = rt_tol * rt_tol_factor_plot

    eic_plots = []
    for feat in feats.itertuples(index=False):
        uid = feat.feature_uid
        rt_start = float(feat.rt_start)
        rt_end = float(feat.rt_end)

        # Restrict the MS1 points to the (tolerance-widened) feature window.
        eic_df = self.ms1_df.filter(
            pl.col("rt") >= rt_start - rt_tol_plot,
            pl.col("rt") <= rt_end + rt_tol_plot,
            pl.col("mz") >= float(feat.mz_start) - mz_tol_plot,
            pl.col("mz") <= float(feat.mz_end) + mz_tol_plot,
        )

        if eic_df.is_empty():
            print("No MS1 data found in the specified window.")
            continue

        # One intensity value per retention time: sum all points sharing the same rt.
        eic_df = eic_df.to_pandas().groupby("rt").agg({"inty": "sum"}).reset_index()
        yname = f"inty_{uid}"
        eic_df = eic_df.rename(columns={"inty": yname})

        eic = hv.Curve(eic_df, kdims=["rt"], vdims=[yname]).opts(
            title=f"EIC for feature {uid}, mz = {feat.mz:.4f}",
            xlabel="Retention time (s)",
            ylabel="Intensity",
            width=1000,
            tools=["hover"],
            height=250,
            axiswise=True,
            color="black",
        )

        # Mark the feature's retention-time boundaries.
        for boundary in (rt_start, rt_end):
            eic = eic * hv.VLine(boundary).opts(
                color="blue",
                line_width=1,
                line_dash="dashed",
                axiswise=True,
            )

        eic_plots.append(eic)

    if not eic_plots:
        print("No EIC could be generated for the requested features.")
        return

    grid = hv.Layout(eic_plots).opts(shared_axes=bool(link_x)).cols(1)
    layout = panel.Column(grid)

    if filename is not None:
        if filename.endswith(".html"):
            layout.save(filename, embed=True)
        else:
            # Save the HoloViews layout itself (as plot_2d does), not the Panel wrapper.
            hv.save(grid, filename, fmt="png")
    else:
        layout.show()
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def plot_2d(
    self,
    filename=None,
    show_features=True,
    show_only_features_with_ms2=False,
    show_isotopes=False,
    show_ms2=False,
    title=None,
    cmap=None,
    marker="circle",
    markersize=10,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    mz_range=None,
    rt_range=None,
):
    """
    Plot a two-dimensional (retention time vs. m/z) view of the MS1 survey scans
    with optional overlays of features, isotopes and MS2 scans.

    The MS1 points from ``self.ms1_df`` are rasterized with datashader (maximum
    intensity per pixel, logarithmic colour scale) and optional point layers are
    drawn on top.

    Parameters:
        filename (str, optional):
            Path to save the plot. If provided and ends with ".html", the plot is saved as an
            interactive HTML file; otherwise, it is saved as a PNG image. If None, the plot is shown.
        show_features (bool, default True):
            Whether to overlay detected features (``iso == 0``) on the plot.
        show_only_features_with_ms2 (bool, default False):
            If True, features without MS2 scans are not drawn.
        show_isotopes (bool, default False):
            Whether to overlay isotope features (``iso > 0``). Only evaluated when features
            are shown.
        show_ms2 (bool, default False):
            Whether to overlay MS2 scans (from ``self.scans_df``) at their precursor m/z.
        title (str, optional):
            Title of the plot.
        cmap (str, optional):
            Colormap for the rasterized background. Defaults to "iridescent_r";
            "grey" is translated to "Greys256".
        marker (str, default 'circle'):
            Marker type for feature and isotope points (MS2 scans always use "x").
        markersize (int, default 10):
            Marker size for interactive/HTML output. PNG output uses a fixed size of 2.
        raster_dynamic (bool, default True):
            Whether to use dynamic rasterization. Forced to False for PNG output.
        raster_max_px (int, default 8):
            Maximum pixel size used by dynspread.
        raster_threshold (float, default 0.8):
            Threshold used by dynspread.
        mz_range (tuple, optional):
            ``(min, max)`` m/z window (inclusive) applied to the MS1 data and all overlays.
        rt_range (tuple, optional):
            ``(min, max)`` retention-time window (inclusive) applied to the MS1 data and all overlays.

    Returns:
        None

    Side Effects:
        - Logs an error and returns if no MS1 data is available or nothing is left after filtering.
        - Either shows the plot interactively or writes the output to a file.
    """
    if self.ms1_df is None:
        self.logger.error("No MS1 data available.")
        return

    if cmap is None:
        cmap = "iridescent_r"
    elif cmap == "grey":
        cmap = "Greys256"

    def _in_window(frame, mz_col="mz"):
        # Apply the optional m/z and rt windows to a pandas frame.
        if mz_range is not None:
            frame = frame[(frame[mz_col] >= mz_range[0]) & (frame[mz_col] <= mz_range[1])]
        if rt_range is not None:
            frame = frame[(frame["rt"] >= rt_range[0]) & (frame["rt"] <= rt_range[1])]
        return frame

    def _first_scan(value):
        # Keep only the first MS2 scan id of a list-like entry; empty entries count as "no MS2".
        if isinstance(value, (list, tuple, np.ndarray)):
            return value[0] if len(value) > 0 else None
        return value

    # ms1_df is a Polars DataFrame: filter with expressions, then hand pandas to HoloViews.
    spectradf = self.ms1_df.select(["rt", "mz", "inty"]).filter(pl.col("inty") >= 1)
    if mz_range is not None:
        spectradf = spectradf.filter(
            (pl.col("mz") >= mz_range[0]) & (pl.col("mz") <= mz_range[1]),
        )
    if rt_range is not None:
        spectradf = spectradf.filter(
            (pl.col("rt") >= rt_range[0]) & (pl.col("rt") <= rt_range[1]),
        )
    if spectradf.is_empty():
        self.logger.error("No MS1 data points left after filtering.")
        return
    spectradf = spectradf.to_pandas()

    minrt, maxrt = spectradf["rt"].min(), spectradf["rt"].max()
    minmz, maxmz = spectradf["mz"].min(), spectradf["mz"].max()

    def new_bounds_hook(plot, elem):
        # Prevent panning/zooming beyond the extent of the plotted MS1 data.
        plot.state.x_range.bounds = minrt, maxrt
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        spectradf,
        kdims=["rt", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    size_1 = 1 * markersize
    size_2 = 1 * markersize
    color_1 = "forestgreen"
    color_2 = "darkorange"
    if filename is not None and not filename.endswith(".html"):
        # Static image: small markers and no dynamic re-rasterization.
        size_1 = 2
        size_2 = 2
        raster_dynamic = False

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Retention time (s)",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )

    raster = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    feature_points_1 = None  # features with MS2
    feature_points_2 = None  # features without MS2
    feature_points_3 = None  # orphan MS2 scans
    feature_points_4 = None  # MS2 scans linked to a feature
    feature_points_iso = None  # isotope features

    if self.features_df is not None and show_features:
        all_feats = self.features_df
        # Convert to pandas for operations that require pandas functionality.
        if hasattr(all_feats, "to_pandas"):
            all_feats = all_feats.to_pandas()
        else:
            all_feats = all_feats.copy()
        all_feats["ms2_scans"] = all_feats["ms2_scans"].apply(_first_scan)
        all_feats = _in_window(all_feats)

        # Main features only (iso == 0), split by presence of MS2 data.
        feats = all_feats[all_feats["iso"] == 0]
        feature_points_1 = hv.Points(
            feats[feats["ms2_scans"].notnull()],
            kdims=["rt", "mz"],
            vdims=[
                "feature_uid",
                "inty",
                "quality",
                "rt_delta",
                "ms2_scans",
                "chrom_coherence",
                "chrom_prominence_scaled",
            ],
            label="Features with MS2 data",
        ).options(
            color=color_1,
            marker=marker,
            size=size_1,
            tools=["hover"],
        )
        feature_points_2 = hv.Points(
            feats[feats["ms2_scans"].isnull()],
            kdims=["rt", "mz"],
            vdims=[
                "feature_uid",
                "inty",
                "quality",
                "rt_delta",
                "chrom_coherence",
                "chrom_prominence_scaled",
            ],
            label="Features without MS2 data",
        ).options(
            color="red",
            size=size_2,
            marker=marker,
            tools=["hover"],
        )

        if show_isotopes:
            # Same range-filtered table as the main features, restricted to isotopes.
            feature_points_iso = hv.Points(
                all_feats[all_feats["iso"] > 0],
                kdims=["rt", "mz"],
                vdims=[
                    "feature_uid",
                    "inty",
                    "quality",
                    "rt_delta",
                    "iso",
                    "iso_of",
                    "chrom_coherence",
                    "chrom_prominence_scaled",
                ],
                label="Isotopes",
            ).options(
                color="violet",
                marker=marker,
                size=size_1,
                tools=["hover"],
            )

    if show_ms2 and self.scans_df is not None:
        ms2_scans = self.scans_df.filter(pl.col("ms_level") == 2)

        # MS2 scans that are not linked to a feature.
        ms2_orphan = _in_window(
            ms2_scans.filter(pl.col("feature_uid") < 0).to_pandas(),
            mz_col="prec_mz",
        )
        if len(ms2_orphan) > 0:
            feature_points_3 = hv.Points(
                ms2_orphan,
                kdims=["rt", "prec_mz"],
                vdims=["index", "inty_tot", "bl"],
                label="Orphan MS2 scans",
            ).options(
                color=color_2,
                marker="x",
                size=size_2,
                tools=["hover"],
            )

        # MS2 scans linked to a feature.
        ms2_linked = _in_window(
            ms2_scans.filter(pl.col("feature_uid") >= 0).to_pandas(),
            mz_col="prec_mz",
        )
        if len(ms2_linked) > 0:
            feature_points_4 = hv.Points(
                ms2_linked,
                kdims=["rt", "prec_mz"],
                vdims=["index", "inty_tot", "bl"],
                label="Linked MS2 scans",
            ).options(
                color=color_1,
                marker="x",
                size=size_2,
                tools=["hover"],
            )

    # Stack the layers; the drawing order determines what ends up on top.
    overlay = raster
    if feature_points_4 is not None:
        overlay = overlay * feature_points_4
    if feature_points_3 is not None:
        overlay = overlay * feature_points_3
    if feature_points_1 is not None:
        overlay = overlay * feature_points_1
    if not show_only_features_with_ms2 and feature_points_2 is not None:
        overlay = overlay * feature_points_2
    if feature_points_iso is not None:
        overlay = overlay * feature_points_iso

    if title is not None:
        overlay = overlay.opts(title=title)

    layout = panel.Column(overlay)

    if filename is not None:
        if filename.endswith(".html"):
            layout.save(filename, embed=True)
        else:
            hv.save(overlay, filename, fmt="png")
    else:
        layout.show()
|
|
518
|
+
|
|
519
|
+
|
|
520
|
+
def plot_2d_oracle(
    self,
    oracle_folder=None,
    link_by_feature_uid=None,
    colorby="hg",
    filename=None,
    min_id_level=None,
    max_id_level=None,
    min_ms_level=None,
    title=None,
    cmap=None,
    markersize=10,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    mz_range=None,
    rt_range=None,
):
    """
    Plot a 2D overlay of MS1 survey scans and features coloured by oracle annotations.

    The oracle summary ("<oracle_folder>/diag/summary_by_feature.csv") is read, linked to the
    features in ``self.features_df`` and merged into the feature table. The MS1 data is drawn
    as a rasterized background; features are drawn on top in up to five layers depending on
    their identification level and on whether they have MS2 scans.

    Parameters:
        self: The object instance containing MS1 and feature data.
        oracle_folder (str, optional): Path to the oracle folder containing the annotation file
            (expected at "<oracle_folder>/diag/summary_by_feature.csv"). Required: if None,
            a message is printed and nothing is plotted.
        link_by_feature_uid (bool, optional): If truthy, the feature id is parsed from the
            "fid:<id>" tag in the oracle `title` column. Otherwise each oracle row is linked to
            the feature closest in rt within 1 s and 0.005 m/z of the oracle `rt`/`precursor`.
            In both cases only the annotation with the highest `id_level` is kept per feature.
        colorby (str, optional): Column used to colour annotated features. 'hg', 'class',
            'id_class' and 'id_hg' colour by `id_class`; 'ion'/'id_ion' by `id_ion`;
            'id_evidence'/'ms2_evidence' by `id_evidence`. Any other value leaves all points
            black. Default is 'hg'.
        filename (str, optional): Name of the file where the plot should be saved. If provided and ends with
            ".html", the panel layout is saved as an interactive HTML file; otherwise, the output is saved as a PNG.
        min_id_level (int, optional): Minimum identification level for oracle annotations to include.
        max_id_level (int, optional): Maximum identification level for oracle annotations to include.
        min_ms_level (int, optional): Minimum MS level for features to include.
        title (str, optional): Title to be displayed on the resulting plot. Default is None.
        cmap (str, optional): Colormap for the rasterized background. None and "grey" select
            'Greys256'; "iridescent" selects 'iridescent_r'; other names are passed through.
        markersize (int, optional): Marker size for feature points in the overlay. Default is 10.
        raster_dynamic (bool, optional): If True, enables dynamic rasterization of the overlay. If filename is provided
            and does not end with ".html", raster_dynamic is set to False. Default is True.
        raster_max_px (int, optional): Maximum pixel size for dynamic rasterization. Default is 8.
        raster_threshold (float, optional): Threshold for dynamic raster spread. Default is 0.8.
        mz_range (tuple, optional): m/z range for filtering MS1 data.
        rt_range (tuple, optional): Retention time range for filtering MS1 data.

    Returns:
        None

    The function either displays the interactive panel layout or saves the visualization to a file based on
    the provided filename. If the file object, the MS1 data, the feature data or the oracle folder is
    missing, or the annotation file cannot be read, a message is printed and nothing is plotted.
    """
    # Cheap precondition checks first, before any expensive rasterization.
    if self.file_obj is None:
        print("Please load a file first.")
        return
    if self.ms1_df is None:
        print("No MS1 data available.")
        return
    if self.features_df is None:
        print("No feature data available.")
        return
    if oracle_folder is None:
        print("No oracle folder provided.")
        return

    try:
        oracle_data = pd.read_csv(
            os.path.join(oracle_folder, "diag", "summary_by_feature.csv"),
        )
    except Exception:
        # Best effort: any read/parse problem aborts the plot with a message.
        print(f"Could not read {oracle_folder}/diag/summary_by_feature.csv")
        return

    if cmap is None or cmap == "grey":
        cmap = "Greys256"
    elif cmap == "iridescent":
        cmap = "iridescent_r"

    # ---- MS1 background -------------------------------------------------
    spectradf = self.ms1_df.select(["rt", "mz", "inty"]).to_pandas()
    # remove any inty<1
    spectradf = spectradf[spectradf["inty"] >= 1]
    if mz_range is not None:
        spectradf = spectradf[(spectradf["mz"] >= mz_range[0]) & (spectradf["mz"] <= mz_range[1])]
    if rt_range is not None:
        spectradf = spectradf[(spectradf["rt"] >= rt_range[0]) & (spectradf["rt"] <= rt_range[1])]

    minrt, maxrt = spectradf["rt"].min(), spectradf["rt"].max()
    minmz, maxmz = spectradf["mz"].min(), spectradf["mz"].max()

    def new_bounds_hook(plot, elem):
        # Prevent panning/zooming beyond the extent of the plotted MS1 data.
        plot.state.x_range.bounds = minrt, maxrt
        plot.state.y_range.bounds = minmz, maxmz

    points = hv.Points(
        spectradf,
        kdims=["rt", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    if filename is not None and not filename.endswith(".html"):
        # Static image: dynamic re-rasterization is pointless.
        raster_dynamic = False

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Retention time (s)",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )
    raster = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    # ---- Link oracle annotations to features ----------------------------
    feats = self.features_df
    # Convert to pandas for oracle operations that require pandas functionality.
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()
    else:
        feats = feats.copy()

    annotation_cols = [
        "title",
        "scan_idx",
        "mslevel",
        "hits",
        "id_level",
        "id_label",
        "id_ion",
        "id_class",
        "id_evidence",
        "score",
        "score2",
    ]
    if link_by_feature_uid:
        oracle_data = oracle_data[annotation_cols].copy()
        # The title begins with "fid:XYZ;" - rows without that tag stay unlinked (NaN).
        oracle_data["feature_uid"] = pd.to_numeric(
            oracle_data["title"].str.extract(r"fid:(\d+)", expand=False),
            errors="coerce",
        )
    else:
        oracle_data = oracle_data[["precursor", "rt"] + annotation_cols].copy()
        feat_rt = feats["rt"].to_numpy()
        feat_mz = feats["mz"].to_numpy()
        feat_uid = feats["feature_uid"].to_numpy()
        linked = []
        for rt, precursor in zip(
            oracle_data["rt"].to_numpy(),
            oracle_data["precursor"].to_numpy(),
        ):
            # Candidates lie within 1 s and 0.005 m/z; the closest in rt wins.
            delta_rt = np.abs(feat_rt - rt)
            hits = np.flatnonzero((delta_rt < 1) & (np.abs(feat_mz - precursor) < 0.005))
            if hits.size:
                linked.append(feat_uid[hits[np.argmin(delta_rt[hits])]])
            else:
                linked.append(None)
        oracle_data["feature_uid"] = linked
        oracle_data = oracle_data.drop(columns=["precursor", "rt"])

    # Unlinked rows cannot contribute to a left merge; dropping them also lets the key
    # be a proper integer column instead of an object column holding None.
    oracle_data = oracle_data.dropna(subset=["feature_uid"])
    oracle_data = oracle_data.assign(
        feature_uid=oracle_data["feature_uid"].astype("int64"),
    )
    # Keep the best annotation (highest id_level) per feature.
    oracle_data = oracle_data.sort_values(by=["id_level"], ascending=False)
    oracle_data = oracle_data.drop_duplicates(subset=["feature_uid"], keep="first")

    feats = feats.merge(oracle_data, how="left", on="feature_uid")

    # filter feats by id_level / ms level
    if min_id_level is not None:
        feats = feats[(feats["id_level"] >= min_id_level)]
    if max_id_level is not None:
        feats = feats[(feats["id_level"] <= max_id_level)]
    if min_ms_level is not None:
        feats = feats[(feats["mslevel"] >= min_ms_level)]

    # ---- Colour assignment ----------------------------------------------
    feats["color"] = "black"
    color_col = None
    cvalues = None
    if colorby in ["class", "hg", "id_class", "id_hg"]:
        color_col = "id_class"
        # Features without a class are grouped as 'mix'.
        feats["id_class"] = feats["id_class"].fillna("mix")
        # Order the classes by their reversed name so that names sharing a suffix are adjacent.
        cvalues = sorted(feats["id_class"].unique(), key=lambda name: name[::-1])
    elif colorby in ["ion", "id_ion"]:
        color_col = "id_ion"
        cvalues = feats["id_ion"].unique()
    elif colorby in ["id_evidence", "ms2_evidence"]:
        color_col = "id_evidence"
        cvalues = feats["id_evidence"].unique()

    if cvalues is not None:
        num_colors = len(cvalues)
        palette = process_cmap("rainbow", ncolors=num_colors, provider="colorcet")
        if num_colors > 1:
            # Spread the categories evenly over the whole palette.
            colors = [
                rgb2hex(palette[int(i * (len(palette) - 1) / (num_colors - 1))])
                for i in range(num_colors)
            ]
        else:
            colors = [rgb2hex(palette[0]) for _ in range(num_colors)]
        # Rows whose value matches no category (e.g. NaN) keep the default 'black'.
        for value, color in zip(cvalues, colors):
            feats.loc[feats[color_col] == value, "color"] = color

    # replace NaN with 0 in id_level
    feats["id_level"] = feats["id_level"].fillna(0)

    # ---- Feature layers -------------------------------------------------
    def _layer(frame, vdims, label, color, marker, fill_alpha):
        # Build one point layer, or None when there is nothing to draw.
        if len(frame) == 0:
            return None
        return hv.Points(
            frame,
            kdims=["rt", "mz"],
            vdims=vdims,
            label=label,
        ).options(
            color=color,
            marker=marker,
            size=markersize,
            fill_alpha=fill_alpha,
            tools=["hover"],
        )

    has_ms2 = feats["ms2_scans"].notnull()
    ms1_id_vdims = [
        "inty",
        "feature_uid",
        "id_level",
        "id_label",
        "id_ion",
        "id_class",
        "color",
    ]

    # features identified by MS2 (id_level == 2)
    feature_points_1 = _layer(
        feats[feats["id_level"] == 2],
        [
            "inty",
            "feature_uid",
            "id_level",
            "id_class",
            "id_label",
            "id_ion",
            "id_evidence",
            "score",
            "score2",
            "color",
        ],
        "ID by MS2",
        "color",
        "circle",
        1.0,
    )
    # features with MS2 scans and id_level == 1
    feature_points_2 = _layer(
        feats[has_ms2 & (feats["id_level"] == 1)],
        ms1_id_vdims,
        "ID by MS1, with MS2",
        "color",
        "circle",
        0.0,
    )
    # features without MS2 scans and id_level == 1
    feature_points_3 = _layer(
        feats[~has_ms2 & (feats["id_level"] == 1)],
        ms1_id_vdims,
        "ID by MS1, no MS2",
        "color",
        "diamond",
        0.0,
    )
    # features with MS2 scans but no identification (id_level < 1)
    feature_points_4 = _layer(
        feats[has_ms2 & (feats["id_level"] < 1)],
        ["inty", "feature_uid"],
        "No ID, with MS2",
        "gray",
        "circle",
        0.0,
    )
    # features without MS2 scans and without identification (id_level < 1)
    feature_points_5 = _layer(
        feats[~has_ms2 & (feats["id_level"] < 1)],
        ["inty", "feature_uid"],
        "No ID, no MS2",
        "gray",
        "diamond",
        0.0,
    )

    overlay = raster
    for layer in (
        feature_points_1,
        feature_points_2,
        feature_points_3,
        feature_points_4,
        feature_points_5,
    ):
        if layer is not None:
            overlay = overlay * layer

    if title is not None:
        overlay = overlay.opts(title=title)

    layout = panel.Column(overlay)

    if filename is not None:
        if filename.endswith(".html"):
            layout.save(filename, embed=True)
        else:
            hv.save(overlay, filename, fmt="png")
    else:
        layout.show()
|
|
927
|
+
|
|
928
|
+
|
|
929
|
+
def plot_ms2_eic(
    self,
    feature_uid=None,
    rt_tol=5,
    mz_tol=0.05,
    link_x=True,
    n=20,
    deisotope=True,
    centroid=True,
    filename=None,
):
    """
    Plot extracted ion chromatograms (EICs) for a feature's precursor and its top MS2 fragments.

    Parameters:
        feature_uid: Unique identifier of the feature. Must be present in ``features_df``;
            if None or unknown, a message is printed and nothing is plotted.
        rt_tol (float, optional): Retention time tolerance (in seconds) added on both sides of the
            feature's ``rt_start``/``rt_end`` window. Default is 5.
        mz_tol (float, optional): m/z tolerance forwarded to ``_spec_to_mat`` when extracting the
            precursor and fragment intensities. Default is 0.05.
        link_x (bool, optional): If True, the subplots share their axes (linked retention time axis).
            Default is True.
        n (int, optional): Number of most intense MS2 fragments to plot. Default is 20.
        deisotope (bool, optional): Forwarded to ``_spec_to_mat`` for the fragment EICs. Default is True.
        centroid (bool, optional): Forwarded to ``_spec_to_mat`` for the fragment EICs. Default is True.
        filename (str, optional): If given, the plot is saved instead of shown. A name ending in
            ".html" is saved as an interactive HTML file; any other name is saved as PNG.

    Returns:
        None

    Notes:
        - The fragments are taken from the first spectrum in the feature's ``ms2_specs``.
        - The precursor EIC is built from the MS1 scans inside the retention time window; the
          fragment EICs are built from one MS2 scan per MS1 scan, namely the MS2 scan of the same
          cycle whose ``prec_mz`` is closest to the feature m/z (within a fixed +/- 5 window).
        - If any cycle has no such MS2 scan, a message is printed and nothing is plotted.
        - Plots are arranged in a grid of 4 columns with hover tooltips for rt and scan_uid.
    """
    if feature_uid is None:
        print("Please provide a feature id.")
        return

    # The feature table is accessed pandas-style below; convert it first if it
    # offers to_pandas(), as the other plotting helpers in this module do.
    feats = self.features_df
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()

    feature = feats[feats["feature_uid"] == feature_uid]
    if len(feature) == 0:
        print("Feature id not found in features_df.")
        return

    ms2_specs = feature["ms2_specs"].values[0]
    if ms2_specs is None or len(ms2_specs) == 0:
        print("No MS2 data found for this feature.")
        return

    # m/z values of the n most intense fragments of the first MS2 spectrum
    top_fragments = ms2_specs[0].pandalize().sort_values(by="inty", ascending=False).head(n)
    top_mzs = top_fragments["mz"].values.tolist()

    # Scalars (not one-element Series) so they can be used in polars expressions and f-strings
    feature_mz = float(feature["mz"].values[0])
    rt_start = float(feature["rt_start"].values[0]) - rt_tol
    rt_end = float(feature["rt_end"].values[0]) + rt_tol

    # MS1 scans inside the extended retention time window
    ms1_scans = self.scans_df.filter(
        (pl.col("ms_level") == 1) & (pl.col("rt") > rt_start) & (pl.col("rt") < rt_end),
    )
    if len(ms1_scans) == 0:
        print(f"No scans found between {rt_start} and {rt_end}.")
        return
    rts = ms1_scans["rt"].to_list()

    eic_prec = self._spec_to_mat(
        ms1_scans["scan_uid"].to_list(),
        mz_ref=[feature_mz],
        mz_tol=mz_tol,
        deisotope=False,
        centroid=True,
    )
    # first (only) row of the matrix is the precursor trace
    eic_prec = eic_prec[0].tolist()

    # Candidate MS2 scans: hoisted out of the loop because the precursor window is loop-invariant.
    # NOTE: the window is a fixed +/- 5 and is independent of mz_tol.
    prec_window = 5
    ms2_candidates = self.scans_df.filter(
        (pl.col("ms_level") == 2)
        & (pl.col("prec_mz") > feature_mz - prec_window)
        & (pl.col("prec_mz") < feature_mz + prec_window),
    )

    # Walk the cycles in the same order as `rts` so that the MS2 scans stay aligned
    # with the retention time axis (a plain unique() gives no ordering guarantee).
    ms2_scan_uids = []
    for cycle in ms1_scans["cycle"].to_list():
        in_cycle = ms2_candidates.filter(pl.col("cycle") == cycle)
        if len(in_cycle) == 0:
            print(
                f"No scans found for cycle {cycle} and mz {feature_mz}. Increase mz_tol tolerance.",
            )
            return
        # MS2 scan whose precursor m/z is closest to the feature m/z
        closest = (in_cycle["prec_mz"] - feature_mz).abs().arg_min()
        ms2_scan_uids.append(in_cycle["scan_uid"][closest])

    eic_prod = self._spec_to_mat(
        ms2_scan_uids,
        mz_ref=top_mzs,
        mz_tol=mz_tol,
        deisotope=deisotope,
        centroid=centroid,
    )

    prec_name = f"prec {feature_mz:.3f}"
    eic_df = pd.DataFrame({"rt": rts, prec_name: eic_prec})
    # scan_uid of the MS2 scans, shown in the tooltips
    eic_df["scan_uid"] = ms2_scan_uids

    trace_names = [prec_name]
    for i, mz in enumerate(top_mzs):
        name = f"frag {mz:.3f}"
        trace_names.append(name)
        eic_df[name] = eic_prod[i]

    # one small curve per trace (precursor first, then fragments)
    eic_plots: list[hv.Curve] = []
    for name in trace_names:
        eic = hv.Curve(eic_df, kdims=["rt"], vdims=[name, "scan_uid"]).opts(
            title=name,
            xlabel="RT (s)",
            ylabel=f"Inty_f{len(eic_plots)}",
            width=250,
            height=200,
            axiswise=True,
            color="black",
            tools=[HoverTool(tooltips=[("rt", "@rt"), ("scan_uid", "@scan_uid")])],
        )
        eic_plots.append(eic)

    # Set shared_axes explicitly so that link_x=False actually unlinks the subplots
    layout = hv.Layout(eic_plots).cols(4).opts(shared_axes=bool(link_x))

    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
    else:
        panel.panel(layout).show()
|
|
1074
|
+
|
|
1075
|
+
|
|
1076
|
+
def plot_ms2_cycle(
    self,
    cycle=None,
    filename=None,
    title=None,
    cmap=None,
    raster_dynamic=True,
    raster_max_px=8,
    raster_threshold=0.8,
    centroid=True,
    deisotope=True,
):
    """
    Plot all MS2 spectra of one acquisition cycle as a rasterized precursor m/z vs. fragment m/z map.

    Parameters:
        cycle: Cycle number to plot. Must be present in ``scans_df["cycle"]``.
        filename (str, optional): If given, the plot is saved instead of shown. A name ending in
            ".html" is saved as an interactive HTML file; any other name is saved as PNG.
        title (str, optional): Plot title.
        cmap (str, optional): Colormap name. None selects "iridescent_r"; "grey" is mapped to "Greys256".
        raster_dynamic (bool, optional): Passed as ``dynamic`` to the rasterize operation. Default is True.
        raster_max_px (int, optional): Passed as ``max_px`` to the dynspread operation. Default is 8.
        raster_threshold (float, optional): Passed as ``threshold`` to the dynspread operation. Default is 0.8.
        centroid (bool, optional): Forwarded to ``get_spectrum``. Default is True.
        deisotope (bool, optional): Forwarded to ``get_spectrum``. Default is True.

    Returns:
        None

    Notes:
        - Peaks with intensity below 1 are discarded before plotting.
        - If no file is loaded, no cycle is given, the cycle is unknown, or the cycle has no
          usable MS2 peaks, a message is printed and nothing is plotted.
    """
    if self.file_obj is None:
        print("Please load a mzML file first.")
        return

    if cycle is None:
        print("Please provide a cycle number.")
        return

    if cycle not in self.scans_df["cycle"].unique():
        print("Cycle number not found in scans_df.")
        return

    if cmap is None:
        cmap = "iridescent_r"
    elif cmap == "grey":
        cmap = "Greys256"

    # all MS2 scans of the requested cycle
    ms2_scans = self.scans_df.filter(
        (pl.col("cycle") == cycle) & (pl.col("ms_level") == 2),
    )

    # One long-format frame per scan (one row per peak). Building a single DataFrame
    # from a list of dicts of arrays would instead create one row per scan with
    # array-valued cells, which cannot be filtered or rasterized.
    frames = []
    for scan in ms2_scans.iter_rows(named=True):
        spec = self.get_spectrum(
            scan["scan_uid"],
            precursor_trim=None,
            centroid=centroid,
            deisotope=deisotope,
        )
        if spec.mz.size == 0:
            continue
        frames.append(
            pd.DataFrame(
                {
                    "prec_mz": scan["prec_mz"],  # scalar, broadcast over all peaks of the scan
                    "mz": spec.mz,
                    "inty": spec.inty,
                },
            ),
        )

    if not frames:
        print("No MS2 data found for this cycle.")
        return

    spectradf = pd.concat(frames, ignore_index=True)
    # remove any inty < 1
    spectradf = spectradf[spectradf["inty"] >= 1]
    if spectradf.empty:
        print("No MS2 data found for this cycle.")
        return

    # data extent, used to clamp panning/zooming of the plot
    q1_min = spectradf["prec_mz"].min()
    q1_max = spectradf["prec_mz"].max()
    mz_min = spectradf["mz"].min()
    mz_max = spectradf["mz"].max()

    def new_bounds_hook(plot, elem):
        # `elem` is unused here; it is kept because the hook is invoked with two arguments
        plot.state.x_range.bounds = q1_min, q1_max
        plot.state.y_range.bounds = mz_min, mz_max

    points = hv.Points(
        spectradf,
        kdims=["prec_mz", "mz"],
        vdims=["inty"],
        label="MS1 survey scans",
    ).opts(
        fontsize={"title": 16, "labels": 14, "xticks": 6, "yticks": 12},
        color=np.log(dim("inty")),
        colorbar=True,
        cmap="Magma",
        tools=["hover"],
    )

    raster = hd.rasterize(
        points,
        aggregator=ds.max("inty"),
        interpolation="bilinear",
        dynamic=raster_dynamic,
    ).opts(
        active_tools=["box_zoom"],
        cmap=process_cmap(cmap, provider="bokeh"),
        tools=["hover"],
        hooks=[new_bounds_hook],
        width=1000,
        height=1000,
        cnorm="log",
        xlabel="Q1 m/z",
        ylabel="m/z",
        colorbar=True,
        colorbar_position="right",
        axiswise=True,
    )

    overlay = hd.dynspread(
        raster,
        threshold=raster_threshold,
        how="add",
        shape="square",
        max_px=raster_max_px,
    )

    if title is not None:
        overlay = overlay.opts(title=title)

    # Create a panel layout
    layout = panel.Column(overlay)

    if filename is not None:
        if filename.endswith(".html"):
            # interactive HTML export
            layout.save(filename, embed=True)
        else:
            # static PNG export
            hv.save(overlay, filename, fmt="png")
    else:
        # Display the panel layout
        layout.show()
|
|
1295
|
+
|
|
1296
|
+
|
|
1297
|
+
def plot_ms2_q1(
    self,
    feature_uid=None,
    q1_width=10.0,
    mz_tol=0.01,
    link_x=True,
    n=20,
    deisotope=True,
    centroid=True,
    filename=None,
):
    """
    Plot the intensity of a feature's top MS2 fragments as a function of the precursor (Q1) m/z.

    Parameters:
        feature_uid: Unique identifier of the feature. Must be present in ``features_df``;
            if None or unknown, a message is printed and nothing is plotted.
        q1_width (float, optional): Half-width of the scan window around the reference MS2 scan.
            It is applied to ``scan_uid`` values (not to m/z). Default is 10.0.
        mz_tol (float, optional): m/z tolerance forwarded to ``_spec_to_mat``. Default is 0.01.
        link_x (bool, optional): If True, the subplots share their axes (linked Q1 axis). Default is True.
        n (int, optional): Number of most intense MS2 fragments to plot. Default is 20.
        deisotope (bool, optional): Forwarded to ``_spec_to_mat``. Default is True.
        centroid (bool, optional): Forwarded to ``_spec_to_mat``. Default is True.
        filename (str, optional): If given, the plot is saved instead of shown. A name ending in
            ".html" is saved as an interactive HTML file; any other name is saved as PNG.

    Returns:
        None

    Notes:
        - The fragments are taken from the first spectrum in the feature's ``ms2_specs``.
        - The reference scan is the MS2 scan, in the cycle of the scan closest to the feature's
          retention time, whose ``prec_mz`` is closest to the feature m/z.
        - The scan window is selected on ``scan_uid`` only, without an ``ms_level`` filter.
        - If the fragment table has a ``q1_ratio`` column, its value is appended to each subplot title.
    """
    if feature_uid is None:
        print("Please provide a feature id.")
        return

    # The feature table is accessed pandas-style below; convert it first if it
    # offers to_pandas(), as the other plotting helpers in this module do.
    feats = self.features_df
    if hasattr(feats, "to_pandas"):
        feats = feats.to_pandas()

    feature = feats[feats["feature_uid"] == feature_uid]
    if len(feature) == 0:
        print("Feature id not found in features_df.")
        return

    ms2_specs = feature["ms2_specs"].values[0]
    if ms2_specs is None or len(ms2_specs) == 0:
        print("No MS2 data found for this feature.")
        return

    # the n most intense fragments of the first MS2 spectrum
    ms2_specs_df = ms2_specs[0].pandalize().sort_values(by="inty", ascending=False).head(n)
    top_mzs = ms2_specs_df["mz"].values.tolist()

    # Scalar (not a one-element Series) so it can be subtracted from a polars column
    feature_mz = float(feature["mz"].values[0])

    # MS2 scans of the cycle that contains the scan closest to the feature's retention time
    feature_scan = self.select_closest_scan(feature["rt"].values[0])
    cycle = feature_scan["cycle"][0]
    cycle_ms2 = self.scans_df.filter(
        (pl.col("cycle") == cycle) & (pl.col("ms_level") == 2),
    )
    closest = None
    if len(cycle_ms2) > 0:
        # position of the scan whose precursor m/z is closest to the feature m/z
        closest = (cycle_ms2["prec_mz"] - feature_mz).abs().arg_min()
    if closest is None:
        print(f"No MS2 scans found in cycle {cycle}.")
        return
    scan_uid = cycle_ms2["scan_uid"][closest]

    # scans whose scan_uid lies within q1_width of the reference scan
    window = self.scans_df.filter(
        (pl.col("scan_uid") >= scan_uid - q1_width)
        & (pl.col("scan_uid") <= scan_uid + q1_width),
    )
    scan_uids = window["scan_uid"].to_list()
    q1s = window["prec_mz"].to_list()

    q1_prod = self._spec_to_mat(
        scan_uids,
        mz_ref=top_mzs,
        mz_tol=mz_tol,
        deisotope=deisotope,
        centroid=centroid,
    )
    q1_df = pd.DataFrame({"q1": q1s})

    has_q1_ratio = "q1_ratio" in ms2_specs_df.columns
    frag_names = []
    for i, mz in enumerate(top_mzs):
        name = f"frag {mz:.3f}"
        if has_q1_ratio:
            q1_ratio = ms2_specs_df["q1_ratio"].values[i]
            name += f" q1r: {q1_ratio:.2f}"
        frag_names.append(name)
        q1_df[name] = q1_prod[i]
    # scan_uid is shown in the tooltips
    q1_df["scan_uid"] = scan_uids

    # one small curve per fragment
    eic_plots: list[hv.Curve] = []
    for name in frag_names:
        eic = hv.Curve(q1_df, kdims=["q1"], vdims=[name, "scan_uid"]).opts(
            title=name,
            xlabel="Q1 (m/z)",
            ylabel=f"Inty_f{len(eic_plots)}",
            width=250,
            height=200,
            axiswise=True,
            color="black",
            tools=[HoverTool(tooltips=[("Q1", "@q1"), ("scan_uid", "@scan_uid")])],
        )
        eic_plots.append(eic)

    # Set shared_axes explicitly so that link_x=False actually unlinks the subplots
    layout = hv.Layout(eic_plots).cols(4).opts(shared_axes=bool(link_x))

    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
    else:
        panel.panel(layout).show()
|
|
1397
|
+
|
|
1398
|
+
|
|
1399
|
+
def plot_dda_stats(
    self,
    filename=None,
):
    """
    Generates scatter plots for DDA statistics.

    The statistics are retrieved with ``get_dda_stats``, reduced to the columns needed for
    plotting, and every value below 0 is replaced by a missing value. One scatter plot is then
    created per metric in ``cols_to_plot``, with "cycle" on the x-axis and the metric on the
    y-axis. All plots share the same hover tooltips: "scan_uid", "cycle", "rt" and every metric.

    Parameters:
        filename (str, optional): The path and filename where the plot should be saved. If the filename
            ends with ".html", the plot is saved as an interactive HTML file using Panel; otherwise it
            is saved as a PNG image using HoloViews. If not provided, the plot is displayed interactively.

    Returns:
        None

    Notes:
        - The statistics are expected to include the columns 'scan_uid', 'cycle', 'rt', and the
          metrics listed in ``cols_to_plot``.
    """
    cols_to_plot = [
        "inty_tot",
        "bl",
        "ms2_n",
        "time_cycle",
        "time_ms1_to_ms1",
        "time_ms1_to_ms2",
        "time_ms2_to_ms2",
        "time_ms2_to_ms1",
    ]
    # Keep 'scan_uid', 'cycle' and 'rt' for the hover tooltips along with the metrics
    stats = self.get_dda_stats()[["scan_uid", "cycle", "rt", *cols_to_plot]]
    # Blank out negative values. mask() returns a new frame, which avoids assigning
    # in place into a frame derived from the one returned by get_dda_stats().
    stats = stats.mask(stats < 0)

    # Tooltips shared by all plots: identifiers plus every plotted metric
    common_tooltips = [
        ("scan_uid", "@scan_uid"),
        ("cycle", "@cycle"),
        ("rt", "@rt"),
    ] + [(c, f"@{c}") for c in cols_to_plot]

    scatter_plots = []
    for col in cols_to_plot:
        # a fresh HoverTool instance per plot
        hover = HoverTool(tooltips=common_tooltips)
        # the plotted metric comes first in vdims; the others are carried along for the tooltips
        other_cols = [c for c in cols_to_plot if c != col]
        scatter = hv.Scatter(
            stats,
            kdims="cycle",
            vdims=[col, "scan_uid", "rt"] + other_cols,
        ).opts(
            title=col,
            xlabel="Cycle",
            ylabel=col,
            height=250,
            width=800,
            tools=[hover],
            size=3,
        )
        scatter_plots.append(scatter)

    # stack the plots vertically
    layout = hv.Layout(scatter_plots).cols(1)
    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
    else:
        panel.panel(layout).show()
|
|
1472
|
+
|
|
1473
|
+
|
|
1474
|
+
def plot_feature_stats(
    self,
    filename=None,
):
    """
    Generates overlaid distribution plots for selected feature metrics.

    For each metric, one distribution is drawn for features with MS2 data (``ms2_scans`` not null)
    and one for features without. Metrics include m/z and m/z width, intensity, quality,
    retention time, chromatographic scores, the number of MS2 peaks, the summed MS2 intensity,
    and the MS2-to-MS1 intensity ratio. Intensity, quality and summed MS2 intensity are shown on
    a log10 scale; non-positive values are treated as missing.

    Parameters:
        filename (str, optional): The output filename. If the filename ends with ".html",
                                    the plot is saved as an interactive HTML file; otherwise,
                                    if provided, the plot is saved as a PNG image. If not provided,
                                    the interactive plot is displayed.

    Returns:
        None
    """
    # Work on a pandas copy of features_df so the stored table is never modified
    feats = self.features_df
    feats = feats.to_pandas() if hasattr(feats, "to_pandas") else feats.copy()

    # m/z width of each feature
    feats["mz_delta"] = feats["mz_end"] - feats["mz_start"]

    def _first_spec(specs):
        # first MS2 spectrum of a feature, or None when there is none (None or empty list)
        if specs is None or len(specs) == 0:
            return None
        return specs[0]

    # Number of peaks and summed intensity of the first MS2 spectrum (0 when absent)
    first_specs = feats["ms2_specs"].apply(_first_spec)
    feats["MS2peaks"] = first_specs.apply(lambda s: len(s) if s is not None else 0)
    feats["MS2int"] = first_specs.apply(lambda s: sum(s.inty) if s is not None else 0)

    # Ratio of MS2 to MS1 intensity, computed on the raw (non-log) values.
    # A zero MS1 intensity would give +/-inf, which dropna() does not remove; blank it out.
    feats["MS2toMS1"] = (feats["MS2int"] / feats["inty"]).replace([np.inf, -np.inf], np.nan)

    # log10 transform; non-positive values are masked to NaN first so that log10
    # is never evaluated on them.
    for col in ("inty", "quality", "MS2int"):
        values = feats[col]
        feats[col] = np.log10(values.where(values > 0))

    # Separate features based on presence of MS2 data
    has_ms2 = feats["ms2_scans"].notnull()
    feats_with_MS2 = feats[has_ms2]
    feats_without_MS2 = feats[~has_ms2]

    # Metrics to plot
    cols_to_plot = [
        "mz",
        "mz_delta",
        "inty",
        "quality",
        "rt",
        "rt_delta",
        "chrom_coherence",
        "chrom_prominence",
        "chrom_prominence_scaled",
        # NOTE: "chrom_heights" / "chrom_heights_scaled" were disabled upstream ("AR was bugging")
        "MS2peaks",
        "MS2int",
        "MS2toMS1",
    ]

    density_plots = []
    for col in cols_to_plot:
        # non-null values of both groups
        data_with = feats_with_MS2[col].dropna().values
        data_without = feats_without_MS2[col].dropna().values

        dist_with = hv.Distribution(data_with, label="With MS2").opts(
            color="red",
            alpha=0.6,
        )
        dist_without = hv.Distribution(data_without, label="Without MS2").opts(
            color="blue",
            alpha=0.6,
        )

        # Overlay both groups with a legend and hover tool enabled
        overlay = (dist_with * dist_without).opts(
            title=col,
            show_legend=True,
            tools=["hover"],
        )
        density_plots.append(overlay)

    # Three columns, each subplot with its own axes
    layout = hv.Layout(density_plots).cols(3).opts(shared_axes=False)

    # Save or display the layout based on the filename parameter
    if filename is not None:
        if filename.endswith(".html"):
            panel.panel(layout).save(filename, embed=True)  # type: ignore[attr-defined]
        else:
            hv.save(layout, filename, fmt="png")
    else:
        panel.panel(layout).show()
|
|
1588
|
+
|
|
1589
|
+
|
|
1590
|
+
def plot_tic(
|
|
1591
|
+
self,
|
|
1592
|
+
title=None,
|
|
1593
|
+
filename=None,
|
|
1594
|
+
):
|
|
1595
|
+
# get all ms_level ==1 scans from sefl.scans_df
|
|
1596
|
+
scans = self.scans_df.filter(pl.col("ms_level") == 1)
|
|
1597
|
+
# select rt, scan_uid and inty_tot, convert to pandas
|
|
1598
|
+
data = scans[["rt", "scan_uid", "inty_tot"]].to_pandas()
|
|
1599
|
+
# sort by rt
|
|
1600
|
+
data = data.sort_values("rt")
|
|
1601
|
+
|
|
1602
|
+
# plot using hv.Curve
|
|
1603
|
+
tic = hv.Curve(data, kdims=["rt"], vdims=["inty_tot"])
|
|
1604
|
+
tic.opts(
|
|
1605
|
+
title=title,
|
|
1606
|
+
xlabel="Retention Time (min)",
|
|
1607
|
+
ylabel="TIC",
|
|
1608
|
+
height=250,
|
|
1609
|
+
width=100,
|
|
1610
|
+
)
|