masster 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +27 -27
- masster/_version.py +17 -17
- masster/chromatogram.py +497 -503
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
- masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
- masster/logger.py +318 -244
- masster/sample/__init__.py +9 -9
- masster/sample/defaults/__init__.py +15 -15
- masster/sample/defaults/find_adducts_def.py +325 -325
- masster/sample/defaults/find_features_def.py +366 -366
- masster/sample/defaults/find_ms2_def.py +285 -285
- masster/sample/defaults/get_spectrum_def.py +314 -318
- masster/sample/defaults/sample_def.py +374 -378
- masster/sample/h5.py +1321 -1297
- masster/sample/helpers.py +833 -364
- masster/sample/lib.py +762 -0
- masster/sample/load.py +1220 -1187
- masster/sample/parameters.py +131 -131
- masster/sample/plot.py +1685 -1622
- masster/sample/processing.py +1402 -1416
- masster/sample/quant.py +209 -0
- masster/sample/sample.py +393 -387
- masster/sample/sample5_schema.json +181 -181
- masster/sample/save.py +737 -736
- masster/sample/sciex.py +1213 -0
- masster/spectrum.py +1287 -1319
- masster/study/__init__.py +9 -9
- masster/study/defaults/__init__.py +21 -19
- masster/study/defaults/align_def.py +267 -267
- masster/study/defaults/export_def.py +41 -40
- masster/study/defaults/fill_chrom_def.py +264 -264
- masster/study/defaults/fill_def.py +260 -0
- masster/study/defaults/find_consensus_def.py +256 -256
- masster/study/defaults/find_ms2_def.py +163 -163
- masster/study/defaults/integrate_chrom_def.py +225 -225
- masster/study/defaults/integrate_def.py +221 -0
- masster/study/defaults/merge_def.py +256 -0
- masster/study/defaults/study_def.py +272 -269
- masster/study/export.py +674 -287
- masster/study/h5.py +1406 -886
- masster/study/helpers.py +1713 -433
- masster/study/helpers_optimized.py +317 -0
- masster/study/load.py +1231 -1078
- masster/study/parameters.py +99 -99
- masster/study/plot.py +632 -645
- masster/study/processing.py +1057 -1046
- masster/study/save.py +161 -134
- masster/study/study.py +612 -522
- masster/study/study5_schema.json +253 -241
- {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/METADATA +15 -10
- masster-0.3.1.dist-info/RECORD +59 -0
- {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +661 -661
- masster-0.2.5.dist-info/RECORD +0 -50
- {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
- {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
masster/spectrum.py
CHANGED
|
@@ -1,1319 +1,1287 @@
|
|
|
1
|
-
"""
|
|
2
|
-
spec.py
|
|
3
|
-
|
|
4
|
-
This module provides tools for processing and analyzing individual mass spectra.
|
|
5
|
-
It defines the `spec` class for handling mass spectral data, including peak detection,
|
|
6
|
-
spectrum visualization, preprocessing operations, and spectral similarity calculations.
|
|
7
|
-
|
|
8
|
-
Key Features:
|
|
9
|
-
- **Spectrum Processing**: Handle m/z and intensity data with various preprocessing options.
|
|
10
|
-
- **Peak Detection**: Advanced peak picking with customizable parameters and algorithms.
|
|
11
|
-
- **Visualization**: Interactive and static spectral plots with annotation capabilities.
|
|
12
|
-
- **Spectrum Comparison**: Calculate spectral similarities and perform matching operations.
|
|
13
|
-
- **Data Export**: Save spectra in multiple formats including images and data files.
|
|
14
|
-
- **Preprocessing**: Smoothing, baseline correction, normalization, and noise filtering.
|
|
15
|
-
|
|
16
|
-
Dependencies:
|
|
17
|
-
- `numpy`: For numerical array operations and mathematical computations.
|
|
18
|
-
- `pandas`: For structured data handling and manipulation.
|
|
19
|
-
- `bokeh`: For interactive plotting and visualization.
|
|
20
|
-
- `scipy.signal`: For signal processing and peak detection algorithms.
|
|
21
|
-
- `holoviews`: For high-level data visualization and color mapping.
|
|
22
|
-
|
|
23
|
-
Classes:
|
|
24
|
-
- `spec`: Main class for individual spectrum processing, providing methods for data
|
|
25
|
-
manipulation, peak detection, visualization, and analysis.
|
|
26
|
-
|
|
27
|
-
Functions:
|
|
28
|
-
- `combine_peaks()`: Utility function for merging multiple peak lists.
|
|
29
|
-
- `spec_to_mgf()`: Convert spectrum objects to MGF format.
|
|
30
|
-
- Various utility functions for spectrum processing and analysis.
|
|
31
|
-
|
|
32
|
-
Example Usage:
|
|
33
|
-
```python
|
|
34
|
-
from spec import spec
|
|
35
|
-
import numpy as np
|
|
36
|
-
|
|
37
|
-
# Create spectrum from m/z and intensity arrays
|
|
38
|
-
mz = np.array([100.0, 150.0, 200.0, 250.0])
|
|
39
|
-
intensity = np.array([1000, 5000, 3000, 800])
|
|
40
|
-
spectrum = spec(mz=mz, inty=intensity, ms_level=1)
|
|
41
|
-
|
|
42
|
-
# Process and visualize
|
|
43
|
-
spectrum.find_peaks()
|
|
44
|
-
spectrum.plot()
|
|
45
|
-
spectrum.save_plot("spectrum.html")
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
See Also:
|
|
49
|
-
- `single.py`: For handling complete mass spectrometry files containing multiple spectra.
|
|
50
|
-
- `parameters.spectrum_parameters`: For spectrum-specific parameter configuration.
|
|
51
|
-
|
|
52
|
-
"""
|
|
53
|
-
|
|
54
|
-
from __future__ import annotations
|
|
55
|
-
|
|
56
|
-
import importlib
|
|
57
|
-
import re
|
|
58
|
-
import warnings
|
|
59
|
-
|
|
60
|
-
from dataclasses import dataclass
|
|
61
|
-
from typing import TYPE_CHECKING
|
|
62
|
-
|
|
63
|
-
import numpy as np
|
|
64
|
-
import pandas as pd
|
|
65
|
-
|
|
66
|
-
from bokeh.io import output_file
|
|
67
|
-
from bokeh.io import save
|
|
68
|
-
from bokeh.io.export import export_png
|
|
69
|
-
from bokeh.io.export import export_svg
|
|
70
|
-
from bokeh.models import BoxZoomTool
|
|
71
|
-
from bokeh.models import ColumnDataSource
|
|
72
|
-
from bokeh.models import FixedTicker
|
|
73
|
-
from bokeh.models import HoverTool
|
|
74
|
-
from bokeh.models import LinearColorMapper
|
|
75
|
-
from bokeh.models import LogScale
|
|
76
|
-
from bokeh.models import LogTickFormatter
|
|
77
|
-
from bokeh.models import NumeralTickFormatter
|
|
78
|
-
from bokeh.plotting import figure
|
|
79
|
-
from bokeh.plotting import show
|
|
80
|
-
|
|
81
|
-
if TYPE_CHECKING:
|
|
82
|
-
try:
|
|
83
|
-
from bokeh.models import ColorBar # type: ignore
|
|
84
|
-
except ImportError:
|
|
85
|
-
ColorBar = None
|
|
86
|
-
else:
|
|
87
|
-
try:
|
|
88
|
-
from bokeh.models import ColorBar # type: ignore
|
|
89
|
-
except ImportError:
|
|
90
|
-
try:
|
|
91
|
-
from bokeh.models.annotations import ColorBar # type: ignore[import-untyped]
|
|
92
|
-
except ImportError:
|
|
93
|
-
ColorBar = None
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
try:
|
|
97
|
-
from holoviews.plotting.util import process_cmap
|
|
98
|
-
except ImportError:
|
|
99
|
-
process_cmap = None
|
|
100
|
-
from matplotlib.colors import rgb2hex
|
|
101
|
-
from scipy.signal import find_peaks
|
|
102
|
-
from scipy.signal import find_peaks_cwt
|
|
103
|
-
from scipy.signal import peak_prominences
|
|
104
|
-
from scipy.signal import peak_widths
|
|
105
|
-
from scipy.signal import savgol_filter
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
if TYPE_CHECKING:
|
|
109
|
-
from collections.abc import Callable
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
@dataclass
|
|
113
|
-
class Spectrum:
|
|
114
|
-
"""
|
|
115
|
-
A class for processing and analyzing individual mass spectra.
|
|
116
|
-
|
|
117
|
-
The `spec` class provides comprehensive tools for handling mass spectral data,
|
|
118
|
-
including peak detection, preprocessing, visualization, and spectral analysis.
|
|
119
|
-
It supports both centroided and profile mode spectra and offers various
|
|
120
|
-
algorithms for peak picking and spectral processing.
|
|
121
|
-
|
|
122
|
-
Attributes:
|
|
123
|
-
mz (np.ndarray): Mass-to-charge ratio values.
|
|
124
|
-
inty (np.ndarray): Intensity values corresponding to m/z values.
|
|
125
|
-
ms_level (int, optional): MS level (1 for MS1, 2 for MS2, etc.).
|
|
126
|
-
label (str, optional): Text label for the spectrum.
|
|
127
|
-
centroided (bool, optional): Whether the spectrum is centroided.
|
|
128
|
-
history (str): Processing history log.
|
|
129
|
-
bl (np.ndarray, optional): Baseline values for baseline correction.
|
|
130
|
-
|
|
131
|
-
Key Methods:
|
|
132
|
-
- `find_peaks()`: Detect peaks in the spectrum using various algorithms.
|
|
133
|
-
- `plot()`: Generate interactive or static plots of the spectrum.
|
|
134
|
-
- `denoise()`: Remove noise and low-intensity signals.
|
|
135
|
-
- `smooth()`: Apply smoothing algorithms to the spectrum.
|
|
136
|
-
- `normalize()`: Normalize spectrum intensities.
|
|
137
|
-
- `copy()`: Create a deep copy of the spectrum object.
|
|
138
|
-
|
|
139
|
-
Example Usage:
|
|
140
|
-
>>> import numpy as np
|
|
141
|
-
>>> from masster import spec
|
|
142
|
-
>>> mz = np.array([100.0, 150.0, 200.0, 250.0])
|
|
143
|
-
>>> intensity = np.array([1000, 5000, 3000, 800])
|
|
144
|
-
>>> spectrum = spec(mz=mz, inty=intensity, ms_level=1)
|
|
145
|
-
>>> spectrum.find_peaks()
|
|
146
|
-
>>> spectrum.plot()
|
|
147
|
-
|
|
148
|
-
See Also:
|
|
149
|
-
- `ddafile`: For handling complete mass spectrometry files.
|
|
150
|
-
- `SpectrumParameters`: For spectrum-specific parameter configuration.
|
|
151
|
-
"""
|
|
152
|
-
|
|
153
|
-
def __init__(
|
|
154
|
-
self,
|
|
155
|
-
mz: np.ndarray | None = None,
|
|
156
|
-
inty: np.ndarray | None = None,
|
|
157
|
-
ms_level: int | None = None,
|
|
158
|
-
label: str | None = None,
|
|
159
|
-
centroided=None,
|
|
160
|
-
**kwargs,
|
|
161
|
-
):
|
|
162
|
-
# Handle case where mz and inty might be in kwargs (from from_dict/from_json)
|
|
163
|
-
if mz is None and "mz" in kwargs:
|
|
164
|
-
mz = kwargs.pop("mz")
|
|
165
|
-
if inty is None and "inty" in kwargs:
|
|
166
|
-
inty = kwargs.pop("inty")
|
|
167
|
-
|
|
168
|
-
# Ensure mz and inty are provided
|
|
169
|
-
if mz is None or inty is None:
|
|
170
|
-
raise ValueError("mz and inty arrays are required")
|
|
171
|
-
|
|
172
|
-
self.label = label
|
|
173
|
-
self.ms_level = ms_level
|
|
174
|
-
self.centroided = centroided
|
|
175
|
-
self.mz = mz
|
|
176
|
-
self.inty = inty
|
|
177
|
-
self.history = ""
|
|
178
|
-
self.bl: float | None = None
|
|
179
|
-
# Optional attributes for peak analysis
|
|
180
|
-
self.width: np.ndarray | None = None
|
|
181
|
-
self.prominence: np.ndarray | None = None
|
|
182
|
-
self.__dict__.update(kwargs)
|
|
183
|
-
self.__post_init__()
|
|
184
|
-
if centroided is None:
|
|
185
|
-
self.centroided = self.check_if_centroided()
|
|
186
|
-
|
|
187
|
-
def __post_init__(self):
|
|
188
|
-
self.mz = np.asarray(self.mz)
|
|
189
|
-
self.inty = np.asarray(self.inty)
|
|
190
|
-
if self.mz.shape != self.inty.shape:
|
|
191
|
-
raise ValueError("mz and intensity arrays must have the same shape")
|
|
192
|
-
if self.centroided is None:
|
|
193
|
-
self.centroided = self.check_if_centroided()
|
|
194
|
-
if self.history is None:
|
|
195
|
-
self.history = ""
|
|
196
|
-
if self.bl is None:
|
|
197
|
-
self.bl = None
|
|
198
|
-
|
|
199
|
-
def check_if_centroided(self) -> bool:
|
|
200
|
-
if self.mz.size == 0:
|
|
201
|
-
return True
|
|
202
|
-
mzs = self.mz[self.mz < np.min(self.mz) + 0.4]
|
|
203
|
-
if len(mzs) < 20:
|
|
204
|
-
if len(mzs) < 3:
|
|
205
|
-
return True
|
|
206
|
-
min_distance = np.min(np.diff(mzs))
|
|
207
|
-
if min_distance > 0.003:
|
|
208
|
-
return True
|
|
209
|
-
return False
|
|
210
|
-
|
|
211
|
-
def reload(self):
|
|
212
|
-
modname = self.__class__.__module__
|
|
213
|
-
mod = __import__(modname, fromlist=[modname.split(".")[0]])
|
|
214
|
-
importlib.reload(mod)
|
|
215
|
-
new = getattr(mod, self.__class__.__name__)
|
|
216
|
-
setattr(self, "__class__", new) # noqa: B010
|
|
217
|
-
|
|
218
|
-
def to_dict(self):
|
|
219
|
-
# return a dictionary representation of the spectrum. include all the attributes
|
|
220
|
-
# Create a copy to avoid modifying the original object
|
|
221
|
-
import copy
|
|
222
|
-
|
|
223
|
-
result = {}
|
|
224
|
-
|
|
225
|
-
# Handle numpy arrays by creating copies and converting to lists
|
|
226
|
-
for key, value in self.__dict__.items():
|
|
227
|
-
if isinstance(value, np.ndarray):
|
|
228
|
-
result[key] = value.copy().tolist()
|
|
229
|
-
elif isinstance(value, (list, dict)):
|
|
230
|
-
# Create copies of mutable objects
|
|
231
|
-
result[key] = copy.deepcopy(value)
|
|
232
|
-
else:
|
|
233
|
-
# Immutable objects can be copied directly
|
|
234
|
-
result[key] = value
|
|
235
|
-
# round m/z to 5 decimal places and intensity to 2 decimal places
|
|
236
|
-
if
|
|
237
|
-
result[
|
|
238
|
-
if
|
|
239
|
-
result[
|
|
240
|
-
|
|
241
|
-
return result
|
|
242
|
-
|
|
243
|
-
@classmethod
|
|
244
|
-
def from_dict(cls, data: dict):
|
|
245
|
-
# Create instance directly from data dictionary
|
|
246
|
-
return cls(**data)
|
|
247
|
-
|
|
248
|
-
def to_json(self):
|
|
249
|
-
"""
|
|
250
|
-
Serialize the spectrum to a JSON string.
|
|
251
|
-
|
|
252
|
-
Returns:
|
|
253
|
-
str: JSON string representation of the spectrum.
|
|
254
|
-
"""
|
|
255
|
-
import json
|
|
256
|
-
|
|
257
|
-
data = self.to_dict()
|
|
258
|
-
return json.dumps(data, indent=2)
|
|
259
|
-
|
|
260
|
-
@classmethod
|
|
261
|
-
def from_json(cls, json_str):
|
|
262
|
-
"""
|
|
263
|
-
Create a Spectrum instance from a JSON string.
|
|
264
|
-
|
|
265
|
-
Args:
|
|
266
|
-
json_str (str): JSON string containing spectrum data.
|
|
267
|
-
|
|
268
|
-
Returns:
|
|
269
|
-
Spectrum: New instance with attributes set from the JSON data.
|
|
270
|
-
"""
|
|
271
|
-
import json
|
|
272
|
-
|
|
273
|
-
data = json.loads(json_str)
|
|
274
|
-
return cls.from_dict(data)
|
|
275
|
-
|
|
276
|
-
def pandalize(self):
|
|
277
|
-
data = {
|
|
278
|
-
key: val
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
"""
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
return spec_obj
|
|
353
|
-
|
|
354
|
-
def
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
return
|
|
383
|
-
|
|
384
|
-
def
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
def
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
self_c
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
spec_obj.
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
spec_obj.
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
if
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
s.
|
|
532
|
-
s.
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
s
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
if
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
smooth
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
)
|
|
649
|
-
s.
|
|
650
|
-
s.
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
s.
|
|
663
|
-
s.
|
|
664
|
-
s.history_add("
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
|
|
744
|
-
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
)
|
|
819
|
-
if
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
p
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
|
|
1047
|
-
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
)
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
|
|
1112
|
-
|
|
1113
|
-
|
|
1114
|
-
|
|
1115
|
-
|
|
1116
|
-
|
|
1117
|
-
|
|
1118
|
-
|
|
1119
|
-
|
|
1120
|
-
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1165
|
-
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
|
|
1180
|
-
|
|
1181
|
-
|
|
1182
|
-
|
|
1183
|
-
|
|
1184
|
-
|
|
1185
|
-
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
|
|
1224
|
-
|
|
1225
|
-
|
|
1226
|
-
|
|
1227
|
-
|
|
1228
|
-
|
|
1229
|
-
|
|
1230
|
-
|
|
1231
|
-
|
|
1232
|
-
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1248
|
-
|
|
1249
|
-
|
|
1250
|
-
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1268
|
-
|
|
1269
|
-
|
|
1270
|
-
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
|
-
|
|
1274
|
-
|
|
1275
|
-
|
|
1276
|
-
|
|
1277
|
-
|
|
1278
|
-
|
|
1279
|
-
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
ticker=FixedTicker(ticks=[-1, -0.5, 0, 0.5, 1]),
|
|
1289
|
-
location=(0, 0),
|
|
1290
|
-
)
|
|
1291
|
-
p.add_layout(color_bar, "right")
|
|
1292
|
-
|
|
1293
|
-
if ylog:
|
|
1294
|
-
p.y_scale = LogScale()
|
|
1295
|
-
p.yaxis.formatter = LogTickFormatter()
|
|
1296
|
-
else:
|
|
1297
|
-
p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
|
|
1298
|
-
p.legend.click_policy = "hide"
|
|
1299
|
-
|
|
1300
|
-
p.legend.click_policy = "hide"
|
|
1301
|
-
p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
|
|
1302
|
-
|
|
1303
|
-
if filename is not None:
|
|
1304
|
-
if filename.endswith(".html"):
|
|
1305
|
-
output_file(filename)
|
|
1306
|
-
save(p)
|
|
1307
|
-
elif filename.endswith(".svg"):
|
|
1308
|
-
p.output_backend = "svg"
|
|
1309
|
-
export_svg(p, filename=filename)
|
|
1310
|
-
elif filename.endswith(".png"):
|
|
1311
|
-
export_png(p, filename=filename)
|
|
1312
|
-
else:
|
|
1313
|
-
show(p)
|
|
1314
|
-
else:
|
|
1315
|
-
show(p)
|
|
1316
|
-
|
|
1317
|
-
|
|
1318
|
-
if __name__ == "__main__":
|
|
1319
|
-
pass
|
|
1
|
+
"""
|
|
2
|
+
spec.py
|
|
3
|
+
|
|
4
|
+
This module provides tools for processing and analyzing individual mass spectra.
|
|
5
|
+
It defines the `spec` class for handling mass spectral data, including peak detection,
|
|
6
|
+
spectrum visualization, preprocessing operations, and spectral similarity calculations.
|
|
7
|
+
|
|
8
|
+
Key Features:
|
|
9
|
+
- **Spectrum Processing**: Handle m/z and intensity data with various preprocessing options.
|
|
10
|
+
- **Peak Detection**: Advanced peak picking with customizable parameters and algorithms.
|
|
11
|
+
- **Visualization**: Interactive and static spectral plots with annotation capabilities.
|
|
12
|
+
- **Spectrum Comparison**: Calculate spectral similarities and perform matching operations.
|
|
13
|
+
- **Data Export**: Save spectra in multiple formats including images and data files.
|
|
14
|
+
- **Preprocessing**: Smoothing, baseline correction, normalization, and noise filtering.
|
|
15
|
+
|
|
16
|
+
Dependencies:
|
|
17
|
+
- `numpy`: For numerical array operations and mathematical computations.
|
|
18
|
+
- `pandas`: For structured data handling and manipulation.
|
|
19
|
+
- `bokeh`: For interactive plotting and visualization.
|
|
20
|
+
- `scipy.signal`: For signal processing and peak detection algorithms.
|
|
21
|
+
- `holoviews`: For high-level data visualization and color mapping.
|
|
22
|
+
|
|
23
|
+
Classes:
|
|
24
|
+
- `spec`: Main class for individual spectrum processing, providing methods for data
|
|
25
|
+
manipulation, peak detection, visualization, and analysis.
|
|
26
|
+
|
|
27
|
+
Functions:
|
|
28
|
+
- `combine_peaks()`: Utility function for merging multiple peak lists.
|
|
29
|
+
- `spec_to_mgf()`: Convert spectrum objects to MGF format.
|
|
30
|
+
- Various utility functions for spectrum processing and analysis.
|
|
31
|
+
|
|
32
|
+
Example Usage:
|
|
33
|
+
```python
|
|
34
|
+
from spec import spec
|
|
35
|
+
import numpy as np
|
|
36
|
+
|
|
37
|
+
# Create spectrum from m/z and intensity arrays
|
|
38
|
+
mz = np.array([100.0, 150.0, 200.0, 250.0])
|
|
39
|
+
intensity = np.array([1000, 5000, 3000, 800])
|
|
40
|
+
spectrum = spec(mz=mz, inty=intensity, ms_level=1)
|
|
41
|
+
|
|
42
|
+
# Process and visualize
|
|
43
|
+
spectrum.find_peaks()
|
|
44
|
+
spectrum.plot()
|
|
45
|
+
spectrum.save_plot("spectrum.html")
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
See Also:
|
|
49
|
+
- `single.py`: For handling complete mass spectrometry files containing multiple spectra.
|
|
50
|
+
- `parameters.spectrum_parameters`: For spectrum-specific parameter configuration.
|
|
51
|
+
|
|
52
|
+
"""
|
|
53
|
+
|
|
54
|
+
from __future__ import annotations
|
|
55
|
+
|
|
56
|
+
import importlib
|
|
57
|
+
import re
|
|
58
|
+
import warnings
|
|
59
|
+
|
|
60
|
+
from dataclasses import dataclass
|
|
61
|
+
from typing import TYPE_CHECKING
|
|
62
|
+
|
|
63
|
+
import numpy as np
|
|
64
|
+
import pandas as pd
|
|
65
|
+
|
|
66
|
+
from bokeh.io import output_file
|
|
67
|
+
from bokeh.io import save
|
|
68
|
+
from bokeh.io.export import export_png
|
|
69
|
+
from bokeh.io.export import export_svg
|
|
70
|
+
from bokeh.models import BoxZoomTool
|
|
71
|
+
from bokeh.models import ColumnDataSource
|
|
72
|
+
from bokeh.models import FixedTicker
|
|
73
|
+
from bokeh.models import HoverTool
|
|
74
|
+
from bokeh.models import LinearColorMapper
|
|
75
|
+
from bokeh.models import LogScale
|
|
76
|
+
from bokeh.models import LogTickFormatter
|
|
77
|
+
from bokeh.models import NumeralTickFormatter
|
|
78
|
+
from bokeh.plotting import figure
|
|
79
|
+
from bokeh.plotting import show
|
|
80
|
+
|
|
81
|
+
if TYPE_CHECKING:
|
|
82
|
+
try:
|
|
83
|
+
from bokeh.models import ColorBar # type: ignore
|
|
84
|
+
except ImportError:
|
|
85
|
+
ColorBar = None
|
|
86
|
+
else:
|
|
87
|
+
try:
|
|
88
|
+
from bokeh.models import ColorBar # type: ignore
|
|
89
|
+
except ImportError:
|
|
90
|
+
try:
|
|
91
|
+
from bokeh.models.annotations import ColorBar # type: ignore[import-untyped]
|
|
92
|
+
except ImportError:
|
|
93
|
+
ColorBar = None
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
try:
|
|
97
|
+
from holoviews.plotting.util import process_cmap
|
|
98
|
+
except ImportError:
|
|
99
|
+
process_cmap = None
|
|
100
|
+
from matplotlib.colors import rgb2hex
|
|
101
|
+
from scipy.signal import find_peaks
|
|
102
|
+
from scipy.signal import find_peaks_cwt
|
|
103
|
+
from scipy.signal import peak_prominences
|
|
104
|
+
from scipy.signal import peak_widths
|
|
105
|
+
from scipy.signal import savgol_filter
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
if TYPE_CHECKING:
|
|
109
|
+
from collections.abc import Callable
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass
|
|
113
|
+
class Spectrum:
|
|
114
|
+
"""
|
|
115
|
+
A class for processing and analyzing individual mass spectra.
|
|
116
|
+
|
|
117
|
+
The `spec` class provides comprehensive tools for handling mass spectral data,
|
|
118
|
+
including peak detection, preprocessing, visualization, and spectral analysis.
|
|
119
|
+
It supports both centroided and profile mode spectra and offers various
|
|
120
|
+
algorithms for peak picking and spectral processing.
|
|
121
|
+
|
|
122
|
+
Attributes:
|
|
123
|
+
mz (np.ndarray): Mass-to-charge ratio values.
|
|
124
|
+
inty (np.ndarray): Intensity values corresponding to m/z values.
|
|
125
|
+
ms_level (int, optional): MS level (1 for MS1, 2 for MS2, etc.).
|
|
126
|
+
label (str, optional): Text label for the spectrum.
|
|
127
|
+
centroided (bool, optional): Whether the spectrum is centroided.
|
|
128
|
+
history (str): Processing history log.
|
|
129
|
+
bl (float, optional): Baseline intensity threshold; set by `denoise()`.
|
|
130
|
+
|
|
131
|
+
Key Methods:
|
|
132
|
+
- `find_peaks()`: Detect peaks in the spectrum using various algorithms.
|
|
133
|
+
- `plot()`: Generate interactive or static plots of the spectrum.
|
|
134
|
+
- `denoise()`: Remove noise and low-intensity signals.
|
|
135
|
+
- `smooth()`: Apply smoothing algorithms to the spectrum.
|
|
136
|
+
- `normalize()`: Normalize spectrum intensities.
|
|
137
|
+
- `copy()`: Create a deep copy of the spectrum object.
|
|
138
|
+
|
|
139
|
+
Example Usage:
|
|
140
|
+
>>> import numpy as np
|
|
141
|
+
>>> from masster import spec
|
|
142
|
+
>>> mz = np.array([100.0, 150.0, 200.0, 250.0])
|
|
143
|
+
>>> intensity = np.array([1000, 5000, 3000, 800])
|
|
144
|
+
>>> spectrum = spec(mz=mz, inty=intensity, ms_level=1)
|
|
145
|
+
>>> spectrum.find_peaks()
|
|
146
|
+
>>> spectrum.plot()
|
|
147
|
+
|
|
148
|
+
See Also:
|
|
149
|
+
- `ddafile`: For handling complete mass spectrometry files.
|
|
150
|
+
- `SpectrumParameters`: For spectrum-specific parameter configuration.
|
|
151
|
+
"""
|
|
152
|
+
|
|
153
|
+
def __init__(
|
|
154
|
+
self,
|
|
155
|
+
mz: np.ndarray | None = None,
|
|
156
|
+
inty: np.ndarray | None = None,
|
|
157
|
+
ms_level: int | None = None,
|
|
158
|
+
label: str | None = None,
|
|
159
|
+
centroided=None,
|
|
160
|
+
**kwargs,
|
|
161
|
+
):
|
|
162
|
+
# Handle case where mz and inty might be in kwargs (from from_dict/from_json)
|
|
163
|
+
if mz is None and "mz" in kwargs:
|
|
164
|
+
mz = kwargs.pop("mz")
|
|
165
|
+
if inty is None and "inty" in kwargs:
|
|
166
|
+
inty = kwargs.pop("inty")
|
|
167
|
+
|
|
168
|
+
# Ensure mz and inty are provided
|
|
169
|
+
if mz is None or inty is None:
|
|
170
|
+
raise ValueError("mz and inty arrays are required")
|
|
171
|
+
|
|
172
|
+
self.label = label
|
|
173
|
+
self.ms_level = ms_level
|
|
174
|
+
self.centroided = centroided
|
|
175
|
+
self.mz = mz
|
|
176
|
+
self.inty = inty
|
|
177
|
+
self.history = ""
|
|
178
|
+
self.bl: float | None = None
|
|
179
|
+
# Optional attributes for peak analysis
|
|
180
|
+
self.width: np.ndarray | None = None
|
|
181
|
+
self.prominence: np.ndarray | None = None
|
|
182
|
+
self.__dict__.update(kwargs)
|
|
183
|
+
self.__post_init__()
|
|
184
|
+
if centroided is None:
|
|
185
|
+
self.centroided = self.check_if_centroided()
|
|
186
|
+
|
|
187
|
+
def __post_init__(self):
|
|
188
|
+
self.mz = np.asarray(self.mz)
|
|
189
|
+
self.inty = np.asarray(self.inty)
|
|
190
|
+
if self.mz.shape != self.inty.shape:
|
|
191
|
+
raise ValueError("mz and intensity arrays must have the same shape")
|
|
192
|
+
if self.centroided is None:
|
|
193
|
+
self.centroided = self.check_if_centroided()
|
|
194
|
+
if self.history is None:
|
|
195
|
+
self.history = ""
|
|
196
|
+
if self.bl is None:
|
|
197
|
+
self.bl = None
|
|
198
|
+
|
|
199
|
+
def check_if_centroided(self) -> bool:
|
|
200
|
+
if self.mz.size == 0:
|
|
201
|
+
return True
|
|
202
|
+
mzs = self.mz[self.mz < np.min(self.mz) + 0.4]
|
|
203
|
+
if len(mzs) < 20:
|
|
204
|
+
if len(mzs) < 3:
|
|
205
|
+
return True
|
|
206
|
+
min_distance = np.min(np.diff(mzs))
|
|
207
|
+
if min_distance > 0.003:
|
|
208
|
+
return True
|
|
209
|
+
return False
|
|
210
|
+
|
|
211
|
+
def reload(self):
    """
    Reload the module that defines this object's class and rebind the
    instance to the freshly loaded class of the same name.
    """
    current_cls = self.__class__
    module_name = current_cls.__module__
    module = __import__(module_name, fromlist=[module_name.split(".")[0]])
    importlib.reload(module)
    self.__class__ = getattr(module, current_cls.__name__)
|
|
217
|
+
|
|
218
|
+
def to_dict(self):
|
|
219
|
+
# return a dictionary representation of the spectrum. include all the attributes
|
|
220
|
+
# Create a copy to avoid modifying the original object
|
|
221
|
+
import copy
|
|
222
|
+
|
|
223
|
+
result = {}
|
|
224
|
+
|
|
225
|
+
# Handle numpy arrays by creating copies and converting to lists
|
|
226
|
+
for key, value in self.__dict__.items():
|
|
227
|
+
if isinstance(value, np.ndarray):
|
|
228
|
+
result[key] = value.copy().tolist()
|
|
229
|
+
elif isinstance(value, (list, dict)):
|
|
230
|
+
# Create copies of mutable objects
|
|
231
|
+
result[key] = copy.deepcopy(value)
|
|
232
|
+
else:
|
|
233
|
+
# Immutable objects can be copied directly
|
|
234
|
+
result[key] = value
|
|
235
|
+
# round m/z to 5 decimal places and intensity to 2 decimal places
|
|
236
|
+
if "mz" in result:
|
|
237
|
+
result["mz"] = np.round(result["mz"], 5).tolist()
|
|
238
|
+
if "inty" in result:
|
|
239
|
+
result["inty"] = np.round(result["inty"], 2).tolist()
|
|
240
|
+
|
|
241
|
+
return result
|
|
242
|
+
|
|
243
|
+
@classmethod
|
|
244
|
+
def from_dict(cls, data: dict):
|
|
245
|
+
# Create instance directly from data dictionary
|
|
246
|
+
return cls(**data)
|
|
247
|
+
|
|
248
|
+
def to_json(self):
|
|
249
|
+
"""
|
|
250
|
+
Serialize the spectrum to a JSON string.
|
|
251
|
+
|
|
252
|
+
Returns:
|
|
253
|
+
str: JSON string representation of the spectrum.
|
|
254
|
+
"""
|
|
255
|
+
import json
|
|
256
|
+
|
|
257
|
+
data = self.to_dict()
|
|
258
|
+
return json.dumps(data, indent=2)
|
|
259
|
+
|
|
260
|
+
@classmethod
|
|
261
|
+
def from_json(cls, json_str):
|
|
262
|
+
"""
|
|
263
|
+
Create a Spectrum instance from a JSON string.
|
|
264
|
+
|
|
265
|
+
Args:
|
|
266
|
+
json_str (str): JSON string containing spectrum data.
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
Spectrum: New instance with attributes set from the JSON data.
|
|
270
|
+
"""
|
|
271
|
+
import json
|
|
272
|
+
|
|
273
|
+
data = json.loads(json_str)
|
|
274
|
+
return cls.from_dict(data)
|
|
275
|
+
|
|
276
|
+
def pandalize(self):
|
|
277
|
+
data = {
|
|
278
|
+
key: val for key, val in self.__dict__.items() if isinstance(val, np.ndarray) and val.size == self.mz.size
|
|
279
|
+
}
|
|
280
|
+
return pd.DataFrame(data)
|
|
281
|
+
|
|
282
|
+
def to_df(self):
|
|
283
|
+
return self.pandalize()
|
|
284
|
+
|
|
285
|
+
def mz_trim(self, *args, **kwargs):
|
|
286
|
+
"""
|
|
287
|
+
Alias for trim method to maintain compatibility with older code.
|
|
288
|
+
"""
|
|
289
|
+
return self.trim(*args, **kwargs)
|
|
290
|
+
|
|
291
|
+
def trim(
|
|
292
|
+
self,
|
|
293
|
+
mz_min: float | None = None,
|
|
294
|
+
mz_max: float | None = None,
|
|
295
|
+
) -> Spectrum:
|
|
296
|
+
if mz_min is not None:
|
|
297
|
+
mask = self.mz >= mz_min
|
|
298
|
+
self.mz = self.mz[mask]
|
|
299
|
+
self.inty = self.inty[mask]
|
|
300
|
+
for key in self.__dict__:
|
|
301
|
+
if isinstance(self.__dict__[key], np.ndarray) and self.__dict__[key].size == mask.size:
|
|
302
|
+
self.__dict__[key] = self.__dict__[key][mask]
|
|
303
|
+
if mz_max is not None:
|
|
304
|
+
mask = self.mz <= mz_max
|
|
305
|
+
self.mz = self.mz[mask]
|
|
306
|
+
self.inty = self.inty[mask]
|
|
307
|
+
for key in self.__dict__:
|
|
308
|
+
if isinstance(self.__dict__[key], np.ndarray) and self.__dict__[key].size == mask.size:
|
|
309
|
+
self.__dict__[key] = self.__dict__[key][mask]
|
|
310
|
+
return self
|
|
311
|
+
|
|
312
|
+
def mz_min(self):
|
|
313
|
+
if len(self.mz) == 0:
|
|
314
|
+
return 0
|
|
315
|
+
return np.min(self.mz)
|
|
316
|
+
|
|
317
|
+
def mz_max(self):
|
|
318
|
+
if len(self.mz) == 0:
|
|
319
|
+
return 0
|
|
320
|
+
return np.max(self.mz)
|
|
321
|
+
|
|
322
|
+
def inty_min(self):
|
|
323
|
+
if len(self.inty) == 0:
|
|
324
|
+
return 0
|
|
325
|
+
return np.min(self.inty)
|
|
326
|
+
|
|
327
|
+
def inty_max(self):
|
|
328
|
+
if len(self.inty) == 0:
|
|
329
|
+
return 0
|
|
330
|
+
return np.max(self.inty)
|
|
331
|
+
|
|
332
|
+
def tic(self):
|
|
333
|
+
if len(self.inty) == 0:
|
|
334
|
+
return 0
|
|
335
|
+
return np.sum(self.inty)
|
|
336
|
+
|
|
337
|
+
def keep_top(self, n: int = 100, inplace: bool = False) -> Spectrum:
|
|
338
|
+
idx = np.argsort(self.inty)[-n:]
|
|
339
|
+
spec_obj = self if inplace else self.copy()
|
|
340
|
+
array_length = self.mz.size
|
|
341
|
+
for key, val in spec_obj.__dict__.items():
|
|
342
|
+
if isinstance(val, np.ndarray) and val.size == array_length:
|
|
343
|
+
spec_obj.__dict__[key] = val[idx]
|
|
344
|
+
return spec_obj
|
|
345
|
+
|
|
346
|
+
def scale(self, factor: float = 1.0) -> Spectrum:
|
|
347
|
+
if factor == 1.0:
|
|
348
|
+
return self.copy()
|
|
349
|
+
spec_obj = self.copy()
|
|
350
|
+
spec_obj.inty = spec_obj.inty.astype(float) * factor
|
|
351
|
+
spec_obj.history_add(f"s[{factor}]")
|
|
352
|
+
return spec_obj
|
|
353
|
+
|
|
354
|
+
def baseline(self):
|
|
355
|
+
mz = self.mz
|
|
356
|
+
inty = self.inty
|
|
357
|
+
mz = mz[inty != 0]
|
|
358
|
+
inty = inty[inty != 0]
|
|
359
|
+
if len(mz) == 0:
|
|
360
|
+
return 0
|
|
361
|
+
idx = np.argsort(mz)
|
|
362
|
+
mz = mz[idx]
|
|
363
|
+
inty = inty[idx]
|
|
364
|
+
if len(mz) > 50:
|
|
365
|
+
# TODO not used
|
|
366
|
+
mz = mz[-50:]
|
|
367
|
+
inty = inty[-50:]
|
|
368
|
+
while True:
|
|
369
|
+
baseline = 1.5 * np.mean(inty)
|
|
370
|
+
mask = inty > baseline
|
|
371
|
+
if np.sum(mask) == 0:
|
|
372
|
+
break
|
|
373
|
+
inty = inty[~mask]
|
|
374
|
+
return baseline
|
|
375
|
+
|
|
376
|
+
def entropy(self) -> float:
|
|
377
|
+
peaks = np.column_stack((self.mz, self.inty))
|
|
378
|
+
entropy = -np.sum(peaks[:, 1] * np.log(peaks[:, 1] + 1e-9))
|
|
379
|
+
return float(entropy)
|
|
380
|
+
|
|
381
|
+
def __len__(self):
|
|
382
|
+
return self.mz.size
|
|
383
|
+
|
|
384
|
+
def __sizeof__(self):
|
|
385
|
+
return self.mz.size
|
|
386
|
+
|
|
387
|
+
def length(self):
|
|
388
|
+
return self.__len__()
|
|
389
|
+
|
|
390
|
+
def history_add(self, term: str):
|
|
391
|
+
if getattr(self, "history", None) is None:
|
|
392
|
+
self.history = ""
|
|
393
|
+
if len(self.history) > 0:
|
|
394
|
+
self.history += f" {term}"
|
|
395
|
+
else:
|
|
396
|
+
self.history = term
|
|
397
|
+
|
|
398
|
+
def history_check(self, term):
|
|
399
|
+
m = re.search(f"{term}[([A-Za-z0-9]*)]", self.history)
|
|
400
|
+
if m is None:
|
|
401
|
+
return None
|
|
402
|
+
return [x[1:-1] for x in m.group(0).split(",")]
|
|
403
|
+
|
|
404
|
+
def copy(self) -> Spectrum:
|
|
405
|
+
new = Spectrum(
|
|
406
|
+
mz=self.mz.copy(),
|
|
407
|
+
inty=self.inty.copy(),
|
|
408
|
+
ms_level=self.ms_level,
|
|
409
|
+
centroided=self.centroided,
|
|
410
|
+
label=self.label,
|
|
411
|
+
)
|
|
412
|
+
for key, val in self.__dict__.items():
|
|
413
|
+
if isinstance(val, np.ndarray):
|
|
414
|
+
new.__dict__[key] = val.copy()
|
|
415
|
+
else:
|
|
416
|
+
new.__dict__[key] = val
|
|
417
|
+
return new
|
|
418
|
+
|
|
419
|
+
def denoise(self, threshold: float | None = None) -> Spectrum:
|
|
420
|
+
if threshold is None:
|
|
421
|
+
threshold = self.baseline()
|
|
422
|
+
self_c = self.copy()
|
|
423
|
+
mask = self_c.inty > threshold
|
|
424
|
+
length = self_c.mz.size
|
|
425
|
+
for key in self_c.__dict__:
|
|
426
|
+
if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[key].size == length:
|
|
427
|
+
self_c.__dict__[key] = self_c.__dict__[key][mask]
|
|
428
|
+
self_c.history_add("t[BL]")
|
|
429
|
+
self_c.bl = threshold
|
|
430
|
+
return self_c
|
|
431
|
+
|
|
432
|
+
def filter(
|
|
433
|
+
self,
|
|
434
|
+
inty_min: float | None = None,
|
|
435
|
+
inty_max: float | None = None,
|
|
436
|
+
q1_ratio_min: float | None = None,
|
|
437
|
+
q1_ratio_max: float | None = None,
|
|
438
|
+
eic_corr_min: float | None = None,
|
|
439
|
+
eic_corr_max: float | None = None,
|
|
440
|
+
) -> Spectrum:
|
|
441
|
+
spec_obj = self.copy()
|
|
442
|
+
mask: np.ndarray = np.ones(len(spec_obj.mz), dtype=bool)
|
|
443
|
+
if inty_min is not None and inty_min > 0:
|
|
444
|
+
if inty_min < 1:
|
|
445
|
+
# TODO not used
|
|
446
|
+
inty_min = inty_min * spec_obj.inty.max()
|
|
447
|
+
else:
|
|
448
|
+
mask = mask & (spec_obj.inty >= inty_min)
|
|
449
|
+
spec_obj.history_add("f[inty_min%]")
|
|
450
|
+
if inty_max is not None and inty_max > 0:
|
|
451
|
+
mask = mask & (spec_obj.inty <= inty_max)
|
|
452
|
+
spec_obj.history_add("f[inty_max]")
|
|
453
|
+
if q1_ratio_min is not None and hasattr(spec_obj, "q1_ratio"):
|
|
454
|
+
mask = mask & (spec_obj.q1_ratio >= q1_ratio_min)
|
|
455
|
+
spec_obj.history_add("f[q1_ratio_min]")
|
|
456
|
+
if q1_ratio_max is not None and hasattr(spec_obj, "q1_ratio"):
|
|
457
|
+
mask = mask & (spec_obj.q1_ratio <= q1_ratio_max)
|
|
458
|
+
spec_obj.history_add("f[q1_ratio_max]")
|
|
459
|
+
if eic_corr_min is not None and hasattr(spec_obj, "eic_corr"):
|
|
460
|
+
mask = mask & (spec_obj.eic_corr >= eic_corr_min)
|
|
461
|
+
spec_obj.history_add("f[eic_corr_min]")
|
|
462
|
+
if eic_corr_max is not None and hasattr(spec_obj, "eic_corr"):
|
|
463
|
+
mask = mask & (spec_obj.eic_corr <= eic_corr_max)
|
|
464
|
+
spec_obj.history_add("f[eic_corr_max]")
|
|
465
|
+
mask_length = len(mask)
|
|
466
|
+
for key in spec_obj.__dict__:
|
|
467
|
+
if isinstance(spec_obj.__dict__[key], np.ndarray) and spec_obj.__dict__[key].size == mask_length:
|
|
468
|
+
spec_obj.__dict__[key] = spec_obj.__dict__[key][mask]
|
|
469
|
+
return spec_obj
|
|
470
|
+
|
|
471
|
+
def centroid(self, algo: str = "cr", **kwargs) -> Spectrum:
|
|
472
|
+
algo = algo.lower()
|
|
473
|
+
if algo == "cr":
|
|
474
|
+
return self.centroid_cr(**kwargs)
|
|
475
|
+
elif algo == "cwt":
|
|
476
|
+
return self.centroid_cwt(**kwargs)
|
|
477
|
+
elif algo in ["slm", "lm", "slmp", "lmp"]:
|
|
478
|
+
return self.centroid_lm(**kwargs)
|
|
479
|
+
else:
|
|
480
|
+
raise ValueError(f"Unknown centroiding algorithm: {algo}")
|
|
481
|
+
|
|
482
|
+
# TODO externalize params
|
|
483
|
+
def centroid_cr(
|
|
484
|
+
self,
|
|
485
|
+
tolerance: float = 0.002,
|
|
486
|
+
ppm: float = 5,
|
|
487
|
+
time_domain: bool = True,
|
|
488
|
+
inty_fun=np.max,
|
|
489
|
+
weighted: bool = True,
|
|
490
|
+
exponent: float = 3,
|
|
491
|
+
mode: str = "union",
|
|
492
|
+
min_prop: float = 0.5,
|
|
493
|
+
min_points: int = 5,
|
|
494
|
+
stats: bool = False,
|
|
495
|
+
wlen=50,
|
|
496
|
+
prominence=None,
|
|
497
|
+
**kwargs,
|
|
498
|
+
) -> Spectrum:
|
|
499
|
+
if self.centroided:
|
|
500
|
+
return self
|
|
501
|
+
s = self.copy()
|
|
502
|
+
with warnings.catch_warnings():
|
|
503
|
+
warnings.simplefilter("ignore")
|
|
504
|
+
new_spec = combine_peaks(
|
|
505
|
+
[s],
|
|
506
|
+
tolerance=tolerance,
|
|
507
|
+
ppm=ppm,
|
|
508
|
+
time_domain=time_domain,
|
|
509
|
+
inty_fun=inty_fun,
|
|
510
|
+
weighted=weighted,
|
|
511
|
+
exponent=exponent,
|
|
512
|
+
mode=mode,
|
|
513
|
+
min_prop=min_prop,
|
|
514
|
+
min_points=min_points,
|
|
515
|
+
main=None,
|
|
516
|
+
)
|
|
517
|
+
s.history_add("c[CR]")
|
|
518
|
+
s.history_add("c[CR]")
|
|
519
|
+
if stats or (prominence is not None):
|
|
520
|
+
indexes = np.searchsorted(s.mz, new_spec.mz)
|
|
521
|
+
widths = peak_widths(s.inty, indexes, rel_height=0.75)[0]
|
|
522
|
+
prominences = peak_prominences(s.inty, indexes, wlen=wlen)[0]
|
|
523
|
+
s.width = widths
|
|
524
|
+
s.prominence = prominences
|
|
525
|
+
|
|
526
|
+
s.mz = new_spec.mz
|
|
527
|
+
s.inty = new_spec.inty
|
|
528
|
+
s.centroided = True
|
|
529
|
+
if prominence is not None:
|
|
530
|
+
mask = prominences >= prominence
|
|
531
|
+
s.mz = s.mz[mask]
|
|
532
|
+
s.inty = s.inty[mask]
|
|
533
|
+
s.width = s.width[mask]
|
|
534
|
+
s.prominence = s.prominence[mask]
|
|
535
|
+
s.history_add("f[PRO]")
|
|
536
|
+
s.history_add("f[PRO]")
|
|
537
|
+
return s
|
|
538
|
+
|
|
539
|
+
def smooth(self, algo: str = "savgol", window_length: int = 7) -> Spectrum:
|
|
540
|
+
if self.centroided:
|
|
541
|
+
return self
|
|
542
|
+
s = self.copy()
|
|
543
|
+
match algo.lower():
|
|
544
|
+
case "savgol":
|
|
545
|
+
s.inty = savgol_filter(s.inty, window_length, 2)
|
|
546
|
+
s.history_add("s[SG]")
|
|
547
|
+
s.history_add("s[SG]")
|
|
548
|
+
case "cumsum":
|
|
549
|
+
cumsum_vec = np.cumsum(np.insert(s.inty, 0, 0))
|
|
550
|
+
ma_vec = (cumsum_vec[window_length:] - cumsum_vec[:-window_length]) / window_length
|
|
551
|
+
s.inty = np.concatenate((
|
|
552
|
+
s.inty[: window_length // 2],
|
|
553
|
+
ma_vec,
|
|
554
|
+
s.inty[-window_length // 2 :],
|
|
555
|
+
))
|
|
556
|
+
s.history_add("s[CSM]")
|
|
557
|
+
s.history_add("s[CSM]")
|
|
558
|
+
return s
|
|
559
|
+
|
|
560
|
+
# TODO externalize params
|
|
561
|
+
def centroid_cwt(
|
|
562
|
+
self,
|
|
563
|
+
stats: bool = False,
|
|
564
|
+
# TODO not used
|
|
565
|
+
wlen=50,
|
|
566
|
+
prominence=None,
|
|
567
|
+
**kwargs,
|
|
568
|
+
) -> Spectrum:
|
|
569
|
+
if self.centroided:
|
|
570
|
+
return self
|
|
571
|
+
s = self.copy()
|
|
572
|
+
with warnings.catch_warnings():
|
|
573
|
+
warnings.simplefilter("ignore")
|
|
574
|
+
peaks = find_peaks_cwt(s.inty, widths=np.arange(4, 30), min_snr=1)
|
|
575
|
+
if stats or (prominence is not None):
|
|
576
|
+
widths = peak_widths(s.inty, peaks, rel_height=0.75)
|
|
577
|
+
prominences = peak_prominences(s.inty, peaks)[0]
|
|
578
|
+
s.width = widths
|
|
579
|
+
s.prominence = prominences
|
|
580
|
+
s.mz = s.mz[peaks]
|
|
581
|
+
s.inty = s.inty[peaks]
|
|
582
|
+
s.centroided = True
|
|
583
|
+
s.history_add("c[CWT]")
|
|
584
|
+
s.history_add("c[CWT]")
|
|
585
|
+
if prominence is not None:
|
|
586
|
+
mask = prominences >= prominence
|
|
587
|
+
s.mz = s.mz[mask]
|
|
588
|
+
s.inty = s.inty[mask]
|
|
589
|
+
s.width = s.width[mask]
|
|
590
|
+
s.prominence = s.prominence[mask]
|
|
591
|
+
s.history_add("f[PRO]")
|
|
592
|
+
s.history_add("f[PRO]")
|
|
593
|
+
return s
|
|
594
|
+
|
|
595
|
+
# TODO externalize params
|
|
596
|
+
def centroid_lm(
|
|
597
|
+
self,
|
|
598
|
+
smooth=5,
|
|
599
|
+
# TODO not used
|
|
600
|
+
stats: bool = False,
|
|
601
|
+
min_points: int = 1,
|
|
602
|
+
##
|
|
603
|
+
distance: float = 5,
|
|
604
|
+
wlen=30,
|
|
605
|
+
plateau_size=None,
|
|
606
|
+
prominence=None,
|
|
607
|
+
refine: bool = True,
|
|
608
|
+
**kwargs,
|
|
609
|
+
) -> Spectrum:
|
|
610
|
+
if self.centroided:
|
|
611
|
+
return self
|
|
612
|
+
s = self.copy()
|
|
613
|
+
not_smothed_inty = s.inty.copy()
|
|
614
|
+
if smooth is not None:
|
|
615
|
+
try:
|
|
616
|
+
if len(s.mz) > smooth * 2:
|
|
617
|
+
s.inty = savgol_filter(s.inty, smooth, 2)
|
|
618
|
+
except: # noqa: E722
|
|
619
|
+
pass
|
|
620
|
+
if prominence is not None and prominence < 0 and s.bl is not None:
|
|
621
|
+
prominence = s.bl
|
|
622
|
+
with warnings.catch_warnings():
|
|
623
|
+
warnings.simplefilter("ignore")
|
|
624
|
+
peaks, props = find_peaks(
|
|
625
|
+
s.inty,
|
|
626
|
+
height=0,
|
|
627
|
+
width=1,
|
|
628
|
+
distance=distance,
|
|
629
|
+
plateau_size=plateau_size,
|
|
630
|
+
rel_height=0.75,
|
|
631
|
+
wlen=wlen,
|
|
632
|
+
)
|
|
633
|
+
s.width = props["widths"]
|
|
634
|
+
s.prominence = props["prominences"]
|
|
635
|
+
if refine:
|
|
636
|
+
prof_mz = s.mz
|
|
637
|
+
prof_inty = s.inty
|
|
638
|
+
for idx in peaks:
|
|
639
|
+
idxs = np.arange(idx - 2, idx + 3)
|
|
640
|
+
if idxs[0] < 0 or idxs[-1] >= len(prof_mz):
|
|
641
|
+
continue
|
|
642
|
+
s.mz[idx] = np.average(prof_mz[idxs], weights=prof_inty[idxs] ** 3 + 1)
|
|
643
|
+
inty_smoothed = np.max(prof_inty[idxs])
|
|
644
|
+
inty_not_smoothed = np.max(not_smothed_inty[idxs])
|
|
645
|
+
s.inty[idx] = np.max([inty_smoothed, inty_not_smoothed])
|
|
646
|
+
s.mz = s.mz[peaks]
|
|
647
|
+
s.inty = s.inty[peaks]
|
|
648
|
+
s.history_add("c[SLMR]")
|
|
649
|
+
s.history_add("c[SLMR]")
|
|
650
|
+
s.centroided = True
|
|
651
|
+
else:
|
|
652
|
+
s.mz = s.mz[peaks]
|
|
653
|
+
s.inty = props["peak_heights"]
|
|
654
|
+
s.history_add("c[SLM]")
|
|
655
|
+
s.history_add("c[SLM]")
|
|
656
|
+
s.centroided = True
|
|
657
|
+
if prominence is not None:
|
|
658
|
+
mask = s.prominence >= prominence
|
|
659
|
+
s.mz = s.mz[mask]
|
|
660
|
+
s.inty = s.inty[mask]
|
|
661
|
+
s.width = s.width[mask]
|
|
662
|
+
s.prominence = s.prominence[mask]
|
|
663
|
+
s.history_add("f[PRO]")
|
|
664
|
+
s.history_add("f[PRO]")
|
|
665
|
+
return s
|
|
666
|
+
|
|
667
|
+
def deisotope(self, mz_tol: float = 0.02, ratio_max: float = 1.5) -> Spectrum:
|
|
668
|
+
self_c = self.copy()
|
|
669
|
+
mzs = self_c.mz
|
|
670
|
+
intys = self_c.inty
|
|
671
|
+
is_isotopolog_of = np.zeros(len(mzs)).astype(np.int32)
|
|
672
|
+
i = 0
|
|
673
|
+
j = 1
|
|
674
|
+
while j < len(mzs) and i < len(mzs):
|
|
675
|
+
isodelta = mzs[j] - mzs[i] - 1.00335
|
|
676
|
+
if isodelta < -mz_tol:
|
|
677
|
+
j += 1
|
|
678
|
+
elif isodelta <= mz_tol:
|
|
679
|
+
if intys[j] < intys[i] * ratio_max:
|
|
680
|
+
if is_isotopolog_of[i] == 0:
|
|
681
|
+
is_isotopolog_of[j] = i
|
|
682
|
+
else:
|
|
683
|
+
is_isotopolog_of[j] = is_isotopolog_of[i]
|
|
684
|
+
j += 1
|
|
685
|
+
else:
|
|
686
|
+
i += 1
|
|
687
|
+
mask = np.where(is_isotopolog_of == 0)[0]
|
|
688
|
+
for key in self_c.__dict__:
|
|
689
|
+
if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[key].size == len(is_isotopolog_of):
|
|
690
|
+
self_c.__dict__[key] = self_c.__dict__[key][mask]
|
|
691
|
+
if self_c.label is not None:
|
|
692
|
+
self_c.label = self_c.label + " deiso."
|
|
693
|
+
self_c.history_add("f[iso]")
|
|
694
|
+
self_c.history_add("f[iso]")
|
|
695
|
+
return self_c
|
|
696
|
+
|
|
697
|
+
# TODO externalize params
|
|
698
|
+
def plot(
    self,
    mz_start: float | None = None,
    mz_stop: float | None = None,
    ylog: bool = False,
    title: str | None = None,
    width: int = 1000,
    height: int = 250,
    colorby: str | None = None,
    cmap: str = "rainbow",
    cmap_provider: str = "colorcet",
    cmap_min: float = -1,
    cmap_max: float = 1,
    filename: str | None = None,
):
    """
    Draw the spectrum with bokeh.

    Profile spectra are drawn as a line, with zero-intensity points inserted
    on both sides of gaps in the m/z axis. Centroided spectra are drawn as
    vertical sticks with a marker and hover tooltip on each peak.

    The data source for centroided spectra is now built from the instance's
    arrays. It was previously built from ``to_dict()``, which returns lists,
    so it ended up without ``mz`` and ``inty``. A legend entry is only
    requested when ``label`` is set.

    Args:
        mz_start: Lower m/z bound of the plotted range (inclusive).
        mz_stop: Upper m/z bound of the plotted range (inclusive).
        ylog: Use a logarithmic intensity axis.
        title: Figure title.
        width: Figure width in pixels.
        height: Figure height in pixels.
        colorby: Name of a per-peak array attribute used to colour the
            markers of a centroided spectrum.
        cmap: Colormap name passed to holoviews' ``process_cmap``.
        cmap_provider: Colormap provider passed to ``process_cmap``.
        cmap_min: ``colorby`` value mapped to the low end of the colormap.
        cmap_max: ``colorby`` value mapped to the high end of the colormap.
        filename: ``*.html`` saves the figure, ``*.png`` exports it; any
            other value, or None, shows it.

    Raises:
        ValueError: If ``colorby`` is not a per-peak array attribute.
        ImportError: If ``colorby`` is used but holoviews is unavailable.
    """
    n_peaks = len(self.mz)
    cvalues = None
    cm = None
    colors = ["black"] * n_peaks
    if colorby is not None:
        source_values = self.__dict__.get(colorby)
        if not isinstance(source_values, np.ndarray) or len(source_values) != n_peaks:
            raise ValueError(f"{colorby} is not a valid attribute of the spectrum")
        if process_cmap is None:
            raise ImportError("holoviews is required to colour peaks by an attribute")
        # Clamp to [cmap_min, cmap_max], then rescale to 0..255 (NaN stays NaN).
        cvalues = np.clip(source_values.astype(float), cmap_min, cmap_max)
        cvalues = (cvalues - cmap_min) / (cmap_max - cmap_min) * 255
        cm = process_cmap(cmap, ncolors=255, provider=cmap_provider)
        colors = [
            rgb2hex(cm[int(i * (len(cm) - 1) / 255)]) if not np.isnan(i) else rgb2hex((0, 0, 0))
            for i in cvalues
        ]

    # One mask for the requested m/z window, reused for every per-peak array.
    keep = np.ones(n_peaks, dtype=bool)
    if mz_start is not None:
        keep &= self.mz >= mz_start
    if mz_stop is not None:
        keep &= self.mz <= mz_stop
    mz = self.mz[keep]
    inty = self.inty[keep]
    colors = [color for color, kept in zip(colors, keep) if kept]
    if len(mz) == 0:
        print("No peaks in spectrum after trimming")
        return

    p = figure(
        width=width,
        height=height,
        title=title,
    )
    # bokeh only accepts string legend labels, so omit the argument without a label.
    legend_kwargs = {"legend_label": self.label} if self.label is not None else {}

    if not self.centroided:
        mz_diff = np.diff(mz)
        new_mzs: list[float] = []
        new_inty: list[float] = []
        last_good_step = 1
        for i, step in enumerate(mz_diff):
            if step > last_good_step * 4:
                # Gap in the profile: pull the line down to zero on both sides.
                new_mzs.append(mz[i] + last_good_step)
                new_inty.append(0)
                new_mzs.append(mz[i + 1] - last_good_step)
                new_inty.append(0)
            else:
                last_good_step = step
        if len(new_mzs) > 0:
            mz = np.append(mz, np.array(new_mzs))
            inty = np.append(inty, np.array(new_inty))
            order = np.argsort(mz)
            mz = mz[order]
            inty = inty[order]
        p.line(mz, inty, line_color="black", **legend_kwargs)
    else:
        data = {
            key: val[keep]
            for key, val in self.__dict__.items()
            if isinstance(val, np.ndarray) and val.shape == self.mz.shape
        }
        # Stick baseline: 1 on a log axis (0 cannot be drawn), 0 otherwise.
        data["zeros"] = np.ones_like(mz) if ylog else np.zeros_like(mz)
        data["color"] = colors
        source = ColumnDataSource(data)
        p.segment(
            x0="mz",
            y0="zeros",
            x1="mz",
            y1="inty",
            line_color="black",
            source=source,
            **legend_kwargs,
        )
        if cvalues is not None:
            marker = {"size": 5, "fill_color": "color", "line_color": "color"}
        else:
            marker = {"size": 3, "fill_color": "black", "line_color": "black"}
        sc = p.scatter(
            x="mz",
            y="inty",
            source=source,
            **marker,
            **legend_kwargs,
        )
        tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
        hover_tool = HoverTool(renderers=[sc], tooltips=tooltips)
        p.add_tools(hover_tool)

    box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
    if box_zoom_tools:
        p.toolbar.active_drag = box_zoom_tools[0]

    if colorby is not None:
        mapper = LinearColorMapper(
            palette=[rgb2hex(c) for c in cm],
            low=cmap_min,
            high=cmap_max,
        )
        if ColorBar is not None:
            color_bar = ColorBar(
                color_mapper=mapper,
                location=(0, 0),
                title=colorby,
            )
            p.add_layout(color_bar, "right")

    if ylog:
        p.y_scale = LogScale()
        p.yaxis.formatter = LogTickFormatter()
    else:
        p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")

    if filename is not None and filename.endswith(".html"):
        output_file(filename)
        save(p)
    elif filename is not None and filename.endswith(".png"):
        export_png(p, filename=filename)
    else:
        show(p)
|
|
849
|
+
|
|
850
|
+
def plot_stats(self):
    """
    Show the per-peak arrays (see ``pandalize``) as a parallel-coordinates
    plot.
    """
    frame = self.pandalize()
    from bokeh.plotting import show
    from hvplot.plotting import parallel_coordinates

    options = {
        "color": "black",
        "width": 1000,
        "height": 250,
        "line_width": 1,
        "hover_color": "red",
    }
    chart = parallel_coordinates(frame, **options)
    show(chart)
|
|
864
|
+
|
|
865
|
+
def plot_dist(self):
    """
    Show one 100-bin histogram figure for every NumPy array attribute of
    the spectrum.
    """
    from bokeh.plotting import figure
    from bokeh.plotting import show

    array_attrs = [
        (name, value) for name, value in self.__dict__.items() if isinstance(value, np.ndarray)
    ]
    for name, value in array_attrs:
        counts, bin_edges = np.histogram(value, bins=100)
        hist_fig = figure(
            width=250,
            height=250,
            title=name,
        )
        hist_fig.quad(
            top=counts,
            bottom=0,
            left=bin_edges[:-1],
            right=bin_edges[1:],
            fill_color="navy",
            line_color="white",
            alpha=0.5,
        )
        show(hist_fig)
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
# TODO externalize params
|
|
890
|
+
def group_peaks(
    mz_values: np.ndarray,
    tolerance: float = 0,
    ppm: float = 0,
    time_domain: bool = False,
) -> np.ndarray:
    """
    Assign consecutive m/z values to groups.

    Values are walked in the given order. A new group starts whenever the
    step from the previous value exceeds the larger of ``tolerance`` and
    the ppm-derived tolerance of the previous value.

    Args:
        mz_values: Array of m/z values
        tolerance: Absolute tolerance for grouping
        ppm: Parts per million tolerance
        time_domain: If True, grouping is done on sqrt(mz)

    Returns:
        Array of group indices (starting at 0) for each peak
    """
    values = np.sqrt(mz_values) if time_domain else np.asarray(mz_values)

    groups = np.zeros(len(values), dtype=int)
    steps = np.diff(values)
    previous = values[:-1]
    # Allowed step per pair: absolute tolerance or ppm of the previous value.
    ppm_tolerance = previous * ppm * 1e-6 if ppm else np.zeros(len(steps))
    allowed = np.maximum(tolerance, ppm_tolerance)
    # Each over-tolerance step opens a new group; the running count is the group id.
    groups[1:] = np.cumsum(steps > allowed)
    return groups
|
|
925
|
+
|
|
926
|
+
|
|
927
|
+
# TODO externalize params
|
|
928
|
+
def combine_peaks(
    spectra: list[Spectrum],
    inty_fun: Callable = np.sum,
    mz_fun: Callable = np.mean,
    weighted: bool = False,
    exponent: float = 3,
    tolerance: float = 0.002,
    ppm: float = 5,
    time_domain: bool = True,
    mode: str = "union",
    main: int | None = None,
    min_points: int | None = None,
    min_prop: float = 0.5,
) -> Spectrum:
    """
    Combine multiple spectra into a single spectrum.

    All peaks are pooled, sorted by m/z and grouped with ``group_peaks``; each
    retained group contributes one peak to the result.

    Args:
        spectra: List of Spectrum objects to combine
        inty_fun: Function to combine the intensities of a group
        mz_fun: Function to combine the m/z values of a group (used when
            ``weighted`` is False)
        weighted: Use intensity-weighted mean for m/z values
        exponent: Exponent applied to the intensities used as weights
        tolerance: Absolute tolerance for peak grouping
        ppm: Parts per million tolerance for peak grouping
        time_domain: If True, grouping is done on sqrt(mz)
        mode: Strategy for combining peaks ("union" or "intersect")
        main: Index of main spectrum; groups without a peak from it are dropped
        min_points: Minimum number of points to retain a peak
        min_prop: Minimum proportion of spectra that must contribute to a
            group for the intersect strategy

    Returns:
        Combined Spectrum. An empty Spectrum is returned when ``spectra`` is
        empty or when no group passes the filters. When fewer than two peaks
        are available in total, they are returned as they are, without any
        filtering.
    """
    if not spectra:
        # np.concatenate cannot handle an empty list; there is nothing to combine.
        return Spectrum(mz=np.array([]), inty=np.array([]))

    # Pool all peaks and remember which spectrum each one came from.
    all_mz = np.concatenate([spec.mz for spec in spectra])
    all_inty = np.concatenate([spec.inty for spec in spectra])
    spectrum_indices = np.repeat(
        np.arange(len(spectra)),
        [len(spec.mz) for spec in spectra],
    )

    if all_mz.size < 2:
        return Spectrum(
            mz=all_mz,
            inty=all_inty,
            ms_level=spectra[0].ms_level,
            centroided=True,
        )

    # Sort by m/z
    order = np.argsort(all_mz)
    all_mz = all_mz[order]
    all_inty = all_inty[order]
    spectrum_indices = spectrum_indices[order]

    # Group peaks. The group ids never decrease along the sorted array, so each
    # group is one contiguous slice and no per-group boolean mask is needed.
    groups = group_peaks(all_mz, tolerance, ppm, time_domain)
    boundaries = np.flatnonzero(np.diff(groups)) + 1
    starts = np.concatenate(([0], boundaries))
    stops = np.concatenate((boundaries, [groups.size]))

    min_spectra = len(spectra) * min_prop

    combined_mz = []
    combined_inty = []

    for start, stop in zip(starts, stops):
        # Drop groups with too few points.
        if min_points is not None and (stop - start) < min_points:
            continue

        group_spectra = spectrum_indices[start:stop]

        # Intersect strategy: enough distinct spectra must contribute.
        if mode == "intersect" and len(np.unique(group_spectra)) < min_spectra:
            continue

        # Main spectrum filtering: the group must contain a peak from it.
        if main is not None and main not in group_spectra:
            continue

        group_mz = all_mz[start:stop]
        group_inty = all_inty[start:stop]

        if weighted:
            combined_mz.append(np.average(group_mz, weights=group_inty**exponent))
        else:
            combined_mz.append(mz_fun(group_mz))
        combined_inty.append(inty_fun(group_inty))

    if not combined_mz:
        return Spectrum(mz=np.array([]), inty=np.array([]))

    return Spectrum(
        mz=np.array(combined_mz),
        inty=np.array(combined_inty),
        ms_level=spectra[0].ms_level,
        centroided=True,
    )
|
|
1037
|
+
|
|
1038
|
+
|
|
1039
|
+
# TODO externalize params
|
|
1040
|
+
def plot_spectra(
    spectra: list[Spectrum],
    labels: list[str] | None = None,
    mz_start: float | None = None,
    mz_stop: float | None = None,
    title: str | None = None,
    width: int = 1000,
    height: int = 250,
    cmap: str = "rainbow",
    cmap_provider: str = "colorcet",
    filename: str | None = None,
    colorby: str | None = None,
    ylog: bool = False,
) -> None:
    """
    Plot multiple mass spectrometry spectra on a single Bokeh figure.
    Profile spectra are drawn as continuous lines; centroided spectra are drawn as one marker per peak with a
    hover tool listing the per-peak attributes. Spectra can be optionally trimmed by m/z range using the
    mz_start and mz_stop parameters. A colormap is applied to differentiate between spectra.
    Parameters:
        spectra (List[spectrum]): A list of spectrum objects to be plotted. Each object must have attributes
                                  'mz' (mass-to-charge ratio), 'inty' (intensity), 'label' and 'centroided'
                                  (a boolean indicating if the spectrum is centroided).
        labels (List[str], optional): A list of labels for the spectra. If provided and its length is at least as
                                      long as the number of spectra, these labels override the default spectrum
                                      naming.
        mz_start (float, optional): The lower bound for m/z values. Peaks with m/z values below this threshold
                                    are excluded from the plot.
        mz_stop (float, optional): The upper bound for m/z values. Peaks with m/z values above this threshold
                                   are excluded from the plot.
        title (str, optional): The title of the plot.
        width (int, optional): The width of the plot in pixels. Default is 1000.
        height (int, optional): The height of the plot in pixels. Default is 250.
        cmap (str, optional): The colormap name used to assign colors to the spectra. Default is "rainbow".
        cmap_provider (str, optional): The provider for the specified colormap. Default is "colorcet".
        filename (str, optional): If provided, the plot is saved to a file. The export format is determined by the
                                  file extension: HTML for ".html", SVG for ".svg" and PNG for ".png". If the
                                  filename does not have one of these extensions, the plot is simply displayed.
        colorby (str, optional): Name of a per-peak numpy array attribute of the spectra. Its values are clipped
                                 to [-1, 1] and mapped onto the colormap to color the peak markers; NaN values
                                 are drawn in black. The spectrum's own array is not modified.
        ylog (bool, optional): If True, the y-axis is set to a logarithmic scale. Default is False.

    Returns:
        None
    Side Effects:
        - Displays the Bokeh plot in a browser window if no filename is provided.
        - Exports the plot to a file if a valid filename is provided.
        - Prints a message to the console if a spectrum contains no peaks after applying the m/z trimming; that
          spectrum is skipped. If every spectrum is empty, nothing is displayed or saved.
        - Prints a message to the console if plotting a spectrum fails; the remaining spectra are still plotted.
    """
    import numpy as np

    from bokeh.io import output_file
    from bokeh.io import save
    from bokeh.io.export import export_png
    from bokeh.models import BoxZoomTool
    from bokeh.models import ColumnDataSource
    from bokeh.models import HoverTool
    from bokeh.models import LogScale
    from bokeh.models import LogTickFormatter
    from bokeh.models import NumeralTickFormatter
    from bokeh.plotting import figure
    from bokeh.plotting import show
    from holoviews.plotting.util import process_cmap
    from matplotlib.colors import rgb2hex

    # NOTE(review): LinearColorMapper, ColorBar, FixedTicker and export_svg are not
    # imported here; they are expected to be provided at module level - confirm.

    num_plots = len(spectra)
    cm = process_cmap(cmap, ncolors=num_plots, provider=cmap_provider)
    colors = [
        rgb2hex(cm[int(i * (len(cm) - 1) / (num_plots - 1))]) if num_plots > 1 else rgb2hex(cm[0])
        for i in range(num_plots)
    ]

    # Marker palette for colorby. Built once, up front, so that it is also available
    # to the color bar when no spectrum could be plotted.
    cm_markers = process_cmap(cmap, ncolors=255, provider=cmap_provider) if colorby is not None else None

    p = figure(
        width=width,
        height=height,
        title=title,
    )

    n_empty = 0
    for spec_idx, spec in enumerate(spectra):
        try:
            label = f"Spectrum {spec_idx}"
            if spec.label is not None:
                label = spec.label
            if labels is not None and len(labels) >= num_plots:
                label = labels[spec_idx]

            full_mz = np.asarray(spec.mz)
            n_peaks = len(full_mz)

            mcolors = ["black"] * n_peaks
            if colorby is not None:
                # check whether the string is a valid per-peak array attribute of the spectrum
                if not hasattr(spec, colorby):
                    raise ValueError(
                        f"{colorby} is not a valid attribute of the spectrum {spec_idx}",
                    )
                raw_values = getattr(spec, colorby)
                if not isinstance(raw_values, np.ndarray):
                    raise ValueError(
                        f"{colorby} of spectrum {spec_idx} is not a numpy array",
                    )
                if len(raw_values) != n_peaks:
                    raise ValueError(
                        f"{colorby} of spectrum {spec_idx} does not have one value per peak",
                    )

                # Clip to [-1, 1] on a copy (np.clip) so the caller's array stays
                # untouched, then rescale to the 0..255 palette range.
                scaled = (np.clip(raw_values, -1, 1) + 1) / 2 * 255

                # assign colors to the peaks based on the colorby attribute. Set NaNs to black
                mcolors = [
                    rgb2hex(cm_markers[int(v * (len(cm_markers) - 1) / 255)])
                    if not np.isnan(v)
                    else rgb2hex((0, 0, 0))
                    for v in scaled
                ]

            color = colors[spec_idx]

            # One mask over the untrimmed peaks, so every per-peak array can be
            # trimmed consistently further down.
            keep = np.ones(n_peaks, dtype=bool)
            if mz_start is not None:
                keep &= full_mz >= mz_start
            if mz_stop is not None:
                keep &= full_mz <= mz_stop
            trimmed = not keep.all()

            mz = full_mz[keep]
            inty = np.asarray(spec.inty)[keep]
            if trimmed:
                mcolors = np.array(mcolors)[keep].tolist()

            if len(mz) == 0:
                print("No peaks in spectrum after trimming")
                # Skip only this spectrum; the others are still plotted.
                n_empty += 1
                continue

            if not spec.centroided:
                # For profile spectra, adjust the points for line continuity:
                # wherever the m/z step jumps to more than 4x the last regular
                # step, insert zero-intensity points on both sides of the gap.
                mz_diff = np.diff(mz)
                new_mzs: list[float] = []
                new_inty: list[float] = []
                last_good_step = 1
                for i in range(len(mz_diff)):
                    if mz_diff[i] > last_good_step * 4:
                        new_mzs.append(mz[i] + last_good_step)
                        new_inty.append(0)
                        new_mzs.append(mz[i + 1] - last_good_step)
                        new_inty.append(0)
                    else:
                        last_good_step = mz_diff[i]
                if len(new_mzs) > 0:
                    mz = np.append(mz, np.array(new_mzs))
                    inty = np.append(inty, np.array(new_inty))
                    idx = np.argsort(mz)
                    mz = mz[idx]
                    inty = inty[idx]

                # Plot profile spectrum as a line
                p.line(mz, inty, line_color=color, legend_label=label)
            else:
                # For centroided spectra, build a data source that includes all
                # per-peak array attributes (arrays with one entry per untrimmed
                # peak), trimmed with the same mask as mz and inty.
                data = {
                    key: (val[keep] if trimmed else val)
                    for key, val in spec.to_dict().items()
                    if isinstance(val, np.ndarray) and val.size == n_peaks
                }
                # The glyph below needs these two columns in any case.
                data.setdefault("mz", mz)
                data.setdefault("inty", inty)
                data["zeros"] = np.zeros_like(mz)
                if colorby is not None:
                    data[colorby] = mcolors
                source = ColumnDataSource(data)

                # With colorby the marker color comes from the data-source column,
                # otherwise from the per-spectrum color.
                marker_color = colorby if colorby is not None else color
                sc = p.scatter(
                    x="mz",
                    y="inty",
                    size=5 if colorby is not None else 3,
                    fill_color=marker_color,
                    line_color=marker_color,
                    legend_label=label,
                    source=source,
                )
                # Create tooltips for all columns in the data source
                tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
                hover_tool = HoverTool(renderers=[sc], tooltips=tooltips)
                p.add_tools(hover_tool)
                box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
                if box_zoom_tools:
                    p.toolbar.active_drag = box_zoom_tools[0]
        except Exception as e:
            # Best effort: report the failure and continue with the next spectrum.
            print(f"Error plotting spectrum {spec_idx}: {e}")

    if num_plots > 0 and n_empty == num_plots:
        # Every spectrum was empty after trimming: nothing to show or save.
        return

    if colorby is not None:
        # Create a color mapper using the marker colormap with fixed range from -1 to 1
        color_mapper = LinearColorMapper(palette=cm_markers, low=-1, high=1)
        if ColorBar is not None:
            color_bar = ColorBar(
                color_mapper=color_mapper,
                ticker=FixedTicker(ticks=[-1, -0.5, 0, 0.5, 1]),
                location=(0, 0),
            )
            p.add_layout(color_bar, "right")

    if ylog:
        p.y_scale = LogScale()
        p.yaxis.formatter = LogTickFormatter()
    else:
        p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
    p.legend.click_policy = "hide"

    if filename is not None:
        if filename.endswith(".html"):
            output_file(filename)
            save(p)
        elif filename.endswith(".svg"):
            p.output_backend = "svg"
            export_svg(p, filename=filename)
        elif filename.endswith(".png"):
            export_png(p, filename=filename)
        else:
            show(p)
    else:
        show(p)
|
|
1284
|
+
|
|
1285
|
+
|
|
1286
|
+
if __name__ == "__main__":
|
|
1287
|
+
pass
|