masster 0.2.5__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic.

Files changed (55)
  1. masster/__init__.py +27 -27
  2. masster/_version.py +17 -17
  3. masster/chromatogram.py +497 -503
  4. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.featureXML +199787 -0
  5. masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.sample5 +0 -0
  6. masster/logger.py +318 -244
  7. masster/sample/__init__.py +9 -9
  8. masster/sample/defaults/__init__.py +15 -15
  9. masster/sample/defaults/find_adducts_def.py +325 -325
  10. masster/sample/defaults/find_features_def.py +366 -366
  11. masster/sample/defaults/find_ms2_def.py +285 -285
  12. masster/sample/defaults/get_spectrum_def.py +314 -318
  13. masster/sample/defaults/sample_def.py +374 -378
  14. masster/sample/h5.py +1321 -1297
  15. masster/sample/helpers.py +833 -364
  16. masster/sample/lib.py +762 -0
  17. masster/sample/load.py +1220 -1187
  18. masster/sample/parameters.py +131 -131
  19. masster/sample/plot.py +1685 -1622
  20. masster/sample/processing.py +1402 -1416
  21. masster/sample/quant.py +209 -0
  22. masster/sample/sample.py +393 -387
  23. masster/sample/sample5_schema.json +181 -181
  24. masster/sample/save.py +737 -736
  25. masster/sample/sciex.py +1213 -0
  26. masster/spectrum.py +1287 -1319
  27. masster/study/__init__.py +9 -9
  28. masster/study/defaults/__init__.py +21 -19
  29. masster/study/defaults/align_def.py +267 -267
  30. masster/study/defaults/export_def.py +41 -40
  31. masster/study/defaults/fill_chrom_def.py +264 -264
  32. masster/study/defaults/fill_def.py +260 -0
  33. masster/study/defaults/find_consensus_def.py +256 -256
  34. masster/study/defaults/find_ms2_def.py +163 -163
  35. masster/study/defaults/integrate_chrom_def.py +225 -225
  36. masster/study/defaults/integrate_def.py +221 -0
  37. masster/study/defaults/merge_def.py +256 -0
  38. masster/study/defaults/study_def.py +272 -269
  39. masster/study/export.py +674 -287
  40. masster/study/h5.py +1406 -886
  41. masster/study/helpers.py +1713 -433
  42. masster/study/helpers_optimized.py +317 -0
  43. masster/study/load.py +1231 -1078
  44. masster/study/parameters.py +99 -99
  45. masster/study/plot.py +632 -645
  46. masster/study/processing.py +1057 -1046
  47. masster/study/save.py +161 -134
  48. masster/study/study.py +612 -522
  49. masster/study/study5_schema.json +253 -241
  50. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/METADATA +15 -10
  51. masster-0.3.1.dist-info/RECORD +59 -0
  52. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/licenses/LICENSE +661 -661
  53. masster-0.2.5.dist-info/RECORD +0 -50
  54. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/WHEEL +0 -0
  55. {masster-0.2.5.dist-info → masster-0.3.1.dist-info}/entry_points.txt +0 -0
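
The largest single change in this release is the rewrite of `masster/spectrum.py`, shown in full below. As a quick orientation, here is a minimal usage sketch of the `Spectrum` class based on the docstrings in that diff; the `masster.spectrum` import path and the method names are taken from the diff itself and have not been verified against the released wheel:

```python
import numpy as np

# Assumed import path; the diff below defines `Spectrum` in masster/spectrum.py.
from masster.spectrum import Spectrum

# Build a small centroided spectrum from m/z and intensity arrays,
# mirroring the example in the module docstring.
mz = np.array([100.0, 150.0, 200.0, 250.0])
inty = np.array([1000.0, 5000.0, 3000.0, 800.0])
spectrum = Spectrum(mz=mz, inty=inty, ms_level=1)

# Round-trip through JSON using the to_json()/from_json() helpers in the diff.
restored = Spectrum.from_json(spectrum.to_json())
assert len(restored) == len(spectrum)

# Keep the most intense peaks and inspect the result as a pandas DataFrame.
top = spectrum.keep_top(n=3)
print(top.to_df())
```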
masster/spectrum.py CHANGED
@@ -1,1319 +1,1287 @@
1
- """
2
- spec.py
3
-
4
- This module provides tools for processing and analyzing individual mass spectra.
5
- It defines the `spec` class for handling mass spectral data, including peak detection,
6
- spectrum visualization, preprocessing operations, and spectral similarity calculations.
7
-
8
- Key Features:
9
- - **Spectrum Processing**: Handle m/z and intensity data with various preprocessing options.
10
- - **Peak Detection**: Advanced peak picking with customizable parameters and algorithms.
11
- - **Visualization**: Interactive and static spectral plots with annotation capabilities.
12
- - **Spectrum Comparison**: Calculate spectral similarities and perform matching operations.
13
- - **Data Export**: Save spectra in multiple formats including images and data files.
14
- - **Preprocessing**: Smoothing, baseline correction, normalization, and noise filtering.
15
-
16
- Dependencies:
17
- - `numpy`: For numerical array operations and mathematical computations.
18
- - `pandas`: For structured data handling and manipulation.
19
- - `bokeh`: For interactive plotting and visualization.
20
- - `scipy.signal`: For signal processing and peak detection algorithms.
21
- - `holoviews`: For high-level data visualization and color mapping.
22
-
23
- Classes:
24
- - `spec`: Main class for individual spectrum processing, providing methods for data
25
- manipulation, peak detection, visualization, and analysis.
26
-
27
- Functions:
28
- - `combine_peaks()`: Utility function for merging multiple peak lists.
29
- - `spec_to_mgf()`: Convert spectrum objects to MGF format.
30
- - Various utility functions for spectrum processing and analysis.
31
-
32
- Example Usage:
33
- ```python
34
- from spec import spec
35
- import numpy as np
36
-
37
- # Create spectrum from m/z and intensity arrays
38
- mz = np.array([100.0, 150.0, 200.0, 250.0])
39
- intensity = np.array([1000, 5000, 3000, 800])
40
- spectrum = spec(mz=mz, inty=intensity, ms_level=1)
41
-
42
- # Process and visualize
43
- spectrum.find_peaks()
44
- spectrum.plot()
45
- spectrum.save_plot("spectrum.html")
46
- ```
47
-
48
- See Also:
49
- - `single.py`: For handling complete mass spectrometry files containing multiple spectra.
50
- - `parameters.spectrum_parameters`: For spectrum-specific parameter configuration.
51
-
52
- """
53
-
54
- from __future__ import annotations
55
-
56
- import importlib
57
- import re
58
- import warnings
59
-
60
- from dataclasses import dataclass
61
- from typing import TYPE_CHECKING
62
-
63
- import numpy as np
64
- import pandas as pd
65
-
66
- from bokeh.io import output_file
67
- from bokeh.io import save
68
- from bokeh.io.export import export_png
69
- from bokeh.io.export import export_svg
70
- from bokeh.models import BoxZoomTool
71
- from bokeh.models import ColumnDataSource
72
- from bokeh.models import FixedTicker
73
- from bokeh.models import HoverTool
74
- from bokeh.models import LinearColorMapper
75
- from bokeh.models import LogScale
76
- from bokeh.models import LogTickFormatter
77
- from bokeh.models import NumeralTickFormatter
78
- from bokeh.plotting import figure
79
- from bokeh.plotting import show
80
-
81
- if TYPE_CHECKING:
82
- try:
83
- from bokeh.models import ColorBar # type: ignore
84
- except ImportError:
85
- ColorBar = None
86
- else:
87
- try:
88
- from bokeh.models import ColorBar # type: ignore
89
- except ImportError:
90
- try:
91
- from bokeh.models.annotations import ColorBar # type: ignore[import-untyped]
92
- except ImportError:
93
- ColorBar = None
94
-
95
-
96
- try:
97
- from holoviews.plotting.util import process_cmap
98
- except ImportError:
99
- process_cmap = None
100
- from matplotlib.colors import rgb2hex
101
- from scipy.signal import find_peaks
102
- from scipy.signal import find_peaks_cwt
103
- from scipy.signal import peak_prominences
104
- from scipy.signal import peak_widths
105
- from scipy.signal import savgol_filter
106
-
107
-
108
- if TYPE_CHECKING:
109
- from collections.abc import Callable
110
-
111
-
112
- @dataclass
113
- class Spectrum:
114
- """
115
- A class for processing and analyzing individual mass spectra.
116
-
117
- The `spec` class provides comprehensive tools for handling mass spectral data,
118
- including peak detection, preprocessing, visualization, and spectral analysis.
119
- It supports both centroided and profile mode spectra and offers various
120
- algorithms for peak picking and spectral processing.
121
-
122
- Attributes:
123
- mz (np.ndarray): Mass-to-charge ratio values.
124
- inty (np.ndarray): Intensity values corresponding to m/z values.
125
- ms_level (int, optional): MS level (1 for MS1, 2 for MS2, etc.).
126
- label (str, optional): Text label for the spectrum.
127
- centroided (bool, optional): Whether the spectrum is centroided.
128
- history (str): Processing history log.
129
- bl (np.ndarray, optional): Baseline values for baseline correction.
130
-
131
- Key Methods:
132
- - `find_peaks()`: Detect peaks in the spectrum using various algorithms.
133
- - `plot()`: Generate interactive or static plots of the spectrum.
134
- - `denoise()`: Remove noise and low-intensity signals.
135
- - `smooth()`: Apply smoothing algorithms to the spectrum.
136
- - `normalize()`: Normalize spectrum intensities.
137
- - `copy()`: Create a deep copy of the spectrum object.
138
-
139
- Example Usage:
140
- >>> import numpy as np
141
- >>> from masster import spec
142
- >>> mz = np.array([100.0, 150.0, 200.0, 250.0])
143
- >>> intensity = np.array([1000, 5000, 3000, 800])
144
- >>> spectrum = spec(mz=mz, inty=intensity, ms_level=1)
145
- >>> spectrum.find_peaks()
146
- >>> spectrum.plot()
147
-
148
- See Also:
149
- - `ddafile`: For handling complete mass spectrometry files.
150
- - `SpectrumParameters`: For spectrum-specific parameter configuration.
151
- """
152
-
153
- def __init__(
154
- self,
155
- mz: np.ndarray | None = None,
156
- inty: np.ndarray | None = None,
157
- ms_level: int | None = None,
158
- label: str | None = None,
159
- centroided=None,
160
- **kwargs,
161
- ):
162
- # Handle case where mz and inty might be in kwargs (from from_dict/from_json)
163
- if mz is None and "mz" in kwargs:
164
- mz = kwargs.pop("mz")
165
- if inty is None and "inty" in kwargs:
166
- inty = kwargs.pop("inty")
167
-
168
- # Ensure mz and inty are provided
169
- if mz is None or inty is None:
170
- raise ValueError("mz and inty arrays are required")
171
-
172
- self.label = label
173
- self.ms_level = ms_level
174
- self.centroided = centroided
175
- self.mz = mz
176
- self.inty = inty
177
- self.history = ""
178
- self.bl: float | None = None
179
- # Optional attributes for peak analysis
180
- self.width: np.ndarray | None = None
181
- self.prominence: np.ndarray | None = None
182
- self.__dict__.update(kwargs)
183
- self.__post_init__()
184
- if centroided is None:
185
- self.centroided = self.check_if_centroided()
186
-
187
- def __post_init__(self):
188
- self.mz = np.asarray(self.mz)
189
- self.inty = np.asarray(self.inty)
190
- if self.mz.shape != self.inty.shape:
191
- raise ValueError("mz and intensity arrays must have the same shape")
192
- if self.centroided is None:
193
- self.centroided = self.check_if_centroided()
194
- if self.history is None:
195
- self.history = ""
196
- if self.bl is None:
197
- self.bl = None
198
-
199
- def check_if_centroided(self) -> bool:
200
- if self.mz.size == 0:
201
- return True
202
- mzs = self.mz[self.mz < np.min(self.mz) + 0.4]
203
- if len(mzs) < 20:
204
- if len(mzs) < 3:
205
- return True
206
- min_distance = np.min(np.diff(mzs))
207
- if min_distance > 0.003:
208
- return True
209
- return False
210
-
211
- def reload(self):
212
- modname = self.__class__.__module__
213
- mod = __import__(modname, fromlist=[modname.split(".")[0]])
214
- importlib.reload(mod)
215
- new = getattr(mod, self.__class__.__name__)
216
- setattr(self, "__class__", new) # noqa: B010
217
-
218
- def to_dict(self):
219
- # return a dictionary representation of the spectrum. include all the attributes
220
- # Create a copy to avoid modifying the original object
221
- import copy
222
-
223
- result = {}
224
-
225
- # Handle numpy arrays by creating copies and converting to lists
226
- for key, value in self.__dict__.items():
227
- if isinstance(value, np.ndarray):
228
- result[key] = value.copy().tolist()
229
- elif isinstance(value, (list, dict)):
230
- # Create copies of mutable objects
231
- result[key] = copy.deepcopy(value)
232
- else:
233
- # Immutable objects can be copied directly
234
- result[key] = value
235
- # round m/z to 5 decimal places and intensity to 2 decimal places
236
- if 'mz' in result:
237
- result['mz'] = np.round(result['mz'], 5).tolist()
238
- if 'inty' in result:
239
- result['inty'] = np.round(result['inty'], 2).tolist()
240
-
241
- return result
242
-
243
- @classmethod
244
- def from_dict(cls, data: dict):
245
- # Create instance directly from data dictionary
246
- return cls(**data)
247
-
248
- def to_json(self):
249
- """
250
- Serialize the spectrum to a JSON string.
251
-
252
- Returns:
253
- str: JSON string representation of the spectrum.
254
- """
255
- import json
256
-
257
- data = self.to_dict()
258
- return json.dumps(data, indent=2)
259
-
260
- @classmethod
261
- def from_json(cls, json_str):
262
- """
263
- Create a Spectrum instance from a JSON string.
264
-
265
- Args:
266
- json_str (str): JSON string containing spectrum data.
267
-
268
- Returns:
269
- Spectrum: New instance with attributes set from the JSON data.
270
- """
271
- import json
272
-
273
- data = json.loads(json_str)
274
- return cls.from_dict(data)
275
-
276
- def pandalize(self):
277
- data = {
278
- key: val
279
- for key, val in self.__dict__.items()
280
- if isinstance(val, np.ndarray) and val.size == self.mz.size
281
- }
282
- return pd.DataFrame(data)
283
-
284
- def to_df(self):
285
- return self.pandalize()
286
-
287
- def mz_trim(self, *args, **kwargs):
288
- """
289
- Alias for trim method to maintain compatibility with older code.
290
- """
291
- return self.trim(*args, **kwargs)
292
-
293
- def trim(
294
- self,
295
- mz_min: float | None = None,
296
- mz_max: float | None = None,
297
- ) -> Spectrum:
298
- if mz_min is not None:
299
- mask = self.mz >= mz_min
300
- self.mz = self.mz[mask]
301
- self.inty = self.inty[mask]
302
- for key in self.__dict__:
303
- if (
304
- isinstance(self.__dict__[key], np.ndarray)
305
- and self.__dict__[key].size == mask.size
306
- ):
307
- self.__dict__[key] = self.__dict__[key][mask]
308
- if mz_max is not None:
309
- mask = self.mz <= mz_max
310
- self.mz = self.mz[mask]
311
- self.inty = self.inty[mask]
312
- for key in self.__dict__:
313
- if (
314
- isinstance(self.__dict__[key], np.ndarray)
315
- and self.__dict__[key].size == mask.size
316
- ):
317
- self.__dict__[key] = self.__dict__[key][mask]
318
- return self
319
-
320
- def mz_min(self):
321
- if len(self.mz) == 0:
322
- return 0
323
- return np.min(self.mz)
324
-
325
- def mz_max(self):
326
- if len(self.mz) == 0:
327
- return 0
328
- return np.max(self.mz)
329
-
330
- def inty_min(self):
331
- if len(self.inty) == 0:
332
- return 0
333
- return np.min(self.inty)
334
-
335
- def inty_max(self):
336
- if len(self.inty) == 0:
337
- return 0
338
- return np.max(self.inty)
339
-
340
- def tic(self):
341
- if len(self.inty) == 0:
342
- return 0
343
- return np.sum(self.inty)
344
-
345
- def keep_top(self, n: int = 100, inplace: bool = False) -> Spectrum:
346
- idx = np.argsort(self.inty)[-n:]
347
- spec_obj = self if inplace else self.copy()
348
- array_length = self.mz.size
349
- for key, val in spec_obj.__dict__.items():
350
- if isinstance(val, np.ndarray) and val.size == array_length:
351
- spec_obj.__dict__[key] = val[idx]
352
- return spec_obj
353
-
354
- def scale(self, factor: float = 1.0) -> Spectrum:
355
- if factor == 1.0:
356
- return self.copy()
357
- spec_obj = self.copy()
358
- spec_obj.inty = spec_obj.inty.astype(float) * factor
359
- spec_obj.history_add(f"s[{factor}]")
360
- return spec_obj
361
-
362
- def baseline(self):
363
- mz = self.mz
364
- inty = self.inty
365
- mz = mz[inty != 0]
366
- inty = inty[inty != 0]
367
- if len(mz) == 0:
368
- return 0
369
- idx = np.argsort(mz)
370
- mz = mz[idx]
371
- inty = inty[idx]
372
- if len(mz) > 50:
373
- # TODO not used
374
- mz = mz[-50:]
375
- inty = inty[-50:]
376
- while True:
377
- baseline = 1.5 * np.mean(inty)
378
- mask = inty > baseline
379
- if np.sum(mask) == 0:
380
- break
381
- inty = inty[~mask]
382
- return baseline
383
-
384
- def entropy(self) -> float:
385
- peaks = np.column_stack((self.mz, self.inty))
386
- entropy = -np.sum(peaks[:, 1] * np.log(peaks[:, 1] + 1e-9))
387
- return float(entropy)
388
-
389
- def __len__(self):
390
- return self.mz.size
391
-
392
- def __sizeof__(self):
393
- return self.mz.size
394
-
395
- def length(self):
396
- return self.__len__()
397
-
398
- def history_add(self, term: str):
399
- if getattr(self, "history", None) is None:
400
- self.history = ""
401
- if len(self.history) > 0:
402
- self.history += f" {term}"
403
- else:
404
- self.history = term
405
-
406
- def history_check(self, term):
407
- m = re.search(f"{term}[([A-Za-z0-9]*)]", self.history)
408
- if m is None:
409
- return None
410
- return [x[1:-1] for x in m.group(0).split(",")]
411
-
412
- def copy(self) -> Spectrum:
413
- new = Spectrum(
414
- mz=self.mz.copy(),
415
- inty=self.inty.copy(),
416
- ms_level=self.ms_level,
417
- centroided=self.centroided,
418
- label=self.label,
419
- )
420
- for key, val in self.__dict__.items():
421
- if isinstance(val, np.ndarray):
422
- new.__dict__[key] = val.copy()
423
- else:
424
- new.__dict__[key] = val
425
- return new
426
-
427
- def denoise(self, threshold: float | None = None) -> Spectrum:
428
- if threshold is None:
429
- threshold = self.baseline()
430
- self_c = self.copy()
431
- mask = self_c.inty > threshold
432
- length = self_c.mz.size
433
- for key in self_c.__dict__:
434
- if (
435
- isinstance(self_c.__dict__[key], np.ndarray)
436
- and self_c.__dict__[key].size == length
437
- ):
438
- self_c.__dict__[key] = self_c.__dict__[key][mask]
439
- self_c.history_add("t[BL]")
440
- self_c.bl = threshold
441
- return self_c
442
-
443
- def filter(
444
- self,
445
- inty_min: float | None = None,
446
- inty_max: float | None = None,
447
- q1_ratio_min: float | None = None,
448
- q1_ratio_max: float | None = None,
449
- eic_corr_min: float | None = None,
450
- eic_corr_max: float | None = None,
451
- ) -> Spectrum:
452
- spec_obj = self.copy()
453
- mask: np.ndarray = np.ones(len(spec_obj.mz), dtype=bool)
454
- if inty_min is not None and inty_min > 0:
455
- if inty_min < 1:
456
- # TODO not used
457
- inty_min = inty_min * spec_obj.inty.max()
458
- else:
459
- mask = mask & (spec_obj.inty >= inty_min)
460
- spec_obj.history_add("f[inty_min%]")
461
- if inty_max is not None and inty_max > 0:
462
- mask = mask & (spec_obj.inty <= inty_max)
463
- spec_obj.history_add("f[inty_max]")
464
- if q1_ratio_min is not None and hasattr(spec_obj, "q1_ratio"):
465
- mask = mask & (spec_obj.q1_ratio >= q1_ratio_min)
466
- spec_obj.history_add("f[q1_ratio_min]")
467
- if q1_ratio_max is not None and hasattr(spec_obj, "q1_ratio"):
468
- mask = mask & (spec_obj.q1_ratio <= q1_ratio_max)
469
- spec_obj.history_add("f[q1_ratio_max]")
470
- if eic_corr_min is not None and hasattr(spec_obj, "eic_corr"):
471
- mask = mask & (spec_obj.eic_corr >= eic_corr_min)
472
- spec_obj.history_add("f[eic_corr_min]")
473
- if eic_corr_max is not None and hasattr(spec_obj, "eic_corr"):
474
- mask = mask & (spec_obj.eic_corr <= eic_corr_max)
475
- spec_obj.history_add("f[eic_corr_max]")
476
- mask_length = len(mask)
477
- for key in spec_obj.__dict__:
478
- if (
479
- isinstance(spec_obj.__dict__[key], np.ndarray)
480
- and spec_obj.__dict__[key].size == mask_length
481
- ):
482
- spec_obj.__dict__[key] = spec_obj.__dict__[key][mask]
483
- return spec_obj
484
-
485
- def centroid(self, algo: str = "cr", **kwargs) -> Spectrum:
486
- algo = algo.lower()
487
- if algo == "cr":
488
- return self.centroid_cr(**kwargs)
489
- elif algo == "cwt":
490
- return self.centroid_cwt(**kwargs)
491
- elif algo in ["slm", "lm", "slmp", "lmp"]:
492
- return self.centroid_lm(**kwargs)
493
- else:
494
- raise ValueError(f"Unknown centroiding algorithm: {algo}")
495
-
496
- # TODO externalize params
497
- def centroid_cr(
498
- self,
499
- tolerance: float = 0.002,
500
- ppm: float = 5,
501
- time_domain: bool = True,
502
- inty_fun=np.max,
503
- weighted: bool = True,
504
- exponent: float = 3,
505
- mode: str = "union",
506
- min_prop: float = 0.5,
507
- min_points: int = 5,
508
- stats: bool = False,
509
- wlen=50,
510
- prominence=None,
511
- **kwargs,
512
- ) -> Spectrum:
513
- if self.centroided:
514
- return self
515
- s = self.copy()
516
- with warnings.catch_warnings():
517
- warnings.simplefilter("ignore")
518
- new_spec = combine_peaks(
519
- [s],
520
- tolerance=tolerance,
521
- ppm=ppm,
522
- time_domain=time_domain,
523
- inty_fun=inty_fun,
524
- weighted=weighted,
525
- exponent=exponent,
526
- mode=mode,
527
- min_prop=min_prop,
528
- min_points=min_points,
529
- main=None,
530
- )
531
- s.history_add("c[CR]")
532
- s.history_add("c[CR]")
533
- if stats or (prominence is not None):
534
- indexes = np.searchsorted(s.mz, new_spec.mz)
535
- widths = peak_widths(s.inty, indexes, rel_height=0.75)[0]
536
- prominences = peak_prominences(s.inty, indexes, wlen=wlen)[0]
537
- s.width = widths
538
- s.prominence = prominences
539
-
540
- s.mz = new_spec.mz
541
- s.inty = new_spec.inty
542
- s.centroided = True
543
- if prominence is not None:
544
- mask = prominences >= prominence
545
- s.mz = s.mz[mask]
546
- s.inty = s.inty[mask]
547
- s.width = s.width[mask]
548
- s.prominence = s.prominence[mask]
549
- s.history_add("f[PRO]")
550
- s.history_add("f[PRO]")
551
- return s
552
-
553
- def smooth(self, algo: str = "savgol", window_length: int = 7) -> Spectrum:
554
- if self.centroided:
555
- return self
556
- s = self.copy()
557
- match algo.lower():
558
- case "savgol":
559
- s.inty = savgol_filter(s.inty, window_length, 2)
560
- s.history_add("s[SG]")
561
- s.history_add("s[SG]")
562
- case "cumsum":
563
- cumsum_vec = np.cumsum(np.insert(s.inty, 0, 0))
564
- ma_vec = (
565
- cumsum_vec[window_length:] - cumsum_vec[:-window_length]
566
- ) / window_length
567
- s.inty = np.concatenate((
568
- s.inty[: window_length // 2],
569
- ma_vec,
570
- s.inty[-window_length // 2 :],
571
- ))
572
- s.history_add("s[CSM]")
573
- s.history_add("s[CSM]")
574
- return s
575
-
576
- # TODO externalize params
577
- def centroid_cwt(
578
- self,
579
- stats: bool = False,
580
- # TODO not used
581
- wlen=50,
582
- prominence=None,
583
- **kwargs,
584
- ) -> Spectrum:
585
- if self.centroided:
586
- return self
587
- s = self.copy()
588
- with warnings.catch_warnings():
589
- warnings.simplefilter("ignore")
590
- peaks = find_peaks_cwt(s.inty, widths=np.arange(4, 30), min_snr=1)
591
- if stats or (prominence is not None):
592
- widths = peak_widths(s.inty, peaks, rel_height=0.75)
593
- prominences = peak_prominences(s.inty, peaks)[0]
594
- s.width = widths
595
- s.prominence = prominences
596
- s.mz = s.mz[peaks]
597
- s.inty = s.inty[peaks]
598
- s.centroided = True
599
- s.history_add("c[CWT]")
600
- s.history_add("c[CWT]")
601
- if prominence is not None:
602
- mask = prominences >= prominence
603
- s.mz = s.mz[mask]
604
- s.inty = s.inty[mask]
605
- s.width = s.width[mask]
606
- s.prominence = s.prominence[mask]
607
- s.history_add("f[PRO]")
608
- s.history_add("f[PRO]")
609
- return s
610
-
611
- # TODO externalize params
612
- def centroid_lm(
613
- self,
614
- smooth=5,
615
- # TODO not used
616
- stats: bool = False,
617
- min_points: int = 1,
618
- ##
619
- distance: float = 5,
620
- wlen=30,
621
- plateau_size=None,
622
- prominence=None,
623
- refine: bool = True,
624
- **kwargs,
625
- ) -> Spectrum:
626
- if self.centroided:
627
- return self
628
- s = self.copy()
629
- not_smothed_inty = s.inty.copy()
630
- if smooth is not None:
631
- try:
632
- if len(s.mz) > smooth * 2:
633
- s.inty = savgol_filter(s.inty, smooth, 2)
634
- except: # noqa: E722
635
- pass
636
- if prominence is not None and prominence < 0 and s.bl is not None:
637
- prominence = s.bl
638
- with warnings.catch_warnings():
639
- warnings.simplefilter("ignore")
640
- peaks, props = find_peaks(
641
- s.inty,
642
- height=0,
643
- width=1,
644
- distance=distance,
645
- plateau_size=plateau_size,
646
- rel_height=0.75,
647
- wlen=wlen,
648
- )
649
- s.width = props["widths"]
650
- s.prominence = props["prominences"]
651
- if refine:
652
- prof_mz = s.mz
653
- prof_inty = s.inty
654
- for idx in peaks:
655
- idxs = np.arange(idx - 2, idx + 3)
656
- if idxs[0] < 0 or idxs[-1] >= len(prof_mz):
657
- continue
658
- s.mz[idx] = np.average(prof_mz[idxs], weights=prof_inty[idxs] ** 3 + 1)
659
- inty_smoothed = np.max(prof_inty[idxs])
660
- inty_not_smoothed = np.max(not_smothed_inty[idxs])
661
- s.inty[idx] = np.max([inty_smoothed, inty_not_smoothed])
662
- s.mz = s.mz[peaks]
663
- s.inty = s.inty[peaks]
664
- s.history_add("c[SLMR]")
665
- s.history_add("c[SLMR]")
666
- s.centroided = True
667
- else:
668
- s.mz = s.mz[peaks]
669
- s.inty = props["peak_heights"]
670
- s.history_add("c[SLM]")
671
- s.history_add("c[SLM]")
672
- s.centroided = True
673
- if prominence is not None:
674
- mask = s.prominence >= prominence
675
- s.mz = s.mz[mask]
676
- s.inty = s.inty[mask]
677
- s.width = s.width[mask]
678
- s.prominence = s.prominence[mask]
679
- s.history_add("f[PRO]")
680
- s.history_add("f[PRO]")
681
- return s
682
-
683
- def deisotope(self, mz_tol: float = 0.02, ratio_max: float = 1.5) -> Spectrum:
684
- self_c = self.copy()
685
- mzs = self_c.mz
686
- intys = self_c.inty
687
- is_isotopolog_of = np.zeros(len(mzs)).astype(np.int32)
688
- i = 0
689
- j = 1
690
- while j < len(mzs) and i < len(mzs):
691
- isodelta = mzs[j] - mzs[i] - 1.00335
692
- if isodelta < -mz_tol:
693
- j += 1
694
- elif isodelta <= mz_tol:
695
- if intys[j] < intys[i] * ratio_max:
696
- if is_isotopolog_of[i] == 0:
697
- is_isotopolog_of[j] = i
698
- else:
699
- is_isotopolog_of[j] = is_isotopolog_of[i]
700
- j += 1
701
- else:
702
- i += 1
703
- mask = np.where(is_isotopolog_of == 0)[0]
704
- for key in self_c.__dict__:
705
- if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[
706
- key
707
- ].size == len(is_isotopolog_of):
708
- self_c.__dict__[key] = self_c.__dict__[key][mask]
709
- if self_c.label is not None:
710
- self_c.label = self_c.label + " deiso."
711
- self_c.history_add("f[iso]")
712
- self_c.history_add("f[iso]")
713
- return self_c
714
-
715
- # TODO externalize params
716
- def plot(
717
- self,
718
- mz_start: float | None = None,
719
- mz_stop: float | None = None,
720
- ylog: bool = False,
721
- title: str | None = None,
722
- width: int = 1000,
723
- height: int = 250,
724
- colorby: str | None = None,
725
- cmap: str = "rainbow",
726
- cmap_provider: str = "colorcet",
727
- cmap_min: float = -1,
728
- cmap_max: float = 1,
729
- filename: str | None = None,
730
- ):
731
- cvalues = None
732
- colors = ["black"] * len(self.mz)
733
- if colorby is not None:
734
- if not hasattr(self, colorby):
735
- raise ValueError(f"{colorby} is not a valid attribute of the spectrum")
736
- if not isinstance(self.__dict__[colorby], np.ndarray):
737
- raise ValueError(f"{colorby} is not a valid attribute of the spectrum")
738
- if len(self.__dict__[colorby]) != len(self.mz):
739
- raise ValueError(f"{colorby} is not a valid attribute of the spectrum")
740
- else:
741
- cvalues = self.__dict__[colorby].copy()
742
- cvalues[cvalues < cmap_min] = cmap_min
743
- cvalues[cvalues > cmap_max] = cmap_max
744
- cvalues = (cvalues - cmap_min) / (cmap_max - cmap_min) * 255
745
- cm = process_cmap(cmap, ncolors=255, provider=cmap_provider)
746
- colors = [
747
- rgb2hex(cm[int(i * (len(cm) - 1) / 255)])
748
- if not np.isnan(i)
749
- else rgb2hex((0, 0, 0))
750
- for i in cvalues
751
- ]
752
- p = figure(
753
- width=width,
754
- height=height,
755
- title=title,
756
- )
757
- label = None
758
- if self.label is not None:
759
- label = self.label
760
- mz = self.mz
761
- inty = self.inty
762
- if mz_start is not None:
763
- mask = mz >= mz_start
764
- mz = mz[mask]
765
- inty = inty[mask]
766
- colors = np.array(colors)[mask].tolist()
767
- if mz_stop is not None:
768
- mask = mz <= mz_stop
769
- mz = mz[mask]
770
- inty = inty[mask]
771
- colors = np.array(colors)[mask].tolist()
772
- if len(mz) == 0:
773
- print("No peaks in spectrum after trimming")
774
- return
775
- if not self.centroided:
776
- mz_diff = np.diff(mz)
777
- new_mzs: list[float] = []
778
- new_inty: list[float] = []
779
- last_good_step = 1
780
- for i in range(len(mz_diff)):
781
- if mz_diff[i] > last_good_step * 4:
782
- new_mzs.append(mz[i] + last_good_step)
783
- new_inty.append(0)
784
- new_mzs.append(mz[i + 1] - last_good_step)
785
- new_inty.append(0)
786
- else:
787
- last_good_step = mz_diff[i]
788
- if len(new_mzs) > 0:
789
- new_mzs_array = np.array(new_mzs)
790
- new_inty_array = np.array(new_inty)
791
- mz = np.append(mz, new_mzs_array)
792
- inty = np.append(inty, new_inty_array)
793
- idx = np.argsort(mz)
794
- mz = mz[idx]
795
- inty = inty[idx]
796
- p.line(mz, inty, line_color="black", legend_label=label)
797
- else:
798
- data = self.to_dict()
799
- data = {
800
- key: val
801
- for key, val in data.items()
802
- if isinstance(val, np.ndarray) and val.size == mz.size
803
- }
804
- if ylog:
805
- data["zeros"] = np.ones_like(mz)
806
- else:
807
- data["zeros"] = np.zeros_like(mz)
808
- data["color"] = colors
809
- source = ColumnDataSource(data)
810
- p.segment(
811
- x0="mz",
812
- y0="zeros",
813
- x1="mz",
814
- y1="inty",
815
- line_color="black",
816
- legend_label=label,
817
- source=source,
818
- )
819
- if cvalues is not None:
820
- sc = p.scatter(
821
- x="mz",
822
- y="inty",
823
- size=5,
824
- fill_color="color",
825
- line_color="color",
826
- legend_label=label,
827
- source=source,
828
- )
829
- else:
830
- sc = p.scatter(
831
- x="mz",
832
- y="inty",
833
- size=3,
834
- fill_color="black",
835
- line_color="black",
836
- legend_label=label,
837
- source=source,
838
- )
839
- tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
840
- hover_tool = HoverTool(renderers=[sc], tooltips=tooltips)
841
- p.add_tools(hover_tool)
842
- box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
843
- if box_zoom_tools:
844
- p.toolbar.active_drag = box_zoom_tools[0]
845
- if colorby is not None:
846
- mapper = LinearColorMapper(
847
- palette=[rgb2hex(c) for c in cm],
848
- low=cmap_min,
849
- high=cmap_max,
850
- )
851
- if ColorBar is not None:
852
- color_bar = ColorBar(
853
- color_mapper=mapper,
854
- location=(0, 0),
855
- title=colorby,
856
- )
857
- p.add_layout(color_bar, "right")
858
- if ylog:
859
- p.y_scale = LogScale()
860
- p.yaxis.formatter = LogTickFormatter()
861
- else:
862
- p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
863
- if filename is not None:
864
- if filename.endswith(".html"):
865
- output_file(filename)
866
- save(p)
867
- elif filename.endswith(".png"):
868
- export_png(p, filename=filename)
869
- else:
870
- show(p)
871
- else:
872
- show(p)
873
-
874
- def plot_stats(self):
875
- df = self.pandalize()
876
- from bokeh.plotting import show
877
- from hvplot.plotting import parallel_coordinates
878
-
879
- p = parallel_coordinates(
880
- df,
881
- color="black",
882
- width=1000,
883
- height=250,
884
- line_width=1,
885
- hover_color="red",
886
- )
887
- show(p)
888
-
889
- def plot_dist(self):
890
- from bokeh.plotting import figure
891
- from bokeh.plotting import show
892
-
893
- for _i, attr in enumerate(self.__dict__):
894
- if isinstance(self.__dict__[attr], np.ndarray):
895
- hist, edges = np.histogram(self.__dict__[attr], bins=100)
896
- p = figure(
897
- width=250,
898
- height=250,
899
- title=attr,
900
- )
901
- p.quad(
902
- top=hist,
903
- bottom=0,
904
- left=edges[:-1],
905
- right=edges[1:],
906
- fill_color="navy",
907
- line_color="white",
908
- alpha=0.5,
909
- )
910
- show(p)
911
-
912
-
913
- # TODO externalize params
914
- def group_peaks(
915
- mz_values: np.ndarray,
916
- tolerance: float = 0,
917
- ppm: float = 0,
918
- time_domain: bool = False,
919
- ) -> np.ndarray:
920
- """
921
- Group peaks based on m/z values using tolerance and ppm.
922
-
923
- Args:
924
- mz_values: Array of m/z values
925
- tolerance: Absolute tolerance for grouping
926
- ppm: Parts per million tolerance
927
- time_domain: If True, grouping is done on sqrt(mz)
928
-
929
- Returns:
930
- Array of group indices for each peak
931
- """
932
- values = np.sqrt(mz_values) if time_domain else mz_values
933
- values = np.sqrt(mz_values) if time_domain else mz_values
934
-
935
- # Initialize groups
936
- groups = np.zeros(len(values), dtype=int)
937
- current_group = 0
938
-
939
- for i in range(1, len(values)):
940
- diff = values[i] - values[i - 1]
941
- ppm_tolerance = values[i - 1] * ppm * 1e-6 if ppm else 0
942
- max_diff = max(tolerance, ppm_tolerance)
943
-
944
- if diff > max_diff:
945
- current_group += 1
946
- groups[i] = current_group
947
-
948
- return groups
949
-
950
-
951
- # TODO externalize params
952
- def combine_peaks(
953
- spectra: list[Spectrum],
954
- inty_fun: Callable = np.sum,
955
- mz_fun: Callable = np.mean,
956
- weighted: bool = False,
957
- exponent: float = 3,
958
- tolerance: float = 0.002,
959
- ppm: float = 5,
960
- time_domain: bool = True,
961
- mode: str = "union",
962
- main: int | None = None,
963
- min_points: int | None = None,
964
- min_prop: float = 0.5,
965
- ) -> Spectrum:
966
- """
967
- Combine multiple spectra into a single spectrum.
968
- Args:
969
- spectra: List of PeakMatrix objects to combine
970
- inty_fun: Function to combine intensities
971
- mz_fun: Function to combine m/z values
972
- weighted: Use intensity-weighted mean for m/z values
973
- exponent: Exponent for intensity weighting
974
- tolerance: Absolute tolerance for peak grouping
975
- ppm: Parts per million tolerance for peak grouping
976
- time_domain: If True, grouping is done on sqrt(mz)
977
- mode: Strategy for combining peaks ("union" or "intersect")
978
- main: Index of main spectrum to keep peaks from
979
- min_points: Minimum number of points to retain a peak
980
- min_prop: Minimum proportion for intersect strategy
981
-
982
- Returns:
983
- Combined Spectrum
984
-
985
- """
986
-
987
- if len(spectra) == 1:
988
- all_mz = spectra[0].mz
989
- all_inty = spectra[0].inty
990
- spectrum_indices: np.ndarray = np.zeros(all_mz.size)
991
- else:
992
- # Concatenate all m/z and intensity values
993
- all_mz = np.concatenate([pm.mz for pm in spectra])
994
- all_inty = np.concatenate([pm.inty for pm in spectra])
995
-
996
- # Track which spectrum each peak came from
997
- spectrum_indices = np.concatenate([
998
- np.full(len(pm.mz), i) for i, pm in enumerate(spectra)
999
- ])
1000
-
1001
- if all_mz.size < 2:
1002
- return Spectrum(
1003
- mz=all_mz,
1004
- inty=all_inty,
1005
- ms_level=spectra[0].ms_level,
1006
- centroided=True,
1007
- )
1008
- # Sort by m/z
1009
- sort_idx = np.argsort(all_mz)
1010
- all_mz = all_mz[sort_idx]
1011
- all_inty = all_inty[sort_idx]
1012
- spectrum_indices = spectrum_indices[sort_idx]
1013
-
1014
- # Group peaks
1015
- groups = group_peaks(all_mz, tolerance, ppm, time_domain)
1016
- unique_groups = np.unique(groups)
1017
-
1018
- # Process each group
1019
- combined_mz = []
1020
- combined_inty = []
1021
-
1022
- for group in unique_groups:
1023
- mask = groups == group
1024
- # check if the number of points is greater than min_points
1025
- if min_points is not None and np.sum(mask) < min_points:
1026
- continue
1027
- if min_points is not None and np.sum(mask) < min_points:
1028
- continue
1029
- group_mz = all_mz[mask]
1030
- group_inty = all_inty[mask]
1031
- group_spectra = spectrum_indices[mask]
1032
-
1033
- # Handle intersect strategy
1034
- if mode == "intersect":
1035
- unique_spectra = len(np.unique(group_spectra))
1036
- if unique_spectra < (len(spectra) * min_prop):
1037
- continue
1038
-
1039
- # Handle main spectrum filtering
1040
- if main is not None and main not in group_spectra:
1041
- continue
1042
- if main is not None and main not in group_spectra:
1043
- continue
1044
-
1045
- # Calculate combined values
1046
-
1047
- if weighted:
1048
- combined_mz.append(np.average(group_mz, weights=group_inty**exponent))
1049
- else:
1050
- combined_mz.append(mz_fun(group_mz))
1051
-
1052
- combined_inty.append(inty_fun(group_inty))
1053
-
1054
- if not combined_mz:
1055
- return Spectrum(mz=np.array([]), inty=np.array([]))
1056
-
1057
- return Spectrum(
1058
- mz=np.array(combined_mz),
1059
- inty=np.array(combined_inty),
1060
- ms_level=spectra[0].ms_level,
1061
- centroided=True,
1062
- )
1063
-
1064
-
1065
- # TODO externalize params
1066
- def plot_spectra(
1067
- spectra: list[Spectrum],
1068
- labels: list[str] | None = None,
1069
- mz_start: float | None = None,
1070
- mz_stop: float | None = None,
1071
- title: str | None = None,
1072
- width: int = 1000,
1073
- height: int = 250,
1074
- cmap: str = "rainbow",
1075
- cmap_provider: str = "colorcet",
1076
- filename: str | None = None,
1077
- colorby: str | None = None,
1078
- ylog: bool = False,
1079
- ) -> None:
1080
- """
1081
- Plot multiple mass spectrometry spectra on a single Bokeh figure.
1082
- This function displays profile spectra as continuous lines and centroided spectra as vertical segments
1083
- (with circles at the peak tops) on a Bokeh plot. Spectra can be optionally trimmed by m/z range using the
1084
- mz_start and mz_stop parameters. Additionally, a colormap is applied to differentiate between spectra.
1085
- Parameters:
1086
- spectra (List[spectrum]): A list of spectrum objects to be plotted. Each object must have attributes
1087
- 'mz' (mass-to-charge ratio), 'inty' (intensity), and 'centroided' (a boolean
1088
- indicating if the spectrum is centroided).
1089
- labels (List[str], optional): A list of labels for the spectra. If provided and its length is at least as
1090
- long as the number of spectra, these labels override the default spectrum
1091
- naming.
1092
- mz_start (float, optional): The lower bound for m/z values. Peaks with m/z values below this threshold
1093
- are excluded from the plot.
1094
- mz_stop (float, optional): The upper bound for m/z values. Peaks with m/z values above this threshold
1095
- are excluded from the plot.
1096
- title (str, optional): The title of the plot.
1097
- width (int, optional): The width of the plot in pixels. Default is 1000.
1098
- height (int, optional): The height of the plot in pixels. Default is 250.
1099
- cmap (str, optional): The colormap name used to assign colors to the spectra. Default is "rainbow".
1100
- cmap_provider (str, optional): The provider for the specified colormap. Default is "colorcet".
1101
- filename (str, optional): If provided, the plot is saved to a file. The export format is determined by the
1102
- file extension—HTML for ".html" and PNG for ".png". If the filename does not
1103
- have an appropriate extension, the plot is simply displayed.
1104
- ylog (bool, optional): If True, the y-axis is set to a logarithmic scale. Default is False.
1105
- colorby (str, optional): If provided, the color of each spectrum is determined by this attribute.
1106
-
1107
- Returns:
1108
- None
1109
- Side Effects:
1110
- - Displays the Bokeh plot in a browser window if no filename is provided.
1111
- - Exports the plot to a file if a valid filename is provided.
1112
- - Prints a message to the console if a spectrum contains no peaks after applying the m/z trimming.
1113
- """
1114
- import numpy as np
1115
-
1116
- from bokeh.io import output_file
1117
- from bokeh.io import save
1118
- from bokeh.io.export import export_png
1119
- from bokeh.models import BoxZoomTool
1120
- from bokeh.models import ColumnDataSource
1121
- from bokeh.models import HoverTool
1122
- from bokeh.models import LogScale
1123
- from bokeh.models import LogTickFormatter
1124
- from bokeh.models import NumeralTickFormatter
1125
- from bokeh.plotting import figure
1126
- from bokeh.plotting import show
1127
- from holoviews.plotting.util import process_cmap
1128
- from matplotlib.colors import rgb2hex
1129
-
1130
- num_plots = len(spectra)
1131
- cm = process_cmap(cmap, ncolors=num_plots, provider=cmap_provider)
1132
- colors = [
1133
- rgb2hex(cm[int(i * (len(cm) - 1) / (num_plots - 1))])
1134
- if num_plots > 1
1135
- else rgb2hex(cm[0])
1136
- for i in range(num_plots)
1137
- ]
1138
-
1139
- p = figure(
1140
- width=width,
1141
- height=height,
1142
- title=title,
1143
- )
1144
-
1145
- for spec_idx, spec in enumerate(spectra):
1146
- try:
1147
- label = f"Spectrum {spec_idx}"
1148
- if spec.label is not None:
1149
- label = spec.label
1150
- if labels is not None and len(labels) >= num_plots:
1151
- label = labels[spec_idx]
1152
-
1153
- mcvalues = None
1154
- mcolors = ["black"] * len(spec.mz)
1155
- if colorby is not None:
1156
- # check whether the string is a valid attribute of the spectrum
1157
- if not hasattr(spec, colorby):
1158
- raise ValueError(
1159
- f"{colorby} is not a valid attribute of the spectrum {spec_idx}",
1160
- )
1161
- if not isinstance(spec.__dict__[colorby], np.ndarray):
1162
- raise ValueError(
1163
- f"{colorby} is not a valid attribute of the spectrum {spec_idx}",
1164
- )
1165
- if len(spec.__dict__[colorby]) != len(spec.mz):
1166
- raise ValueError(
1167
- f"{colorby} is not a valid attribute of the spectrum {spec_idx}",
1168
- )
1169
- else:
1170
- mcvalues = spec.__dict__[colorby]
1171
- mcvalues[mcvalues < -1] = -1
1172
- mcvalues[mcvalues > 1] = 1
1173
-
1174
- # normalize the values to be between 0 and 255
1175
- mcvalues = (mcvalues + 1) / 2 * 255
1176
-
1177
- cm_markers = process_cmap(cmap, ncolors=255, provider=cmap_provider)
1178
- # assign colors to the peaks based on the colorby attribute. Set Nans to black
1179
- mcolors = [
1180
- rgb2hex(cm_markers[int(i * (len(cm_markers) - 1) / 255)])
1181
- if not np.isnan(i)
1182
- else rgb2hex((0, 0, 0))
1183
- for i in mcvalues
1184
- ]
1185
-
1186
- color = colors[spec_idx]
1187
- mz = spec.mz
1188
- inty = spec.inty
1189
- if mz_start is not None:
1190
- mask = mz >= mz_start
1191
- mz = mz[mask]
1192
- inty = inty[mask]
1193
- mcolors = np.array(mcolors)[mask].tolist()
1194
- if mz_stop is not None:
1195
- mask = mz <= mz_stop
1196
- mz = mz[mask]
1197
- inty = inty[mask]
1198
- mcolors = np.array(mcolors)[mask].tolist()
1199
-
1200
- if len(mz) == 0:
1201
- print("No peaks in spectrum after trimming")
1202
- return
1203
-
1204
- if not spec.centroided:
1205
- # For profile spectra, adjust the points for line continuity
1206
- mz_diff = np.diff(mz)
1207
- new_mzs: list[float] = []
1208
- new_inty: list[float] = []
1209
- last_good_step = 1
1210
- for i in range(len(mz_diff)):
1211
- if mz_diff[i] > last_good_step * 4:
1212
- new_mzs.append(mz[i] + last_good_step)
1213
- new_inty.append(0)
1214
- new_mzs.append(mz[i + 1] - last_good_step)
1215
- new_inty.append(0)
1216
- else:
1217
- last_good_step = mz_diff[i]
1218
- if len(new_mzs) > 0:
1219
- new_mzs_array = np.array(new_mzs)
1220
- new_inty_array = np.array(new_inty)
1221
- mz = np.append(mz, new_mzs_array)
1222
- inty = np.append(inty, new_inty_array)
1223
- idx = np.argsort(mz)
1224
- mz = mz[idx]
1225
- inty = inty[idx]
1226
-
1227
- # Plot profile spectrum as a line
1228
- p.line(mz, inty, line_color=color, legend_label=label)
1229
- else:
1230
- # For centroided spectra, build a data source that includes all available array attributes
1231
- data = spec.to_dict()
1232
- # remove all keys whose value does not have the size of mz
1233
- data = {
1234
- key: val
1235
- for key, val in data.items()
1236
- if isinstance(val, np.ndarray) and val.size == mz.size
1237
- }
1238
- data["zeros"] = np.zeros_like(mz)
1239
- if colorby is not None:
1240
- data[colorby] = mcolors
1241
- source = ColumnDataSource(data)
1242
-
1243
- # seg = p.segment(
1244
- # x0="mz",
1245
- # y0="zeros",
1246
- # x1="mz",
1247
- # y1="inty",
1248
- # line_color=color,
1249
- # legend_label=label,
1250
- # source=source,
1251
- # )
1252
- if colorby is not None:
1253
- sc = p.scatter(
1254
- x="mz",
1255
- y="inty",
1256
- size=5,
1257
- fill_color=colorby,
1258
- line_color=colorby,
1259
- legend_label=label,
1260
- source=source,
1261
- )
1262
- else:
1263
- sc = p.scatter(
1264
- x="mz",
1265
- y="inty",
1266
- size=3,
1267
- fill_color=color,
1268
- line_color=color,
1269
- legend_label=label,
1270
- source=source,
1271
- )
1272
- # Create tooltips for all columns in the data source
1273
- tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
1274
- hover_tool = HoverTool(renderers=[sc], tooltips=tooltips) # seg
1275
- p.add_tools(hover_tool)
1276
- box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
1277
- if box_zoom_tools:
1278
- p.toolbar.active_drag = box_zoom_tools[0]
1279
- except Exception as e:
1280
- print(f"Error plotting spectrum {spec_idx}: {e}")
1281
-
1282
- if colorby is not None:
1283
- # Create a color mapper using the colormap (cm) with fixed range from -1 to 1
1284
- color_mapper = LinearColorMapper(palette=cm_markers, low=-1, high=1)
1285
- if ColorBar is not None:
1286
- color_bar = ColorBar(
1287
- color_mapper=color_mapper,
1288
- ticker=FixedTicker(ticks=[-1, -0.5, 0, 0.5, 1]),
1289
- location=(0, 0),
1290
- )
1291
- p.add_layout(color_bar, "right")
1292
-
1293
- if ylog:
1294
- p.y_scale = LogScale()
1295
- p.yaxis.formatter = LogTickFormatter()
1296
- else:
1297
- p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
1298
- p.legend.click_policy = "hide"
1299
-
1300
- p.legend.click_policy = "hide"
1301
- p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
1302
-
1303
- if filename is not None:
1304
- if filename.endswith(".html"):
1305
- output_file(filename)
1306
- save(p)
1307
- elif filename.endswith(".svg"):
1308
- p.output_backend = "svg"
1309
- export_svg(p, filename=filename)
1310
- elif filename.endswith(".png"):
1311
- export_png(p, filename=filename)
1312
- else:
1313
- show(p)
1314
- else:
1315
- show(p)
1316
-
1317
-
1318
- if __name__ == "__main__":
1319
- pass
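
One helper documented in the listing above is `combine_peaks()`, which groups peaks across spectra by absolute and ppm tolerance and then merges each group into a single peak. A minimal sketch of calling it, again assuming the `masster.spectrum` import path:

```python
import numpy as np

from masster.spectrum import Spectrum, combine_peaks  # assumed import path

a = Spectrum(mz=np.array([100.000, 200.001]), inty=np.array([10.0, 20.0]), ms_level=2)
b = Spectrum(mz=np.array([100.001, 300.000]), inty=np.array([15.0, 5.0]), ms_level=2)

# Peaks are grouped using the absolute and ppm tolerances (applied in
# sqrt(m/z) space by default, per time_domain=True); intensities in each
# group are summed and m/z values become intensity-weighted averages.
merged = combine_peaks([a, b], tolerance=0.002, ppm=5, weighted=True)
print(merged.mz, merged.inty)
```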
1
+ """
2
+ spec.py
3
+
4
+ This module provides tools for processing and analyzing individual mass spectra.
5
+ It defines the `spec` class for handling mass spectral data, including peak detection,
6
+ spectrum visualization, preprocessing operations, and spectral similarity calculations.
7
+
8
+ Key Features:
9
+ - **Spectrum Processing**: Handle m/z and intensity data with various preprocessing options.
10
+ - **Peak Detection**: Advanced peak picking with customizable parameters and algorithms.
11
+ - **Visualization**: Interactive and static spectral plots with annotation capabilities.
12
+ - **Spectrum Comparison**: Calculate spectral similarities and perform matching operations.
13
+ - **Data Export**: Save spectra in multiple formats including images and data files.
14
+ - **Preprocessing**: Smoothing, baseline correction, normalization, and noise filtering.
15
+
16
+ Dependencies:
17
+ - `numpy`: For numerical array operations and mathematical computations.
18
+ - `pandas`: For structured data handling and manipulation.
19
+ - `bokeh`: For interactive plotting and visualization.
20
+ - `scipy.signal`: For signal processing and peak detection algorithms.
21
+ - `holoviews`: For high-level data visualization and color mapping.
22
+
23
+ Classes:
24
+ - `spec`: Main class for individual spectrum processing, providing methods for data
25
+ manipulation, peak detection, visualization, and analysis.
26
+
27
+ Functions:
28
+ - `combine_peaks()`: Utility function for merging multiple peak lists.
29
+ - `spec_to_mgf()`: Convert spectrum objects to MGF format.
30
+ - Various utility functions for spectrum processing and analysis.
31
+
32
+ Example Usage:
33
+ ```python
34
+ from spec import spec
35
+ import numpy as np
36
+
37
+ # Create spectrum from m/z and intensity arrays
38
+ mz = np.array([100.0, 150.0, 200.0, 250.0])
39
+ intensity = np.array([1000, 5000, 3000, 800])
40
+ spectrum = spec(mz=mz, inty=intensity, ms_level=1)
41
+
42
+ # Process and visualize
43
+ spectrum.find_peaks()
44
+ spectrum.plot()
45
+ spectrum.save_plot("spectrum.html")
46
+ ```
47
+
48
+ See Also:
49
+ - `single.py`: For handling complete mass spectrometry files containing multiple spectra.
50
+ - `parameters.spectrum_parameters`: For spectrum-specific parameter configuration.
51
+
52
+ """
53
+
54
+ from __future__ import annotations
55
+
56
+ import importlib
57
+ import re
58
+ import warnings
59
+
60
+ from dataclasses import dataclass
61
+ from typing import TYPE_CHECKING
62
+
63
+ import numpy as np
64
+ import pandas as pd
65
+
66
+ from bokeh.io import output_file
67
+ from bokeh.io import save
68
+ from bokeh.io.export import export_png
69
+ from bokeh.io.export import export_svg
70
+ from bokeh.models import BoxZoomTool
71
+ from bokeh.models import ColumnDataSource
72
+ from bokeh.models import FixedTicker
73
+ from bokeh.models import HoverTool
74
+ from bokeh.models import LinearColorMapper
75
+ from bokeh.models import LogScale
76
+ from bokeh.models import LogTickFormatter
77
+ from bokeh.models import NumeralTickFormatter
78
+ from bokeh.plotting import figure
79
+ from bokeh.plotting import show
80
+
81
+ if TYPE_CHECKING:
82
+ try:
83
+ from bokeh.models import ColorBar # type: ignore
84
+ except ImportError:
85
+ ColorBar = None
86
+ else:
87
+ try:
88
+ from bokeh.models import ColorBar # type: ignore
89
+ except ImportError:
90
+ try:
91
+ from bokeh.models.annotations import ColorBar # type: ignore[import-untyped]
92
+ except ImportError:
93
+ ColorBar = None
94
+
95
+
96
+ try:
97
+ from holoviews.plotting.util import process_cmap
98
+ except ImportError:
99
+ process_cmap = None
100
+ from matplotlib.colors import rgb2hex
101
+ from scipy.signal import find_peaks
102
+ from scipy.signal import find_peaks_cwt
103
+ from scipy.signal import peak_prominences
104
+ from scipy.signal import peak_widths
105
+ from scipy.signal import savgol_filter
106
+
107
+
108
+ if TYPE_CHECKING:
109
+ from collections.abc import Callable
110
+
111
+
112
+ @dataclass
113
+ class Spectrum:
114
+ """
115
+ A class for processing and analyzing individual mass spectra.
116
+
117
+ The `spec` class provides comprehensive tools for handling mass spectral data,
118
+ including peak detection, preprocessing, visualization, and spectral analysis.
119
+ It supports both centroided and profile mode spectra and offers various
120
+ algorithms for peak picking and spectral processing.
121
+
122
+ Attributes:
123
+ mz (np.ndarray): Mass-to-charge ratio values.
124
+ inty (np.ndarray): Intensity values corresponding to m/z values.
125
+ ms_level (int, optional): MS level (1 for MS1, 2 for MS2, etc.).
126
+ label (str, optional): Text label for the spectrum.
127
+ centroided (bool, optional): Whether the spectrum is centroided.
128
+ history (str): Processing history log.
129
+ bl (np.ndarray, optional): Baseline values for baseline correction.
130
+
131
+ Key Methods:
132
+ - `find_peaks()`: Detect peaks in the spectrum using various algorithms.
133
+ - `plot()`: Generate interactive or static plots of the spectrum.
134
+ - `denoise()`: Remove noise and low-intensity signals.
135
+ - `smooth()`: Apply smoothing algorithms to the spectrum.
136
+ - `normalize()`: Normalize spectrum intensities.
137
+ - `copy()`: Create a deep copy of the spectrum object.
138
+
139
+ Example Usage:
140
+ >>> import numpy as np
141
+ >>> from masster import spec
142
+ >>> mz = np.array([100.0, 150.0, 200.0, 250.0])
143
+ >>> intensity = np.array([1000, 5000, 3000, 800])
144
+ >>> spectrum = spec(mz=mz, inty=intensity, ms_level=1)
145
+ >>> spectrum.find_peaks()
146
+ >>> spectrum.plot()
147
+
148
+ See Also:
149
+ - `ddafile`: For handling complete mass spectrometry files.
150
+ - `SpectrumParameters`: For spectrum-specific parameter configuration.
151
+ """
152
+
153
+ def __init__(
154
+ self,
155
+ mz: np.ndarray | None = None,
156
+ inty: np.ndarray | None = None,
157
+ ms_level: int | None = None,
158
+ label: str | None = None,
159
+ centroided=None,
160
+ **kwargs,
161
+ ):
162
+ # Handle case where mz and inty might be in kwargs (from from_dict/from_json)
163
+ if mz is None and "mz" in kwargs:
164
+ mz = kwargs.pop("mz")
165
+ if inty is None and "inty" in kwargs:
166
+ inty = kwargs.pop("inty")
167
+
168
+ # Ensure mz and inty are provided
169
+ if mz is None or inty is None:
170
+ raise ValueError("mz and inty arrays are required")
171
+
172
+ self.label = label
173
+ self.ms_level = ms_level
174
+ self.centroided = centroided
175
+ self.mz = mz
176
+ self.inty = inty
177
+ self.history = ""
178
+ self.bl: float | None = None
179
+ # Optional attributes for peak analysis
180
+ self.width: np.ndarray | None = None
181
+ self.prominence: np.ndarray | None = None
182
+ self.__dict__.update(kwargs)
183
+ self.__post_init__()
184
+ if centroided is None:
185
+ self.centroided = self.check_if_centroided()
186
+
187
+ def __post_init__(self):
188
+ self.mz = np.asarray(self.mz)
189
+ self.inty = np.asarray(self.inty)
190
+ if self.mz.shape != self.inty.shape:
191
+ raise ValueError("mz and intensity arrays must have the same shape")
192
+ if self.centroided is None:
193
+ self.centroided = self.check_if_centroided()
194
+ if self.history is None:
195
+ self.history = ""
196
+ if self.bl is None:
197
+ self.bl = None
198
+
199
+ def check_if_centroided(self) -> bool:
200
+ if self.mz.size == 0:
201
+ return True
202
+ mzs = self.mz[self.mz < np.min(self.mz) + 0.4]
203
+ if len(mzs) < 20:
204
+ if len(mzs) < 3:
205
+ return True
206
+ min_distance = np.min(np.diff(mzs))
207
+ if min_distance > 0.003:
208
+ return True
209
+ return False
210
+
211
+ def reload(self):
212
+ modname = self.__class__.__module__
213
+ mod = __import__(modname, fromlist=[modname.split(".")[0]])
214
+ importlib.reload(mod)
215
+ new = getattr(mod, self.__class__.__name__)
216
+ setattr(self, "__class__", new) # noqa: B010
217
+
218
+ def to_dict(self):
219
+ # return a dictionary representation of the spectrum. include all the attributes
220
+ # Create a copy to avoid modifying the original object
221
+ import copy
222
+
223
+ result = {}
224
+
225
+ # Handle numpy arrays by creating copies and converting to lists
226
+ for key, value in self.__dict__.items():
227
+ if isinstance(value, np.ndarray):
228
+ result[key] = value.copy().tolist()
229
+ elif isinstance(value, (list, dict)):
230
+ # Create copies of mutable objects
231
+ result[key] = copy.deepcopy(value)
232
+ else:
233
+ # Immutable objects can be copied directly
234
+ result[key] = value
235
+ # round m/z to 5 decimal places and intensity to 2 decimal places
236
+ if "mz" in result:
237
+ result["mz"] = np.round(result["mz"], 5).tolist()
238
+ if "inty" in result:
239
+ result["inty"] = np.round(result["inty"], 2).tolist()
240
+
241
+ return result
242
+
243
+ @classmethod
244
+ def from_dict(cls, data: dict):
245
+ # Create instance directly from data dictionary
246
+ return cls(**data)
247
+
248
+ def to_json(self):
249
+ """
250
+ Serialize the spectrum to a JSON string.
251
+
252
+ Returns:
253
+ str: JSON string representation of the spectrum.
254
+ """
255
+ import json
256
+
257
+ data = self.to_dict()
258
+ return json.dumps(data, indent=2)
259
+
260
+ @classmethod
261
+ def from_json(cls, json_str):
262
+ """
263
+ Create a Spectrum instance from a JSON string.
264
+
265
+ Args:
266
+ json_str (str): JSON string containing spectrum data.
267
+
268
+ Returns:
269
+ Spectrum: New instance with attributes set from the JSON data.
270
+ """
271
+ import json
272
+
273
+ data = json.loads(json_str)
274
+ return cls.from_dict(data)
275
+
276
+ def pandalize(self):
277
+ data = {
278
+ key: val for key, val in self.__dict__.items() if isinstance(val, np.ndarray) and val.size == self.mz.size
279
+ }
280
+ return pd.DataFrame(data)
281
+
282
+ def to_df(self):
283
+ return self.pandalize()
284
+
285
+ def mz_trim(self, *args, **kwargs):
286
+ """
287
+ Alias for trim method to maintain compatibility with older code.
288
+ """
289
+ return self.trim(*args, **kwargs)
290
+
291
+ def trim(
292
+ self,
293
+ mz_min: float | None = None,
294
+ mz_max: float | None = None,
295
+ ) -> Spectrum:
296
+ if mz_min is not None:
297
+ mask = self.mz >= mz_min
298
+ self.mz = self.mz[mask]
299
+ self.inty = self.inty[mask]
300
+ for key in self.__dict__:
301
+ if isinstance(self.__dict__[key], np.ndarray) and self.__dict__[key].size == mask.size:
302
+ self.__dict__[key] = self.__dict__[key][mask]
303
+ if mz_max is not None:
304
+ mask = self.mz <= mz_max
305
+ self.mz = self.mz[mask]
306
+ self.inty = self.inty[mask]
307
+ for key in self.__dict__:
308
+ if isinstance(self.__dict__[key], np.ndarray) and self.__dict__[key].size == mask.size:
309
+ self.__dict__[key] = self.__dict__[key][mask]
310
+ return self
311
+
312
+ def mz_min(self):
313
+ if len(self.mz) == 0:
314
+ return 0
315
+ return np.min(self.mz)
316
+
317
+ def mz_max(self):
318
+ if len(self.mz) == 0:
319
+ return 0
320
+ return np.max(self.mz)
321
+
322
+ def inty_min(self):
323
+ if len(self.inty) == 0:
324
+ return 0
325
+ return np.min(self.inty)
326
+
327
+ def inty_max(self):
328
+ if len(self.inty) == 0:
329
+ return 0
330
+ return np.max(self.inty)
331
+
332
+ def tic(self):
333
+ if len(self.inty) == 0:
334
+ return 0
335
+ return np.sum(self.inty)
336
+
337
+ def keep_top(self, n: int = 100, inplace: bool = False) -> Spectrum:
338
+ idx = np.sort(np.argsort(self.inty)[-n:])  # keep the retained peaks in m/z order
339
+ spec_obj = self if inplace else self.copy()
340
+ array_length = self.mz.size
341
+ for key, val in spec_obj.__dict__.items():
342
+ if isinstance(val, np.ndarray) and val.size == array_length:
343
+ spec_obj.__dict__[key] = val[idx]
344
+ return spec_obj
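+ # Illustrative use: keep the 50 most intense peaks; with inplace=False (default) the
+ # original spectrum is left untouched.
+ #   top50 = spec.keep_top(n=50)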
345
+
346
+ def scale(self, factor: float = 1.0) -> Spectrum:
347
+ if factor == 1.0:
348
+ return self.copy()
349
+ spec_obj = self.copy()
350
+ spec_obj.inty = spec_obj.inty.astype(float) * factor
351
+ spec_obj.history_add(f"s[{factor}]")
352
+ return spec_obj
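+ # Illustrative use (a sketch): rescale intensities, e.g. to normalise the spectrum to a TIC of 1.
+ #   normalised = spec.scale(1.0 / spec.tic()) if spec.tic() > 0 else spec.copy()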
353
+
354
+ def baseline(self):
355
+ mz = self.mz
356
+ inty = self.inty
357
+ mz = mz[inty != 0]
358
+ inty = inty[inty != 0]
359
+ if len(mz) == 0:
360
+ return 0
361
+ idx = np.argsort(mz)
362
+ mz = mz[idx]
363
+ inty = inty[idx]
364
+ if len(mz) > 50:
365
+ # TODO not used
366
+ mz = mz[-50:]
367
+ inty = inty[-50:]
368
+ while True:
369
+ baseline = 1.5 * np.mean(inty)
370
+ mask = inty > baseline
371
+ if np.sum(mask) == 0:
372
+ break
373
+ inty = inty[~mask]
374
+ return baseline
375
+
376
+ def entropy(self) -> float:
377
+ peaks = np.column_stack((self.mz, self.inty))
378
+ entropy = -np.sum(peaks[:, 1] * np.log(peaks[:, 1] + 1e-9))
379
+ return float(entropy)
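+ # Note: this is the unnormalized sum -sum(I * ln(I + 1e-9)) over the raw intensities;
+ # normalize inty beforehand if a probability-based spectral entropy is needed.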
380
+
381
+ def __len__(self):
382
+ return self.mz.size
383
+
384
+ def __sizeof__(self):
385
+ return self.mz.size
386
+
387
+ def length(self):
388
+ return self.__len__()
389
+
390
+ def history_add(self, term: str):
391
+ if getattr(self, "history", None) is None:
392
+ self.history = ""
393
+ if len(self.history) > 0:
394
+ self.history += f" {term}"
395
+ else:
396
+ self.history = term
397
+
398
+ def history_check(self, term):
399
+ m = re.search(f"{term}[([A-Za-z0-9]*)]", self.history)
400
+ if m is None:
401
+ return None
402
+ return [x[1:-1] for x in m.group(0).split(",")]
403
+
404
+ def copy(self) -> Spectrum:
405
+ new = Spectrum(
406
+ mz=self.mz.copy(),
407
+ inty=self.inty.copy(),
408
+ ms_level=self.ms_level,
409
+ centroided=self.centroided,
410
+ label=self.label,
411
+ )
412
+ for key, val in self.__dict__.items():
413
+ if isinstance(val, np.ndarray):
414
+ new.__dict__[key] = val.copy()
415
+ else:
416
+ new.__dict__[key] = val
417
+ return new
418
+
419
+ def denoise(self, threshold: float | None = None) -> Spectrum:
420
+ if threshold is None:
421
+ threshold = self.baseline()
422
+ self_c = self.copy()
423
+ mask = self_c.inty > threshold
424
+ length = self_c.mz.size
425
+ for key in self_c.__dict__:
426
+ if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[key].size == length:
427
+ self_c.__dict__[key] = self_c.__dict__[key][mask]
428
+ self_c.history_add("t[BL]")
429
+ self_c.bl = threshold
430
+ return self_c
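+ # Illustrative use: drop peaks below the estimated baseline (threshold=None) or an explicit value.
+ #   clean = spec.denoise()               # threshold taken from baseline()
+ #   clean = spec.denoise(threshold=500.0)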
431
+
432
+ def filter(
433
+ self,
434
+ inty_min: float | None = None,
435
+ inty_max: float | None = None,
436
+ q1_ratio_min: float | None = None,
437
+ q1_ratio_max: float | None = None,
438
+ eic_corr_min: float | None = None,
439
+ eic_corr_max: float | None = None,
440
+ ) -> Spectrum:
441
+ spec_obj = self.copy()
442
+ mask: np.ndarray = np.ones(len(spec_obj.mz), dtype=bool)
443
+ if inty_min is not None and inty_min > 0:
+ if inty_min < 1 and spec_obj.inty.size > 0:
+ # fractional thresholds are interpreted as a proportion of the maximum intensity
+ inty_min = inty_min * spec_obj.inty.max()
+ mask = mask & (spec_obj.inty >= inty_min)
+ spec_obj.history_add("f[inty_min]")
450
+ if inty_max is not None and inty_max > 0:
451
+ mask = mask & (spec_obj.inty <= inty_max)
452
+ spec_obj.history_add("f[inty_max]")
453
+ if q1_ratio_min is not None and hasattr(spec_obj, "q1_ratio"):
454
+ mask = mask & (spec_obj.q1_ratio >= q1_ratio_min)
455
+ spec_obj.history_add("f[q1_ratio_min]")
456
+ if q1_ratio_max is not None and hasattr(spec_obj, "q1_ratio"):
457
+ mask = mask & (spec_obj.q1_ratio <= q1_ratio_max)
458
+ spec_obj.history_add("f[q1_ratio_max]")
459
+ if eic_corr_min is not None and hasattr(spec_obj, "eic_corr"):
460
+ mask = mask & (spec_obj.eic_corr >= eic_corr_min)
461
+ spec_obj.history_add("f[eic_corr_min]")
462
+ if eic_corr_max is not None and hasattr(spec_obj, "eic_corr"):
463
+ mask = mask & (spec_obj.eic_corr <= eic_corr_max)
464
+ spec_obj.history_add("f[eic_corr_max]")
465
+ mask_length = len(mask)
466
+ for key in spec_obj.__dict__:
467
+ if isinstance(spec_obj.__dict__[key], np.ndarray) and spec_obj.__dict__[key].size == mask_length:
468
+ spec_obj.__dict__[key] = spec_obj.__dict__[key][mask]
469
+ return spec_obj
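+ # Illustrative use: keep peaks above 1000 counts and above a minimum q1_ratio
+ # (q1_ratio and eic_corr are optional per-peak arrays that other parts of the package may attach).
+ #   filtered = spec.filter(inty_min=1000, q1_ratio_min=0.1)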
470
+
471
+ def centroid(self, algo: str = "cr", **kwargs) -> Spectrum:
472
+ algo = algo.lower()
473
+ if algo == "cr":
474
+ return self.centroid_cr(**kwargs)
475
+ elif algo == "cwt":
476
+ return self.centroid_cwt(**kwargs)
477
+ elif algo in ["slm", "lm", "slmp", "lmp"]:
478
+ return self.centroid_lm(**kwargs)
479
+ else:
480
+ raise ValueError(f"Unknown centroiding algorithm: {algo}")
481
+
482
+ # TODO externalize params
483
+ def centroid_cr(
484
+ self,
485
+ tolerance: float = 0.002,
486
+ ppm: float = 5,
487
+ time_domain: bool = True,
488
+ inty_fun=np.max,
489
+ weighted: bool = True,
490
+ exponent: float = 3,
491
+ mode: str = "union",
492
+ min_prop: float = 0.5,
493
+ min_points: int = 5,
494
+ stats: bool = False,
495
+ wlen=50,
496
+ prominence=None,
497
+ **kwargs,
498
+ ) -> Spectrum:
499
+ if self.centroided:
500
+ return self
501
+ s = self.copy()
502
+ with warnings.catch_warnings():
503
+ warnings.simplefilter("ignore")
504
+ new_spec = combine_peaks(
505
+ [s],
506
+ tolerance=tolerance,
507
+ ppm=ppm,
508
+ time_domain=time_domain,
509
+ inty_fun=inty_fun,
510
+ weighted=weighted,
511
+ exponent=exponent,
512
+ mode=mode,
513
+ min_prop=min_prop,
514
+ min_points=min_points,
515
+ main=None,
516
+ )
517
+ s.history_add("c[CR]")
518
+ s.history_add("c[CR]")
519
+ if stats or (prominence is not None):
520
+ indexes = np.searchsorted(s.mz, new_spec.mz)
521
+ widths = peak_widths(s.inty, indexes, rel_height=0.75)[0]
522
+ prominences = peak_prominences(s.inty, indexes, wlen=wlen)[0]
523
+ s.width = widths
524
+ s.prominence = prominences
525
+
526
+ s.mz = new_spec.mz
527
+ s.inty = new_spec.inty
528
+ s.centroided = True
529
+ if prominence is not None:
530
+ mask = prominences >= prominence
531
+ s.mz = s.mz[mask]
532
+ s.inty = s.inty[mask]
533
+ s.width = s.width[mask]
534
+ s.prominence = s.prominence[mask]
535
+ s.history_add("f[PRO]")
536
+ s.history_add("f[PRO]")
537
+ return s
538
+
539
+ def smooth(self, algo: str = "savgol", window_length: int = 7) -> Spectrum:
540
+ if self.centroided:
541
+ return self
542
+ s = self.copy()
543
+ match algo.lower():
544
+ case "savgol":
545
+ s.inty = savgol_filter(s.inty, window_length, 2)
546
+ s.history_add("s[SG]")
547
+ s.history_add("s[SG]")
548
+ case "cumsum":
549
+ cumsum_vec = np.cumsum(np.insert(s.inty, 0, 0))
550
+ ma_vec = (cumsum_vec[window_length:] - cumsum_vec[:-window_length]) / window_length
551
+ s.inty = np.concatenate((
552
+ s.inty[: window_length // 2],
553
+ ma_vec,
554
+ s.inty[-window_length // 2 :],
555
+ ))
556
+ s.history_add("s[CSM]")
557
+ s.history_add("s[CSM]")
558
+ return s
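+ # Illustrative use: Savitzky-Golay smoothing of a profile spectrum before centroiding.
+ #   smoothed = profile_spec.smooth(algo="savgol", window_length=7)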
559
+
560
+ # TODO externalize params
561
+ def centroid_cwt(
562
+ self,
563
+ stats: bool = False,
564
+ # TODO not used
565
+ wlen=50,
566
+ prominence=None,
567
+ **kwargs,
568
+ ) -> Spectrum:
569
+ if self.centroided:
570
+ return self
571
+ s = self.copy()
572
+ with warnings.catch_warnings():
573
+ warnings.simplefilter("ignore")
574
+ peaks = find_peaks_cwt(s.inty, widths=np.arange(4, 30), min_snr=1)
575
+ if stats or (prominence is not None):
576
+ widths = peak_widths(s.inty, peaks, rel_height=0.75)[0]
577
+ prominences = peak_prominences(s.inty, peaks)[0]
578
+ s.width = widths
579
+ s.prominence = prominences
580
+ s.mz = s.mz[peaks]
581
+ s.inty = s.inty[peaks]
582
+ s.centroided = True
583
+ s.history_add("c[CWT]")
584
+ s.history_add("c[CWT]")
585
+ if prominence is not None:
586
+ mask = prominences >= prominence
587
+ s.mz = s.mz[mask]
588
+ s.inty = s.inty[mask]
589
+ s.width = s.width[mask]
590
+ s.prominence = s.prominence[mask]
591
+ s.history_add("f[PRO]")
592
+ s.history_add("f[PRO]")
593
+ return s
594
+
595
+ # TODO externalize params
596
+ def centroid_lm(
597
+ self,
598
+ smooth=5,
599
+ # TODO not used
600
+ stats: bool = False,
601
+ min_points: int = 1,
602
+ ##
603
+ distance: float = 5,
604
+ wlen=30,
605
+ plateau_size=None,
606
+ prominence=None,
607
+ refine: bool = True,
608
+ **kwargs,
609
+ ) -> Spectrum:
610
+ if self.centroided:
611
+ return self
612
+ s = self.copy()
613
+ not_smothed_inty = s.inty.copy()
614
+ if smooth is not None:
615
+ try:
616
+ if len(s.mz) > smooth * 2:
617
+ s.inty = savgol_filter(s.inty, smooth, 2)
618
+ except: # noqa: E722
619
+ pass
620
+ if prominence is not None and prominence < 0 and getattr(s, "bl", None) is not None:
621
+ prominence = s.bl
622
+ with warnings.catch_warnings():
623
+ warnings.simplefilter("ignore")
624
+ peaks, props = find_peaks(
625
+ s.inty,
626
+ height=0,
627
+ width=1,
628
+ distance=distance,
629
+ plateau_size=plateau_size,
630
+ rel_height=0.75,
631
+ wlen=wlen,
632
+ )
633
+ s.width = props["widths"]
634
+ s.prominence = props["prominences"]
635
+ if refine:
636
+ prof_mz = s.mz
637
+ prof_inty = s.inty
638
+ for idx in peaks:
639
+ idxs = np.arange(idx - 2, idx + 3)
640
+ if idxs[0] < 0 or idxs[-1] >= len(prof_mz):
641
+ continue
642
+ s.mz[idx] = np.average(prof_mz[idxs], weights=prof_inty[idxs] ** 3 + 1)
643
+ inty_smoothed = np.max(prof_inty[idxs])
644
+ inty_not_smoothed = np.max(not_smothed_inty[idxs])
645
+ s.inty[idx] = np.max([inty_smoothed, inty_not_smoothed])
646
+ s.mz = s.mz[peaks]
647
+ s.inty = s.inty[peaks]
648
+ s.history_add("c[SLMR]")
649
+ s.history_add("c[SLMR]")
650
+ s.centroided = True
651
+ else:
652
+ s.mz = s.mz[peaks]
653
+ s.inty = props["peak_heights"]
654
+ s.history_add("c[SLM]")
655
+ s.history_add("c[SLM]")
656
+ s.centroided = True
657
+ if prominence is not None:
658
+ mask = s.prominence >= prominence
659
+ s.mz = s.mz[mask]
660
+ s.inty = s.inty[mask]
661
+ s.width = s.width[mask]
662
+ s.prominence = s.prominence[mask]
663
+ s.history_add("f[PRO]")
664
+ s.history_add("f[PRO]")
665
+ return s
666
+
667
+ def deisotope(self, mz_tol: float = 0.02, ratio_max: float = 1.5) -> Spectrum:
668
+ self_c = self.copy()
669
+ mzs = self_c.mz
670
+ intys = self_c.inty
671
+ is_isotopolog_of = np.zeros(len(mzs)).astype(np.int32)
672
+ i = 0
673
+ j = 1
674
+ while j < len(mzs) and i < len(mzs):
675
+ isodelta = mzs[j] - mzs[i] - 1.00335
676
+ if isodelta < -mz_tol:
677
+ j += 1
678
+ elif isodelta <= mz_tol:
679
+ if intys[j] < intys[i] * ratio_max:
680
+ if is_isotopolog_of[i] == 0:
681
+ is_isotopolog_of[j] = i
682
+ else:
683
+ is_isotopolog_of[j] = is_isotopolog_of[i]
684
+ j += 1
685
+ else:
686
+ i += 1
687
+ mask = np.where(is_isotopolog_of == 0)[0]
688
+ for key in self_c.__dict__:
689
+ if isinstance(self_c.__dict__[key], np.ndarray) and self_c.__dict__[key].size == len(is_isotopolog_of):
690
+ self_c.__dict__[key] = self_c.__dict__[key][mask]
691
+ if self_c.label is not None:
692
+ self_c.label = self_c.label + " deiso."
693
+ self_c.history_add("f[iso]")
694
+ self_c.history_add("f[iso]")
695
+ return self_c
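+ # Illustrative use: collapse isotopologue peaks (spaced by ~1.00335 Da) onto their monoisotopic peak.
+ #   mono = centroided_spec.deisotope(mz_tol=0.02)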
696
+
697
+ # TODO externalize params
698
+ def plot(
699
+ self,
700
+ mz_start: float | None = None,
701
+ mz_stop: float | None = None,
702
+ ylog: bool = False,
703
+ title: str | None = None,
704
+ width: int = 1000,
705
+ height: int = 250,
706
+ colorby: str | None = None,
707
+ cmap: str = "rainbow",
708
+ cmap_provider: str = "colorcet",
709
+ cmap_min: float = -1,
710
+ cmap_max: float = 1,
711
+ filename: str | None = None,
712
+ ):
713
+ cvalues = None
714
+ colors = ["black"] * len(self.mz)
715
+ if colorby is not None:
716
+ if not hasattr(self, colorby):
717
+ raise ValueError(f"{colorby} is not a valid attribute of the spectrum")
718
+ if not isinstance(self.__dict__[colorby], np.ndarray):
719
+ raise ValueError(f"{colorby} is not a valid attribute of the spectrum")
720
+ if len(self.__dict__[colorby]) != len(self.mz):
721
+ raise ValueError(f"{colorby} is not a valid attribute of the spectrum")
722
+ else:
723
+ cvalues = self.__dict__[colorby].copy()
724
+ cvalues[cvalues < cmap_min] = cmap_min
725
+ cvalues[cvalues > cmap_max] = cmap_max
726
+ cvalues = (cvalues - cmap_min) / (cmap_max - cmap_min) * 255
727
+ cm = process_cmap(cmap, ncolors=255, provider=cmap_provider)
728
+ colors = [
729
+ rgb2hex(cm[int(i * (len(cm) - 1) / 255)]) if not np.isnan(i) else rgb2hex((0, 0, 0))
730
+ for i in cvalues
731
+ ]
732
+ p = figure(
733
+ width=width,
734
+ height=height,
735
+ title=title,
736
+ )
737
+ label = self.label if self.label is not None else "spectrum"  # legend_label must be a string
740
+ mz = self.mz
741
+ inty = self.inty
742
+ if mz_start is not None:
743
+ mask = mz >= mz_start
744
+ mz = mz[mask]
745
+ inty = inty[mask]
746
+ colors = np.array(colors)[mask].tolist()
747
+ if mz_stop is not None:
748
+ mask = mz <= mz_stop
749
+ mz = mz[mask]
750
+ inty = inty[mask]
751
+ colors = np.array(colors)[mask].tolist()
752
+ if len(mz) == 0:
753
+ print("No peaks in spectrum after trimming")
754
+ return
755
+ if not self.centroided:
756
+ mz_diff = np.diff(mz)
757
+ new_mzs: list[float] = []
758
+ new_inty: list[float] = []
759
+ last_good_step = 1
760
+ for i in range(len(mz_diff)):
761
+ if mz_diff[i] > last_good_step * 4:
762
+ new_mzs.append(mz[i] + last_good_step)
763
+ new_inty.append(0)
764
+ new_mzs.append(mz[i + 1] - last_good_step)
765
+ new_inty.append(0)
766
+ else:
767
+ last_good_step = mz_diff[i]
768
+ if len(new_mzs) > 0:
769
+ new_mzs_array = np.array(new_mzs)
770
+ new_inty_array = np.array(new_inty)
771
+ mz = np.append(mz, new_mzs_array)
772
+ inty = np.append(inty, new_inty_array)
773
+ idx = np.argsort(mz)
774
+ mz = mz[idx]
775
+ inty = inty[idx]
776
+ p.line(mz, inty, line_color="black", legend_label=label)
777
+ else:
778
+ # to_dict() converts arrays to lists, so collect the hover columns from the array attributes directly
+ data = {key: val for key, val in self.__dict__.items() if isinstance(val, np.ndarray) and val.size == mz.size}
+ data["mz"] = mz
+ data["inty"] = inty
780
+ if ylog:
781
+ data["zeros"] = np.ones_like(mz)
782
+ else:
783
+ data["zeros"] = np.zeros_like(mz)
784
+ data["color"] = colors
785
+ source = ColumnDataSource(data)
786
+ p.segment(
787
+ x0="mz",
788
+ y0="zeros",
789
+ x1="mz",
790
+ y1="inty",
791
+ line_color="black",
792
+ legend_label=label,
793
+ source=source,
794
+ )
795
+ if cvalues is not None:
796
+ sc = p.scatter(
797
+ x="mz",
798
+ y="inty",
799
+ size=5,
800
+ fill_color="color",
801
+ line_color="color",
802
+ legend_label=label,
803
+ source=source,
804
+ )
805
+ else:
806
+ sc = p.scatter(
807
+ x="mz",
808
+ y="inty",
809
+ size=3,
810
+ fill_color="black",
811
+ line_color="black",
812
+ legend_label=label,
813
+ source=source,
814
+ )
815
+ tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
816
+ hover_tool = HoverTool(renderers=[sc], tooltips=tooltips)
817
+ p.add_tools(hover_tool)
818
+ box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
819
+ if box_zoom_tools:
820
+ p.toolbar.active_drag = box_zoom_tools[0]
821
+ if colorby is not None:
822
+ mapper = LinearColorMapper(
823
+ palette=[rgb2hex(c) for c in cm],
824
+ low=cmap_min,
825
+ high=cmap_max,
826
+ )
827
+ if ColorBar is not None:
828
+ color_bar = ColorBar(
829
+ color_mapper=mapper,
830
+ location=(0, 0),
831
+ title=colorby,
832
+ )
833
+ p.add_layout(color_bar, "right")
834
+ if ylog:
835
+ p.y_scale = LogScale()
836
+ p.yaxis.formatter = LogTickFormatter()
837
+ else:
838
+ p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
839
+ if filename is not None:
840
+ if filename.endswith(".html"):
841
+ output_file(filename)
842
+ save(p)
843
+ elif filename.endswith(".png"):
844
+ export_png(p, filename=filename)
845
+ else:
846
+ show(p)
847
+ else:
848
+ show(p)
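+ # Illustrative use: interactive plot colored by a per-peak attribute and exported to HTML
+ # (the attribute name below is only an example and must exist on the spectrum as an array).
+ #   spec.plot(colorby="eic_corr", filename="spectrum.html")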
849
+
850
+ def plot_stats(self):
851
+ df = self.pandalize()
852
+ from bokeh.plotting import show
853
+ from hvplot.plotting import parallel_coordinates
854
+
855
+ p = parallel_coordinates(
856
+ df,
857
+ color="black",
858
+ width=1000,
859
+ height=250,
860
+ line_width=1,
861
+ hover_color="red",
862
+ )
863
+ show(p)
864
+
865
+ def plot_dist(self):
866
+ from bokeh.plotting import figure
867
+ from bokeh.plotting import show
868
+
869
+ for _i, attr in enumerate(self.__dict__):
870
+ if isinstance(self.__dict__[attr], np.ndarray):
871
+ hist, edges = np.histogram(self.__dict__[attr], bins=100)
872
+ p = figure(
873
+ width=250,
874
+ height=250,
875
+ title=attr,
876
+ )
877
+ p.quad(
878
+ top=hist,
879
+ bottom=0,
880
+ left=edges[:-1],
881
+ right=edges[1:],
882
+ fill_color="navy",
883
+ line_color="white",
884
+ alpha=0.5,
885
+ )
886
+ show(p)
887
+
888
+
889
+ # TODO externalize params
890
+ def group_peaks(
891
+ mz_values: np.ndarray,
892
+ tolerance: float = 0,
893
+ ppm: float = 0,
894
+ time_domain: bool = False,
895
+ ) -> np.ndarray:
896
+ """
897
+ Group peaks based on m/z values using tolerance and ppm.
898
+
899
+ Args:
900
+ mz_values: Array of m/z values
901
+ tolerance: Absolute tolerance for grouping
902
+ ppm: Parts per million tolerance
903
+ time_domain: If True, grouping is done on sqrt(mz)
904
+
905
+ Returns:
906
+ Array of group indices for each peak
907
+ """
908
+ values = np.sqrt(mz_values) if time_domain else mz_values
909
+
911
+ # Initialize groups
912
+ groups = np.zeros(len(values), dtype=int)
913
+ current_group = 0
914
+
915
+ for i in range(1, len(values)):
916
+ diff = values[i] - values[i - 1]
917
+ ppm_tolerance = values[i - 1] * ppm * 1e-6 if ppm else 0
918
+ max_diff = max(tolerance, ppm_tolerance)
919
+
920
+ if diff > max_diff:
921
+ current_group += 1
922
+ groups[i] = current_group
923
+
924
+ return groups
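+ # Illustrative worked example (absolute tolerance only; assumes mz_values is sorted ascending):
+ # m/z values [100.000, 100.004, 100.020] with tolerance=0.01 give groups [0, 0, 1],
+ # because 100.004 is within 0.01 of its neighbour while 100.020 is not.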
925
+
926
+
927
+ # TODO externalize params
928
+ def combine_peaks(
929
+ spectra: list[Spectrum],
930
+ inty_fun: Callable = np.sum,
931
+ mz_fun: Callable = np.mean,
932
+ weighted: bool = False,
933
+ exponent: float = 3,
934
+ tolerance: float = 0.002,
935
+ ppm: float = 5,
936
+ time_domain: bool = True,
937
+ mode: str = "union",
938
+ main: int | None = None,
939
+ min_points: int | None = None,
940
+ min_prop: float = 0.5,
941
+ ) -> Spectrum:
942
+ """
943
+ Combine multiple spectra into a single spectrum.
944
+ Args:
945
+ spectra: List of Spectrum objects to combine
946
+ inty_fun: Function to combine intensities
947
+ mz_fun: Function to combine m/z values
948
+ weighted: Use intensity-weighted mean for m/z values
949
+ exponent: Exponent for intensity weighting
950
+ tolerance: Absolute tolerance for peak grouping
951
+ ppm: Parts per million tolerance for peak grouping
952
+ time_domain: If True, grouping is done on sqrt(mz)
953
+ mode: Strategy for combining peaks ("union" or "intersect")
954
+ main: Index of main spectrum to keep peaks from
955
+ min_points: Minimum number of points to retain a peak
956
+ min_prop: Minimum proportion for intersect strategy
957
+
958
+ Returns:
959
+ Combined Spectrum
960
+
961
+ """
962
+
963
+ if len(spectra) == 1:
964
+ all_mz = spectra[0].mz
965
+ all_inty = spectra[0].inty
966
+ spectrum_indices: np.ndarray = np.zeros(all_mz.size)
967
+ else:
968
+ # Concatenate all m/z and intensity values
969
+ all_mz = np.concatenate([pm.mz for pm in spectra])
970
+ all_inty = np.concatenate([pm.inty for pm in spectra])
971
+
972
+ # Track which spectrum each peak came from
973
+ spectrum_indices = np.concatenate([np.full(len(pm.mz), i) for i, pm in enumerate(spectra)])
974
+
975
+ if all_mz.size < 2:
976
+ return Spectrum(
977
+ mz=all_mz,
978
+ inty=all_inty,
979
+ ms_level=spectra[0].ms_level,
980
+ centroided=True,
981
+ )
982
+ # Sort by m/z
983
+ sort_idx = np.argsort(all_mz)
984
+ all_mz = all_mz[sort_idx]
985
+ all_inty = all_inty[sort_idx]
986
+ spectrum_indices = spectrum_indices[sort_idx]
987
+
988
+ # Group peaks
989
+ groups = group_peaks(all_mz, tolerance, ppm, time_domain)
990
+ unique_groups = np.unique(groups)
991
+
992
+ # Process each group
993
+ combined_mz = []
994
+ combined_inty = []
995
+
996
+ for group in unique_groups:
997
+ mask = groups == group
998
+ # check if the number of points is greater than min_points
999
+ if min_points is not None and np.sum(mask) < min_points:
1000
+ continue
1001
+ group_mz = all_mz[mask]
1004
+ group_inty = all_inty[mask]
1005
+ group_spectra = spectrum_indices[mask]
1006
+
1007
+ # Handle intersect strategy
1008
+ if mode == "intersect":
1009
+ unique_spectra = len(np.unique(group_spectra))
1010
+ if unique_spectra < (len(spectra) * min_prop):
1011
+ continue
1012
+
1013
+ # Handle main spectrum filtering
1014
+ if main is not None and main not in group_spectra:
1015
+ continue
1016
+
1019
+ # Calculate combined values
1020
+
1021
+ if weighted:
1022
+ combined_mz.append(np.average(group_mz, weights=group_inty**exponent))
1023
+ else:
1024
+ combined_mz.append(mz_fun(group_mz))
1025
+
1026
+ combined_inty.append(inty_fun(group_inty))
1027
+
1028
+ if not combined_mz:
1029
+ return Spectrum(mz=np.array([]), inty=np.array([]))
1030
+
1031
+ return Spectrum(
1032
+ mz=np.array(combined_mz),
1033
+ inty=np.array(combined_inty),
1034
+ ms_level=spectra[0].ms_level,
1035
+ centroided=True,
1036
+ )
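+ # Illustrative use: merge replicate scans (hypothetical s1, s2, s3), keeping only peaks
+ # present in at least half of them.
+ #   merged = combine_peaks([s1, s2, s3], mode="intersect", min_prop=0.5, ppm=5)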
1037
+
1038
+
1039
+ # TODO externalize params
1040
+ def plot_spectra(
1041
+ spectra: list[Spectrum],
1042
+ labels: list[str] | None = None,
1043
+ mz_start: float | None = None,
1044
+ mz_stop: float | None = None,
1045
+ title: str | None = None,
1046
+ width: int = 1000,
1047
+ height: int = 250,
1048
+ cmap: str = "rainbow",
1049
+ cmap_provider: str = "colorcet",
1050
+ filename: str | None = None,
1051
+ colorby: str | None = None,
1052
+ ylog: bool = False,
1053
+ ) -> None:
1054
+ """
1055
+ Plot multiple mass spectrometry spectra on a single Bokeh figure.
1056
+ This function displays profile spectra as continuous lines and centroided spectra as vertical segments
1057
+ (with circles at the peak tops) on a Bokeh plot. Spectra can be optionally trimmed by m/z range using the
1058
+ mz_start and mz_stop parameters. Additionally, a colormap is applied to differentiate between spectra.
1059
+ Parameters:
1060
+ spectra (List[spectrum]): A list of spectrum objects to be plotted. Each object must have attributes
1061
+ 'mz' (mass-to-charge ratio), 'inty' (intensity), and 'centroided' (a boolean
1062
+ indicating if the spectrum is centroided).
1063
+ labels (List[str], optional): A list of labels for the spectra. If provided and its length is at least as
1064
+ long as the number of spectra, these labels override the default spectrum
1065
+ naming.
1066
+ mz_start (float, optional): The lower bound for m/z values. Peaks with m/z values below this threshold
1067
+ are excluded from the plot.
1068
+ mz_stop (float, optional): The upper bound for m/z values. Peaks with m/z values above this threshold
1069
+ are excluded from the plot.
1070
+ title (str, optional): The title of the plot.
1071
+ width (int, optional): The width of the plot in pixels. Default is 1000.
1072
+ height (int, optional): The height of the plot in pixels. Default is 250.
1073
+ cmap (str, optional): The colormap name used to assign colors to the spectra. Default is "rainbow".
1074
+ cmap_provider (str, optional): The provider for the specified colormap. Default is "colorcet".
1075
+ filename (str, optional): If provided, the plot is saved to a file. The export format is determined by the
1076
+ file extension: HTML for ".html", SVG for ".svg", and PNG for ".png". If the filename does not
1077
+ have an appropriate extension, the plot is simply displayed.
1078
+ ylog (bool, optional): If True, the y-axis is set to a logarithmic scale. Default is False.
1079
+ colorby (str, optional): If provided, the color of each spectrum is determined by this attribute.
1080
+
1081
+ Returns:
1082
+ None
1083
+ Side Effects:
1084
+ - Displays the Bokeh plot in a browser window if no filename is provided.
1085
+ - Exports the plot to a file if a valid filename is provided.
1086
+ - Prints a message to the console if a spectrum contains no peaks after applying the m/z trimming.
1087
+ """
1088
+ import numpy as np
1089
+
1090
+ from bokeh.io import output_file
1091
+ from bokeh.io import save
1092
+ from bokeh.io.export import export_png
1093
+ from bokeh.models import BoxZoomTool
1094
+ from bokeh.models import ColumnDataSource
1095
+ from bokeh.models import HoverTool
1096
+ from bokeh.models import LogScale
1097
+ from bokeh.models import LogTickFormatter
1098
+ from bokeh.models import NumeralTickFormatter
1099
+ from bokeh.plotting import figure
1100
+ from bokeh.plotting import show
1101
+ from holoviews.plotting.util import process_cmap
1102
+ from matplotlib.colors import rgb2hex
1103
+
1104
+ num_plots = len(spectra)
1105
+ cm = process_cmap(cmap, ncolors=num_plots, provider=cmap_provider)
1106
+ colors = [
1107
+ rgb2hex(cm[int(i * (len(cm) - 1) / (num_plots - 1))]) if num_plots > 1 else rgb2hex(cm[0])
1108
+ for i in range(num_plots)
1109
+ ]
1110
+
1111
+ p = figure(
1112
+ width=width,
1113
+ height=height,
1114
+ title=title,
1115
+ )
1116
+
1117
+ for spec_idx, spec in enumerate(spectra):
1118
+ try:
1119
+ label = f"Spectrum {spec_idx}"
1120
+ if spec.label is not None:
1121
+ label = spec.label
1122
+ if labels is not None and len(labels) >= num_plots:
1123
+ label = labels[spec_idx]
1124
+
1125
+ mcvalues = None
1126
+ mcolors = ["black"] * len(spec.mz)
1127
+ if colorby is not None:
1128
+ # check whether the string is a valid attribute of the spectrum
1129
+ if not hasattr(spec, colorby):
1130
+ raise ValueError(
1131
+ f"{colorby} is not a valid attribute of the spectrum {spec_idx}",
1132
+ )
1133
+ if not isinstance(spec.__dict__[colorby], np.ndarray):
1134
+ raise ValueError(
1135
+ f"{colorby} is not a valid attribute of the spectrum {spec_idx}",
1136
+ )
1137
+ if len(spec.__dict__[colorby]) != len(spec.mz):
1138
+ raise ValueError(
1139
+ f"{colorby} is not a valid attribute of the spectrum {spec_idx}",
1140
+ )
1141
+ else:
1142
+ mcvalues = spec.__dict__[colorby].copy()  # copy so the clamping below does not mutate the spectrum
1143
+ mcvalues[mcvalues < -1] = -1
1144
+ mcvalues[mcvalues > 1] = 1
1145
+
1146
+ # normalize the values to be between 0 and 255
1147
+ mcvalues = (mcvalues + 1) / 2 * 255
1148
+
1149
+ cm_markers = process_cmap(cmap, ncolors=255, provider=cmap_provider)
1150
+ # assign colors to the peaks based on the colorby attribute. Set Nans to black
1151
+ mcolors = [
1152
+ rgb2hex(cm_markers[int(i * (len(cm_markers) - 1) / 255)])
1153
+ if not np.isnan(i)
1154
+ else rgb2hex((0, 0, 0))
1155
+ for i in mcvalues
1156
+ ]
1157
+
1158
+ color = colors[spec_idx]
1159
+ mz = spec.mz
1160
+ inty = spec.inty
1161
+ if mz_start is not None:
1162
+ mask = mz >= mz_start
1163
+ mz = mz[mask]
1164
+ inty = inty[mask]
1165
+ mcolors = np.array(mcolors)[mask].tolist()
1166
+ if mz_stop is not None:
1167
+ mask = mz <= mz_stop
1168
+ mz = mz[mask]
1169
+ inty = inty[mask]
1170
+ mcolors = np.array(mcolors)[mask].tolist()
1171
+
1172
+ if len(mz) == 0:
1173
+ print("No peaks in spectrum after trimming")
1174
+ continue  # skip this spectrum instead of aborting the whole plot
1175
+
1176
+ if not spec.centroided:
1177
+ # For profile spectra, adjust the points for line continuity
1178
+ mz_diff = np.diff(mz)
1179
+ new_mzs: list[float] = []
1180
+ new_inty: list[float] = []
1181
+ last_good_step = 1
1182
+ for i in range(len(mz_diff)):
1183
+ if mz_diff[i] > last_good_step * 4:
1184
+ new_mzs.append(mz[i] + last_good_step)
1185
+ new_inty.append(0)
1186
+ new_mzs.append(mz[i + 1] - last_good_step)
1187
+ new_inty.append(0)
1188
+ else:
1189
+ last_good_step = mz_diff[i]
1190
+ if len(new_mzs) > 0:
1191
+ new_mzs_array = np.array(new_mzs)
1192
+ new_inty_array = np.array(new_inty)
1193
+ mz = np.append(mz, new_mzs_array)
1194
+ inty = np.append(inty, new_inty_array)
1195
+ idx = np.argsort(mz)
1196
+ mz = mz[idx]
1197
+ inty = inty[idx]
1198
+
1199
+ # Plot profile spectrum as a line
1200
+ p.line(mz, inty, line_color=color, legend_label=label)
1201
+ else:
1202
+ # For centroided spectra, build a data source that includes all available array attributes
1203
+ # to_dict() converts arrays to lists, so collect the array attributes from __dict__ instead
+ data = {key: val for key, val in spec.__dict__.items() if isinstance(val, np.ndarray) and val.size == mz.size}
+ data["mz"] = mz
+ data["inty"] = inty
1206
+ data["zeros"] = np.zeros_like(mz)
1207
+ if colorby is not None:
1208
+ data[colorby] = mcolors
1209
+ source = ColumnDataSource(data)
1210
+
1211
+ # seg = p.segment(
1212
+ # x0="mz",
1213
+ # y0="zeros",
1214
+ # x1="mz",
1215
+ # y1="inty",
1216
+ # line_color=color,
1217
+ # legend_label=label,
1218
+ # source=source,
1219
+ # )
1220
+ if colorby is not None:
1221
+ sc = p.scatter(
1222
+ x="mz",
1223
+ y="inty",
1224
+ size=5,
1225
+ fill_color=colorby,
1226
+ line_color=colorby,
1227
+ legend_label=label,
1228
+ source=source,
1229
+ )
1230
+ else:
1231
+ sc = p.scatter(
1232
+ x="mz",
1233
+ y="inty",
1234
+ size=3,
1235
+ fill_color=color,
1236
+ line_color=color,
1237
+ legend_label=label,
1238
+ source=source,
1239
+ )
1240
+ # Create tooltips for all columns in the data source
1241
+ tooltips = [(k, "@" + k) for k in source.data if k != "zeros"]
1242
+ hover_tool = HoverTool(renderers=[sc], tooltips=tooltips) # seg
1243
+ p.add_tools(hover_tool)
1244
+ box_zoom_tools = [tool for tool in p.toolbar.tools if isinstance(tool, BoxZoomTool)]
1245
+ if box_zoom_tools:
1246
+ p.toolbar.active_drag = box_zoom_tools[0]
1247
+ except Exception as e:
1248
+ print(f"Error plotting spectrum {spec_idx}: {e}")
1249
+
1250
+ if colorby is not None:
1251
+ # Create a color mapper using the colormap (cm) with fixed range from -1 to 1
1252
+ color_mapper = LinearColorMapper(palette=[rgb2hex(c) for c in cm_markers], low=-1, high=1)
1253
+ if ColorBar is not None:
1254
+ color_bar = ColorBar(
1255
+ color_mapper=color_mapper,
1256
+ ticker=FixedTicker(ticks=[-1, -0.5, 0, 0.5, 1]),
1257
+ location=(0, 0),
1258
+ )
1259
+ p.add_layout(color_bar, "right")
1260
+
1261
+ if ylog:
1262
+ p.y_scale = LogScale()
1263
+ p.yaxis.formatter = LogTickFormatter()
1264
+ else:
1265
+ p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
1266
+ p.legend.click_policy = "hide"
1267
+
1268
+ p.legend.click_policy = "hide"
1269
+ p.yaxis.formatter = NumeralTickFormatter(format="0.0e0")
1270
+
1271
+ if filename is not None:
1272
+ if filename.endswith(".html"):
1273
+ output_file(filename)
1274
+ save(p)
1275
+ elif filename.endswith(".svg"):
1276
+ p.output_backend = "svg"
1277
+ export_svg(p, filename=filename)
1278
+ elif filename.endswith(".png"):
1279
+ export_png(p, filename=filename)
1280
+ else:
1281
+ show(p)
1282
+ else:
1283
+ show(p)
1284
+
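+ # Illustrative use: overlay two spectra (hypothetical spec_a, spec_b) with custom legend labels
+ # and export the figure to PNG.
+ #   plot_spectra([spec_a, spec_b], labels=["before", "after"], filename="overlay.png")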
1285
+
1286
+ if __name__ == "__main__":
1287
+ pass