pytrms 0.2.1__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pytrms/peaktable.py ADDED
@@ -0,0 +1,499 @@
1
+ """Module peaktable.py
2
+
3
+ Defines classes Peak and PeakTable.
4
+
5
+ A `Peak` is a predefined template and a directive for the peakfitting
6
+ algorithm on how to fit peaks in the spectrum.
7
+ This class has additional information of the chemical properties
8
+ (chemical formula, isotopic_abundance) to be able to group isotopic peaks
9
+ of the same compound together and additional factors (k_rate, multiplier)
10
+ to convert a signal into a concentration.
11
+
12
+ The `PeakTable` holds any number of `Peak`-instances. It can be saved
13
+ and recovered to and from various formats, including .json and .tsv and
14
+ other custom formats.
15
+
16
+ >>> from io import StringIO
17
+ >>> s = StringIO('''
18
+ ... {
19
+ ... "version":"1.0",
20
+ ... "R":3333,
21
+ ... "peaks":
22
+ ... [
23
+ ... {"label":"H3O+",
24
+ ... "center":21.0219,
25
+ ... "formula":"?",
26
+ ... "parent":"?",
27
+ ... "isotopic_abundance":0.002,
28
+ ... "k_rate":2.10,
29
+ ... "multiplier":488
30
+ ... }
31
+ ... ]
32
+ ... }
33
+ ... ''')
34
+ >>> pt = PeakTable._parse_json(s)
35
+ >>> pt
36
+ <PeakTable (1) [21.0u]>
37
+ >>> pt[0]
38
+ <Peak [H3O+ <~ ?] @ 21.0219+0.0000>
39
+
40
+ Peaks may be modified and the PeakTable exported in the same format:
41
+ >>> pt[0].formula = 'H3O'
42
+ >>> pt[0].isotopic_abundance = 0.678
43
+ >>> s = StringIO()
44
+ >>> pt._write_json(s)
45
+ >>> s.seek(0)
46
+ 0
47
+ >>> print(s.read())
48
+ {
49
+ "version": "1.0",
50
+ "R": 6000,
51
+ "peaks": [
52
+ {
53
+ "center": 21.0219,
54
+ "label": "H3O+",
55
+ "formula": "H3O",
56
+ "parent": "?",
57
+ "isotopic_abundance": 0.678,
58
+ "k_rate": 2.1,
59
+ "multiplier": 488
60
+ }
61
+ ]
62
+ }
63
+
64
+ """
65
+ import os
66
+ import csv
67
+ import json
68
+ import logging
69
+ from configparser import ConfigParser
70
+ from functools import total_ordering, partial
71
+ from collections import defaultdict
72
+ import h5py
73
+
74
+ import pandas as pd
75
+ import numpy as np
76
+
77
+ log = logging.getLogger(__name__)
78
+
79
+ __all__ = ['Peak', 'PeakTable']
80
+
81
+
82
@total_ordering
class Peak:
    """Defines a Peak in the Spectrum.

    Each Peak is uniquely defined by its `center` mass, which is rounded to
    `_exact_decimals` (4) decimal digits. Comparison, ordering and hashing
    are all based on this rounded `center` alone.

    The `float()` function may be called on a `Peak` to return its `center`.
    A `Peak` may be compared with another `Peak` or with anything that
    `float()` accepts (numbers, numeric strings).

    A `label` may be supplied, otherwise it is derived from the center mass.

    If `borders` are passed explicitly, these borders override the default
    borders of `center` +/- 0.5.

    Positional argument:

    - `center`: the exact mass of the peak (anything `float()` accepts)

    Keyword arguments:

    - `label`: attach a label to this peak (default: m42.0000 for mass 42)
    - `formula`: stored unchanged on the instance
    - `parent`: a `Peak` (its label is stored) or any other object (its
      `str()` is stored); `None` is stored as the empty string
    - `borders`: define the borders of this peak (default: m +/- 0.5u)
    - `isotopic_abundance`: stored as float (default: 1.0)
    - `k_rate`: specify a k-rate (default: 2.0)
    - `multiplier`: specify a multiplier (default: 1.0)
    - `resolution`: stored as float (default: 1000)
    - `shift`: stored as float (default: 0)

    """
    _exact_decimals = 4

    def __init__(self, center, label='', formula='', parent=None, borders=(),
                 isotopic_abundance=1.0, k_rate=2.0, multiplier=1.0,
                 resolution=1000, shift=0):
        # NOTE: the order of the public attribute assignments below defines
        # the key order of a JSON export that iterates over `vars(peak)`.
        self.center = round(float(center), ndigits=Peak._exact_decimals)
        if not label:
            label = 'm{:.4f}'.format(self.center)
        self.label = str(label)
        self.formula = formula
        if isinstance(parent, Peak):
            self.parent = str(parent.label)
        elif parent is not None:
            self.parent = str(parent)
        else:
            self.parent = ''
        # Borders share the precision of the center mass.
        self._borders = tuple(round(float(x), ndigits=Peak._exact_decimals)
                              for x in borders)
        self.isotopic_abundance = float(isotopic_abundance)
        self.k_rate = float(k_rate)
        self.multiplier = float(multiplier)
        self.resolution = float(resolution)
        self.shift = float(shift)

    @staticmethod
    def _as_mass(other):
        """Convert `other` to a mass rounded like `center`.

        Raises TypeError or ValueError if `float(other)` does.
        """
        return round(float(other), Peak._exact_decimals)

    @property
    def is_unitmass(self):
        """True if the center mass is a whole number."""
        return self.center == round(self.center)

    @property
    def borders(self):
        """The explicit borders if given, else `(center - 0.5, center + 0.5)`."""
        if len(self._borders):
            return self._borders
        else:
            return self.center - 0.5, self.center + 0.5

    def __lt__(self, other):
        # Ordering against something that is not a mass is an error, so the
        # exception of the float-conversion is deliberately not caught here.
        return self.center < Peak._as_mass(other)

    def __eq__(self, other):
        try:
            return self.center == Peak._as_mass(other)
        except (TypeError, ValueError):
            # Not convertible to a mass (e.g. None): let Python decide, which
            # makes `peak == None` evaluate to False instead of raising.
            return NotImplemented

    def __hash__(self):
        # Hash the float itself, so that a Peak hashes like the plain number
        # it compares equal to (equal objects must have equal hashes).
        return hash(self.center)

    def __float__(self):
        return self.center

    def __repr__(self):
        return '<%s [%s <~ %s] @ %.4f+%.4f>' % (self.__class__.__name__,
                self.label, self.parent, self.center, self.shift)
155
+
156
+
157
class PeakTable:
    """Keeps a sorted list of peaks.

    Two tables (or a table and a single `Peak`) can be merged by addition;
    subtraction removes peaks. Comparing a table with a mass (`<`, `<=`,
    `>`, `>=`) returns a new table holding only the peaks that satisfy the
    comparison.

    The peaks are sorted once on construction. Replacing an item through
    `table[i] = peak` does not re-sort, while `find_by_mass` performs a
    binary search and therefore relies on the sorted order.

    Supports import/export to various LabView formats.
    """

    @staticmethod
    def from_file(filename):
        """Read a PeakTable from `filename`, choosing the parser by extension.

        Known extensions (matched case-insensitively) are `.ipt`, `.ipta`,
        `.ipt2`, `.ipt3`, `.json`, `.ionipt` and `.h5`.

        Raises ValueError for any other extension.
        """
        ext = os.path.splitext(filename)[1]
        kind = ext.lower()
        if kind == '.h5':
            with h5py.File(filename, 'r') as f:
                return PeakTable._parse_h5(f)

        # extension -> (mode passed to `open`, parser)
        readers = {
            '.ipt': ('rb', PeakTable._parse_ipt),
            '.ipta': ('r', PeakTable._parse_ipta),
            '.ipt2': ('r', PeakTable._parse_ipt2),
            '.ipt3': ('r', PeakTable._parse_json),
            '.json': ('r', PeakTable._parse_json),
            '.ionipt': ('r', PeakTable._parse_ionipt),
        }
        try:
            mode, parser = readers[kind]
        except KeyError:
            raise ValueError("Can't read from file! Unknown file-extension: '%s'."
                             % ext) from None
        with open(filename, mode) as f:
            return parser(f)

    @staticmethod
    def _peaks_from_table(table):
        """Build one `Peak` per row of a DataFrame with the .ipt column names."""
        return [Peak(center=row.MassCenters, label=row.Descriptions,
                     borders=(row.BorderLow, row.BorderHigh),
                     k_rate=row.kRates, multiplier=row.Multipliers)
                for row in table.itertuples(index=False)]

    @staticmethod
    def _parse_ipt(file):
        """Parse a tab-separated, header-less .ipt table."""
        column_names = ['Descriptions', 'MassCenters', 'BorderLow',
                        'BorderHigh', 'Multipliers', 'kRates']
        table = pd.read_csv(file, sep='\t', skip_blank_lines=True,
                            header=None, names=column_names,
                            index_col=False, float_precision='high')

        return PeakTable(PeakTable._peaks_from_table(table))

    @staticmethod
    def _parse_ipta(file):
        """Parse the ini-style .ipta format.

        Sections `Peak_0001`, `Peak_0002`, ... are read consecutively until
        the first missing section. Only the first peak of each section is
        read. A section with a missing key ends the parsing as well (the
        peaks read so far are returned) and is reported as a warning.
        """
        # Not every file-like object has a name (e.g. StringIO).
        source = getattr(file, 'name', '<stream>')
        cp = ConfigParser()
        cp.read_file(file)
        peaks = []
        i = 0
        while True:
            i += 1
            ps = 'Peak_{:04d}'.format(i)  # the 'peakstring', something like Peak_0042
            if not cp.has_section(ps):
                break
            sec = cp[ps]
            try:
                if int(sec['NumOfPeaks']) > 1:
                    log.warning("File %s contains multipeaks. This feature is not supported "
                                "by this parser! Returning only the first peak!", source)
                borders = float(sec['BorderLow']), float(sec['BorderHigh'])
                peaks.append(Peak(center=float(sec[ps + '_MassCenters_1']),
                                  borders=borders,
                                  label=sec[ps + '_Descriptions_1'],
                                  k_rate=float(sec[ps + '_kRates_1']),
                                  multiplier=float(sec[ps + '_Multipliers_1'])))
            except KeyError as exc:
                log.warning("Section %s of %s is incomplete (missing key %s). "
                            "Stopped parsing here!", ps, source, exc)
                break
        log.info("Parsed %d Peaks from %s.", len(peaks), source)

        return PeakTable(peaks)

    @staticmethod
    def _parse_ipt2(file):
        raise NotImplementedError

    @staticmethod
    def _parse_txt(file):
        raise NotImplementedError

    @staticmethod
    def _parse_h5(file):
        """Parse the dataset '/TRACEdata/TraceInfo' of an open HDF5 file.

        Rows 0..6 of the dataset are read as index, description, mass
        center, lower border, upper border, multiplier and k-rate, each
        stored as utf-8 encoded bytes.
        """
        tempData = file['/TRACEdata/TraceInfo']

        # Functions to Convert H5 binary to str, and further to float, int if needed
        vfunc_str = np.vectorize(lambda t: t.decode("utf-8"))
        vfunc_float = np.vectorize(lambda t: float(t.decode("utf-8")))
        vfunc_int = np.vectorize(lambda t: int(t.decode("utf-8")))

        table = pd.DataFrame(data={
            'Descriptions': vfunc_str(tempData[1]),
            'MassCenters': vfunc_float(tempData[2]),
            'BorderLow': vfunc_float(tempData[3]),
            'BorderHigh': vfunc_float(tempData[4]),
            'Multipliers': vfunc_float(tempData[5]),
            'kRates': vfunc_float(tempData[6])},
            index=vfunc_int(tempData[0]))

        return PeakTable(PeakTable._peaks_from_table(table))

    @staticmethod
    def _parse_json(file):
        """Parse the .json/.ipt3 format.

        Every entry of the top-level "peaks" list is passed as keyword
        arguments to `Peak`. Other top-level keys are ignored.

        Raises KeyError if there is no "peaks" entry.
        """
        # Look the list up by name instead of relying on the key order.
        document = json.load(file)
        return PeakTable([Peak(**pars) for pars in document['peaks']])

    @staticmethod
    def _parse_ionipt(file):
        """Parse the .ionipt format (a JSON list of items).

        The "mode" of each item is a bit-field: items with mode 0 are
        skipped, bit 0 adds the item's "border_peak", bit 1 adds every
        entry of the item's "peak" list.
        """
        IGNORE = 0b00
        INTEGRATE = 0b01
        FIT_PEAKS = 0b10

        def _make_peak(ioni_p, borders, shift, parent=None):
            return Peak(ioni_p["center"],
                        label=ioni_p["name"],
                        formula=ioni_p["ionic_isotope"],
                        parent=parent,
                        borders=borders,
                        isotopic_abundance=ioni_p["isotopic_abundance"],
                        k_rate=ioni_p["k_rate"],
                        multiplier=ioni_p["multiplier"],
                        resolution=ioni_p["resolution"],
                        shift=shift)

        peaks = []
        for item in json.load(file):
            mode = int(item["mode"])
            if mode == IGNORE:
                continue
            borders = (item["low"], item["high"])
            shift = item["shift"]
            parent = None
            if mode & INTEGRATE:
                parent = _make_peak(item["border_peak"], borders, shift)
                peaks.append(parent)
            if mode & FIT_PEAKS:
                for ioni_peak in item["peak"]:
                    if parent is None:
                        # Note: we denote a peak w/ parent as a "fitted" peak..
                        #  as a workaround, use the first as (its own) parent:
                        parent = ioni_peak["name"]
                    peaks.append(_make_peak(ioni_peak, borders, shift, parent))

        return PeakTable(peaks)

    def _write_json(self, fp, resolution=6000, fileversion='1.0'):
        """Write all public attributes of each peak as JSON to `fp`.

        Attributes starting with an underscore (e.g. explicit borders) are
        not exported.
        """
        document = {
            'version': fileversion,
            'R': resolution,
            'peaks': [{key: val for key, val in vars(peak).items()
                       if not key.startswith('_')}
                      for peak in self.peaks],
        }
        json.dump(document, fp, indent=2)

    def _write_ipt(self, fp, fileversion='1.0'):
        """Write a tab-separated table to `fp` (versions '1.0' and '1.1').

        Version '1.1' appends the columns resolution and shift.
        """
        if fileversion not in ['1.0', '1.1']:
            raise NotImplementedError("Can't write .ipt version %s!" % fileversion)

        out = csv.writer(fp, dialect='excel-tab')
        # Note: pretty-print by using extra width of 10 and 12 for numbers and label,
        #  respectively (this might not be standard csv though):
        number_format = '{:>10.4f}'.format
        string_format = '{:<12s}'.format
        with_extras = float(fileversion) >= 1.1
        for p in self:
            columns = [p.center] + list(p.borders) + [p.multiplier, p.k_rate]
            if with_extras:
                columns += [p.resolution, p.shift]
            out.writerow([string_format(p.label)] + [number_format(c) for c in columns])

    def _write_ipta(self, fp, fileversion='1.0'):
        """Write the ini-style .ipta format (version '1.0' only) to `fp`."""
        if fileversion != '1.0':
            raise NotImplementedError("Can't write .ipta version %s!" % fileversion)

        cp = ConfigParser()
        cp.optionxform = str  # keep the case of the option names

        sec_name = 'General'
        cp.add_section(sec_name)
        sec = cp[sec_name]
        sec['PeaksVersion'] = str(fileversion)

        for i, peak in enumerate(self):
            sec_name = 'Peak_{:04d}'.format(i+1)
            cp.add_section(sec_name)
            sec = cp[sec_name]
            sec['Mode'] = '0'
            sec['NumOfPeaks'] = '1'
            borders = peak.borders
            sec['BorderLow'] = str(borders[0])
            sec['BorderHigh'] = str(borders[1])
            sec[sec_name+'_NumDescriptions'] = '1'
            sec[sec_name+'_Descriptions_1'] = str(peak.label)
            sec[sec_name+'_NumMassCenters'] = '1'
            sec[sec_name+'_MassCenters_1'] = str(peak.center)
            sec[sec_name+'_NumMultipliers'] = '1'
            sec[sec_name+'_Multipliers_1'] = str(peak.multiplier)
            sec[sec_name+'_NumkRates'] = '1'
            sec[sec_name+'_kRates_1'] = str(peak.k_rate)
            sec['GaussPercent'] = '0.000000'
            sec['GaussHeight'] = '0.000000'
            sec['GaussWidth'] = '0.002000'
            sec[sec_name+'_NumIsPrimIon'] = '1'
            sec[sec_name+'_IsPrimIon_1'] = '0.000000'
            sec[sec_name+'_NumSigma'] = '1'
            sec[sec_name+'_Sigma_1'] = '0.000000'
            sec['GaussCenter'] = '0.000000'
            sec['FitFunction'] = '0'

        cp.write(fp)
        # Not every file-like object has a name (e.g. StringIO).
        log.info("Written %d Peaks to %s.", len(self), getattr(fp, 'name', '<stream>'))

    @staticmethod
    def from_masses(exact_masses):
        """Create a table of default peaks, one for each of `exact_masses`."""
        return PeakTable([Peak(mass) for mass in exact_masses])

    def __init__(self, peaks: list = ()):
        self.peaks = sorted(peaks)

    @property
    def nominal(self):
        """A new table with all peaks that have no parent."""
        return PeakTable([peak for peak in self.peaks if not peak.parent])

    @property
    def fitted(self):
        """A new table with all peaks that have a parent."""
        return PeakTable([peak for peak in self.peaks if peak.parent])

    @property
    def exact_masses(self):
        return [peak.center for peak in self.peaks]

    @property
    def mass_labels(self):
        return [peak.label for peak in self.peaks]

    def find_by_mass(self, exact_mass):
        """Return the peak at `exact_mass` up to 4 decimal digits precision.

        This is a binary search and relies on the peaks being sorted.

        Raises KeyError if not found.
        """
        lo, hi = 0, len(self) - 1
        while lo <= hi:
            mid = (lo + hi) // 2
            candidate = self[mid]
            if candidate == exact_mass:
                return candidate
            if candidate < exact_mass:
                lo = mid + 1
            else:
                hi = mid - 1

        raise KeyError("No such peak at %s!" % str(exact_mass))

    def group(self):
        """Return a dict that maps each `parent` to the list of its peaks."""
        groups = defaultdict(list)
        for peak in self:
            groups[peak.parent].append(peak)

        return groups

    def save(self, filename):
        """Write this table to `filename`, choosing the format by extension.

        Known extensions (matched case-insensitively) are `.json`, `.ipt3`,
        `.ipt` and `.ipta`.

        Raises NotImplementedError for any other extension.
        """
        ext = os.path.splitext(filename)[1]
        kind = ext.lower()
        newline = None  # the default of `open`
        if kind in ('.json', '.ipt3'):
            writer = partial(self._write_json, resolution=6000, fileversion='1.0')
        elif kind == '.ipt':
            writer = partial(self._write_ipt, fileversion='1.0')
            # The csv module writes its own line terminators; without
            # newline='' they would be translated a second time on
            # platforms where the line separator is not '\n'.
            newline = ''
        elif kind == '.ipta':
            writer = partial(self._write_ipta, fileversion='1.0')
        else:
            raise NotImplementedError("can't export with file extension <%s>!" % ext)

        with open(filename, 'w', newline=newline) as f:
            writer(f)

    def __len__(self):
        return len(self.peaks)

    def __iter__(self):
        return iter(self.peaks)

    def __getitem__(self, index):
        return self.peaks[index]

    def __setitem__(self, index, peak):
        """Replace the peak at `index`.

        Raises TypeError if `peak` is not a `Peak` and ValueError if an
        equal peak exists at any other position.
        """
        if not isinstance(peak, Peak):
            raise TypeError("Can only insert a Peak into a PeakTable!")

        # Drop the item that is about to be replaced from a copy. Unlike
        # slicing around `index`, this is also correct for negative indices.
        others = list(self.peaks)
        del others[index]
        if peak in others:
            raise ValueError("PeakTable must be unique! Can't add %r." % peak)

        self.peaks[index] = peak

    def __add__(self, other):
        """Return the union of this table with a PeakTable or a single Peak."""
        if isinstance(other, PeakTable):
            return PeakTable(set(self.peaks) | set(other.peaks))
        elif isinstance(other, Peak):
            return PeakTable(set(self.peaks) | {other})
        else:
            raise TypeError(str(other))

    def __sub__(self, other):
        """Return this table without the peaks of a PeakTable or a single Peak.

        Peaks of `other` that are not part of this table are ignored.
        """
        if isinstance(other, PeakTable):
            return PeakTable(set(self.peaks) - set(other.peaks))
        elif isinstance(other, Peak):
            return PeakTable(set(self.peaks) - {other})
        else:
            raise TypeError(str(other))

    def __gt__(self, other):
        return PeakTable([peak for peak in self.peaks if peak > other])

    def __ge__(self, other):
        return PeakTable([peak for peak in self.peaks if peak >= other])

    def __lt__(self, other):
        return PeakTable([peak for peak in self.peaks if peak < other])

    def __le__(self, other):
        return PeakTable([peak for peak in self.peaks if peak <= other])

    def __repr__(self):
        if not len(self):
            return '<%s (%d) []>' % (self.__class__.__name__, len(self))
        elif len(self) == 1:
            return '<%s (%d) [%.1fu]>' % (self.__class__.__name__, len(self),
                    self.peaks[0].center)
        else:
            return '<%s (%d) [%.1fu .. %.1fu]>' % (self.__class__.__name__, len(self),
                    self.peaks[0].center, self.peaks[-1].center)
499
+
@@ -0,0 +1,4 @@
1
+ from .plotting import plot_marker
2
+
3
+ __all__ = ['plot_marker']
4
+
@@ -0,0 +1,27 @@
1
+ # import matplotlib.pyplot as plt # should be inlined, loads forever!
2
+
3
+
4
def plot_marker(signal, marker, **kwargs):
    '''Draw `signal` as a line and shade the x-ranges where `marker` is true.

    If `signal` has a `plot` method, it is called with the new axis and the
    first line it draws is used; otherwise `signal` is passed to the plot
    method of the axis. The shaded band spans the y-limits the axis has
    after the line was drawn. If `signal` has a `name`, it becomes the title.

    Extra keyword arguments are accepted, but currently unused.

    Returns a tuple of `figure, axis`.
    '''
    # imported on demand, because loading pyplot takes a long time
    import matplotlib.pyplot as plt

    figure, axis = plt.subplots()
    if not hasattr(signal, 'plot'):
        (curve,) = axis.plot(signal)
    else:
        drawn_on = signal.plot(ax=axis)
        curve, *_ = drawn_on.get_lines()

    x_values = curve.get_xdata()
    y_low, y_high = axis.get_ylim()
    axis.fill_between(x_values, y_low, y_high, where=marker, color='orange')

    axis.grid(visible=True)
    if hasattr(signal, 'name'):
        axis.set_title(signal.name)

    return figure, axis
27
+
@@ -0,0 +1,4 @@
1
+ from .ionitof_reader import IoniTOFReader
2
+
3
+ __all__ = ['IoniTOFReader']
4
+