pychemstation 0.5.7.dev1__py3-none-any.whl → 0.5.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pychemstation/analysis/base_spectrum.py +509 -509
- pychemstation/analysis/spec_utils.py +304 -304
- pychemstation/control/controllers/comm.py +4 -3
- pychemstation/control/controllers/method.py +18 -2
- pychemstation/control/controllers/sequence.py +10 -1
- pychemstation/control/controllers/table_controller.py +7 -3
- pychemstation/control/hplc.py +65 -14
- pychemstation/utils/macro.py +3 -0
- {pychemstation-0.5.7.dev1.dist-info → pychemstation-0.5.9.dist-info}/METADATA +1 -1
- {pychemstation-0.5.7.dev1.dist-info → pychemstation-0.5.9.dist-info}/RECORD +15 -15
- tests/constants.py +1 -0
- tests/test_comb.py +7 -9
- {pychemstation-0.5.7.dev1.dist-info → pychemstation-0.5.9.dist-info}/LICENSE +0 -0
- {pychemstation-0.5.7.dev1.dist-info → pychemstation-0.5.9.dist-info}/WHEEL +0 -0
- {pychemstation-0.5.7.dev1.dist-info → pychemstation-0.5.9.dist-info}/top_level.txt +0 -0
@@ -1,509 +1,509 @@
|
|
1
|
-
import pickle
|
2
|
-
import os
|
3
|
-
import logging
|
4
|
-
|
5
|
-
from abc import ABC, abstractmethod
|
6
|
-
|
7
|
-
import numpy as np
|
8
|
-
import matplotlib.pyplot as plt
|
9
|
-
|
10
|
-
from scipy import (
|
11
|
-
sparse,
|
12
|
-
signal,
|
13
|
-
interpolate,
|
14
|
-
integrate,
|
15
|
-
)
|
16
|
-
|
17
|
-
from .utils import interpolate_to_index, find_nearest_value_index
|
18
|
-
|
19
|
-
|
20
|
-
class AbstractSpectrum(ABC):
|
21
|
-
"""General class for handling spectroscopic data
|
22
|
-
|
23
|
-
Contains methods for data manipulation (load/save) and basic processing
|
24
|
-
features, such as baseline correction, smoothing, peak picking and
|
25
|
-
integration.
|
26
|
-
|
27
|
-
All data processing happens in place!
|
28
|
-
"""
|
29
|
-
|
30
|
-
# for plotting
|
31
|
-
AXIS_MAPPING = {
|
32
|
-
"x": "x_data",
|
33
|
-
"y": "y_data",
|
34
|
-
}
|
35
|
-
|
36
|
-
# list of properties to be saved
|
37
|
-
PUBLIC_PROPERTIES = {
|
38
|
-
"x",
|
39
|
-
"y",
|
40
|
-
"peaks",
|
41
|
-
"timestamp",
|
42
|
-
}
|
43
|
-
|
44
|
-
# list of internal properties to be dumped during new data loading
|
45
|
-
INTERNAL_PROPERTIES = {
|
46
|
-
"baseline",
|
47
|
-
}
|
48
|
-
|
49
|
-
def __init__(self, path=None, autosaving=True):
|
50
|
-
"""Default constructor, loads properties into instance namespace.
|
51
|
-
|
52
|
-
Can be redefined in ancestor classes.
|
53
|
-
|
54
|
-
Args:
|
55
|
-
path (Union[str, bool], optional): Valid path to save data to.
|
56
|
-
If omitted, uses ".//spectrum". If False - no folder created.
|
57
|
-
autosaving (bool, optional): If the True (default) will save the
|
58
|
-
spectrum when the new one is loaded. Will drop otherwise.
|
59
|
-
"""
|
60
|
-
|
61
|
-
self.autosaving = autosaving
|
62
|
-
|
63
|
-
# loading public properties
|
64
|
-
for prop in self.PUBLIC_PROPERTIES:
|
65
|
-
setattr(self, prop, None)
|
66
|
-
|
67
|
-
# loading internal properties
|
68
|
-
for prop in self.INTERNAL_PROPERTIES:
|
69
|
-
setattr(self, prop, None)
|
70
|
-
|
71
|
-
# creating data path
|
72
|
-
if path is None:
|
73
|
-
self.path = os.path.join(".", "spectrum")
|
74
|
-
os.makedirs(self.path, exist_ok=True)
|
75
|
-
else:
|
76
|
-
try:
|
77
|
-
os.makedirs(path, exist_ok=True)
|
78
|
-
self.path = path
|
79
|
-
except TypeError: # type(path) -> bool
|
80
|
-
self.path = "."
|
81
|
-
|
82
|
-
# creating logger
|
83
|
-
if not hasattr(self, "logger"):
|
84
|
-
self.logger = logging.getLogger(self.__class__.__name__)
|
85
|
-
|
86
|
-
def _dump(self):
|
87
|
-
"""Dummy method to dump all spectral data. Used before loading new data."""
|
88
|
-
|
89
|
-
self.__init__(path=self.path, autosaving=self.autosaving)
|
90
|
-
|
91
|
-
@abstractmethod
|
92
|
-
def load_spectrum(self, x, y, timestamp):
|
93
|
-
"""Loads the spectral data.
|
94
|
-
|
95
|
-
This method must be redefined in ancestor classes.
|
96
|
-
|
97
|
-
Args:
|
98
|
-
x (:obj: np.array): An array with data to be plotted as "x" axis.
|
99
|
-
y (:obj: np.array): An array with data to be plotted as "y" axis.
|
100
|
-
timestamp (float): Timestamp to the corresponding spectrum.
|
101
|
-
"""
|
102
|
-
|
103
|
-
try:
|
104
|
-
assert x.shape == y.shape
|
105
|
-
except AssertionError:
|
106
|
-
raise ValueError("X and Y data must have same dimension.") from None
|
107
|
-
|
108
|
-
if self.x is not None:
|
109
|
-
if self.autosaving:
|
110
|
-
self.save_data()
|
111
|
-
self._dump()
|
112
|
-
|
113
|
-
self.x = x
|
114
|
-
self.y = y
|
115
|
-
self.timestamp = timestamp
|
116
|
-
|
117
|
-
def save_data(self, filename=None, verbose=False):
|
118
|
-
"""Saves the data to given path using python pickle module.
|
119
|
-
|
120
|
-
Args:
|
121
|
-
filename (str, optional): Filename for the current spectrum. If
|
122
|
-
omitted, using current timestamp.
|
123
|
-
verbose (bool, optional): If all processed data needs to be saved as
|
124
|
-
well. Default: False.
|
125
|
-
"""
|
126
|
-
if filename is None:
|
127
|
-
filename = f"{self.timestamp}.pickle"
|
128
|
-
else:
|
129
|
-
# file extension used from python 3. documentation
|
130
|
-
filename += ".pickle"
|
131
|
-
|
132
|
-
path = os.path.join(self.path, filename)
|
133
|
-
|
134
|
-
data = {
|
135
|
-
prop: self.__dict__[prop]
|
136
|
-
for prop in self.PUBLIC_PROPERTIES
|
137
|
-
if self.__dict__[prop] is not None
|
138
|
-
}
|
139
|
-
|
140
|
-
if verbose:
|
141
|
-
data.update(
|
142
|
-
{
|
143
|
-
prop: self.__dict__[prop]
|
144
|
-
for prop in self.INTERNAL_PROPERTIES
|
145
|
-
if self.__dict__[prop] is not None
|
146
|
-
}
|
147
|
-
)
|
148
|
-
|
149
|
-
with open(path, "wb") as f:
|
150
|
-
pickle.dump(data, f)
|
151
|
-
|
152
|
-
self.logger.info("Saved in %s", path)
|
153
|
-
|
154
|
-
def load_data(self, path):
|
155
|
-
"""Loads the data from saved pickle file.
|
156
|
-
|
157
|
-
Data is loaded in place, so instance attributes are overwritten.
|
158
|
-
|
159
|
-
Args:
|
160
|
-
path (str): Valid path to pickle file.
|
161
|
-
"""
|
162
|
-
|
163
|
-
if self.x is not None:
|
164
|
-
self._dump()
|
165
|
-
|
166
|
-
# TODO add exception handling
|
167
|
-
with open(path, "rb") as f:
|
168
|
-
data = pickle.load(f)
|
169
|
-
|
170
|
-
self.__dict__.update(data)
|
171
|
-
|
172
|
-
def trim(self, xmin, xmax, in_place=True):
|
173
|
-
"""Trims the spectrum data within specific X region
|
174
|
-
|
175
|
-
Args:
|
176
|
-
xmin (int): Minimum position on the X axis to start from.
|
177
|
-
xmax (int): Maximum position on the X axis to end to.
|
178
|
-
in_place (bool): If trimming happens in place, else returns new
|
179
|
-
array as trimmed copy.
|
180
|
-
|
181
|
-
Returns:
|
182
|
-
(bool): True if trimmed in place.
|
183
|
-
(Tuple[np.array, np.array]): Trimmed copy of the original array as
|
184
|
-
tuple with X and Y points respectively.
|
185
|
-
"""
|
186
|
-
|
187
|
-
# Creating the mask to map arrays
|
188
|
-
above_ind = self.x > xmin
|
189
|
-
below_ind = self.x < xmax
|
190
|
-
full_mask = np.logical_and(above_ind, below_ind)
|
191
|
-
|
192
|
-
# Mapping arrays if they are supplied
|
193
|
-
if in_place:
|
194
|
-
self.y = self.y[full_mask]
|
195
|
-
self.x = self.x[full_mask]
|
196
|
-
if self.baseline is not None and self.baseline.shape == full_mask.shape:
|
197
|
-
self.baseline = self.baseline[full_mask]
|
198
|
-
return True
|
199
|
-
else:
|
200
|
-
return (self.x.copy()[full_mask], self.y.copy()[full_mask])
|
201
|
-
|
202
|
-
def show_spectrum(
|
203
|
-
self,
|
204
|
-
filename=None,
|
205
|
-
title=None,
|
206
|
-
label=None,
|
207
|
-
):
|
208
|
-
"""Plots the spectral data using matplotlib.pyplot module.
|
209
|
-
|
210
|
-
Args:
|
211
|
-
filename (str, optional): Filename for the current plot. If omitted,
|
212
|
-
file is not saved.
|
213
|
-
title (str, optional): Title for the spectrum plot. If omitted, no
|
214
|
-
title is set.
|
215
|
-
label (str, optional): Label for the spectrum plot. If omitted, uses
|
216
|
-
the spectrum timestamp.
|
217
|
-
"""
|
218
|
-
if label is None:
|
219
|
-
label = f"{self.timestamp}"
|
220
|
-
|
221
|
-
fig, ax = plt.subplots(figsize=(12, 8))
|
222
|
-
|
223
|
-
ax.plot(
|
224
|
-
self.x,
|
225
|
-
self.y,
|
226
|
-
color="xkcd:navy blue",
|
227
|
-
label=label,
|
228
|
-
)
|
229
|
-
|
230
|
-
ax.set_xlabel(self.AXIS_MAPPING["x"])
|
231
|
-
ax.set_ylabel(self.AXIS_MAPPING["y"])
|
232
|
-
|
233
|
-
if title is not None:
|
234
|
-
ax.set_title(title)
|
235
|
-
|
236
|
-
# plotting peaks if found
|
237
|
-
if self.peaks is not None:
|
238
|
-
plt.scatter(
|
239
|
-
self.peaks[:, 1],
|
240
|
-
self.peaks[:, 2],
|
241
|
-
label="found peaks",
|
242
|
-
color="xkcd:tangerine",
|
243
|
-
)
|
244
|
-
|
245
|
-
ax.legend()
|
246
|
-
|
247
|
-
if filename is None:
|
248
|
-
fig.show()
|
249
|
-
|
250
|
-
else:
|
251
|
-
path = os.path.join(self.path, "images")
|
252
|
-
os.makedirs(path, exist_ok=True)
|
253
|
-
fig.savefig(os.path.join(path, f"{filename}.png"), dpi=150)
|
254
|
-
|
255
|
-
def find_peaks(self, threshold=1, min_width=.1, min_dist=None, area=None):
|
256
|
-
"""Finds all peaks above the threshold with at least min_width width.
|
257
|
-
|
258
|
-
Args:
|
259
|
-
threshold (float, optional): Relative peak height with respect to
|
260
|
-
the highest peak.
|
261
|
-
min_width (int, optional): Minimum peak width.
|
262
|
-
min_dist (int, optional): Minimum distance between peaks.
|
263
|
-
area (Tuple(int, int), optional): Area to search peaks in. Supplied
|
264
|
-
as min, max X values tuple.
|
265
|
-
|
266
|
-
Return:
|
267
|
-
(:obj: np.array): An array of peaks ids as rounded peak_x coordinate
|
268
|
-
value. If searching within specified area, full peak information
|
269
|
-
matrix is returned, see below for details.
|
270
|
-
|
271
|
-
Also updates the self.peaks attrbiute (if "area" is omitted) as:
|
272
|
-
(:obj: np.array): An (n_peaks x 5) array with peak data as columns:
|
273
|
-
peak_id (float): Rounded peak_x coordinate value.
|
274
|
-
peak_x (float): X-coordinate for the peak.
|
275
|
-
peak_y (float): Y-coordinate for the peak.
|
276
|
-
peak_left_x (float): X-coordinate for the left peak border.
|
277
|
-
peak_right_x (float): X-coordinate for the right peak border.
|
278
|
-
|
279
|
-
Peak data is accessed with indexing, e.g.:
|
280
|
-
self.peaks[n] will give all data for n's peak
|
281
|
-
self.peaks[:, 2] will give Y coordinate for all found peaks
|
282
|
-
"""
|
283
|
-
|
284
|
-
# only dumping if area is omitted
|
285
|
-
if self.peaks is not None and not area:
|
286
|
-
self.peaks = None
|
287
|
-
|
288
|
-
# trimming
|
289
|
-
if area is not None:
|
290
|
-
spec_y = self.trim(area[0], area[1], False)[1]
|
291
|
-
else:
|
292
|
-
spec_y = self.y.copy()
|
293
|
-
|
294
|
-
threshold *= self.y.max() - self.y.min()
|
295
|
-
peaks, _ = signal.find_peaks(
|
296
|
-
spec_y, height=threshold, width=min_width, distance=min_dist
|
297
|
-
)
|
298
|
-
|
299
|
-
# obtaining width for full peak height
|
300
|
-
# TODO deal with intersecting peaks!
|
301
|
-
# TODO deal with incorrect peak width
|
302
|
-
pw = signal.peak_widths(spec_y, peaks, rel_height=0.95)
|
303
|
-
|
304
|
-
# converting all to column vectors by adding extra dimension along 2nd
|
305
|
-
# axis. Check documentation on np.newaxis for details
|
306
|
-
peak_xs = self.x.copy()[peaks][:, np.newaxis]
|
307
|
-
peak_ys = self.y.copy()[peaks][:, np.newaxis]
|
308
|
-
peaks_ids = np.around(peak_xs)
|
309
|
-
peaks_left_ids = interpolate_to_index(self.x, pw[2])[:, np.newaxis]
|
310
|
-
peaks_right_ids = interpolate_to_index(self.x, pw[3])[:, np.newaxis]
|
311
|
-
|
312
|
-
if area is None:
|
313
|
-
# updating only if area is not specified
|
314
|
-
self.peaks = np.hstack(
|
315
|
-
(
|
316
|
-
peaks_ids,
|
317
|
-
peak_xs,
|
318
|
-
peak_ys,
|
319
|
-
peaks_left_ids,
|
320
|
-
peaks_right_ids,
|
321
|
-
)
|
322
|
-
)
|
323
|
-
return peaks_ids
|
324
|
-
|
325
|
-
return np.hstack(
|
326
|
-
(
|
327
|
-
peaks_ids,
|
328
|
-
peak_xs,
|
329
|
-
peak_ys,
|
330
|
-
peaks_left_ids,
|
331
|
-
peaks_right_ids,
|
332
|
-
)
|
333
|
-
)
|
334
|
-
|
335
|
-
def correct_baseline(self, lmbd=1e3, p=0.01, n_iter=10):
|
336
|
-
"""Generates and subtracts the baseline for the given spectrum.
|
337
|
-
|
338
|
-
Based on Eilers, P; Boelens, H. (2005): Baseline Correction with
|
339
|
-
Asymmetric Least Squares Smoothing.
|
340
|
-
|
341
|
-
Default values chosen arbitrary based on processing Raman spectra.
|
342
|
-
|
343
|
-
Args:
|
344
|
-
lmbd (float): Arbitrary parameter to define the smoothness of the
|
345
|
-
baseline the larger lmbd is, the smoother baseline will be,
|
346
|
-
recommended value between 1e2 and 1e5.
|
347
|
-
p (float): An asymmetric least squares parameter to compute the
|
348
|
-
weights of the residuals, chosen arbitrary, recommended values
|
349
|
-
between 0.1 and 0.001.
|
350
|
-
n_iter (int, optional): Number of iterations to perform the fit,
|
351
|
-
recommended value between 5 and 10.
|
352
|
-
"""
|
353
|
-
|
354
|
-
# generating the baseline first
|
355
|
-
L = len(self.y)
|
356
|
-
D = sparse.csc_matrix(np.diff(np.eye(L), 2))
|
357
|
-
w = np.ones(L)
|
358
|
-
for _ in range(n_iter):
|
359
|
-
W = sparse.spdiags(w, 0, L, L)
|
360
|
-
Z = W + lmbd * D.dot(D.transpose())
|
361
|
-
z = sparse.linalg.spsolve(Z, w * self.y)
|
362
|
-
w = p * (self.y > z) + (1 - p) * (self.y < z)
|
363
|
-
|
364
|
-
# updating attribute for future use
|
365
|
-
self.baseline = z
|
366
|
-
|
367
|
-
# subtracting the baseline
|
368
|
-
# TODO update peak coordinates if peaks were present
|
369
|
-
self.y -= z
|
370
|
-
self.logger.info("Baseline corrected")
|
371
|
-
|
372
|
-
def integrate_area(self, area, rule="trapz"):
|
373
|
-
"""Integrate the spectrum within given area
|
374
|
-
|
375
|
-
Args:
|
376
|
-
area (Tuple[float, float]): Tuple with left and right border (X axis
|
377
|
-
obviously) for the desired area.
|
378
|
-
rule (str): Method for integration, "trapz" - trapezoidal
|
379
|
-
rule (default), "simps" - Simpson's rule.
|
380
|
-
Returns:
|
381
|
-
float: Definite integral within given area as approximated by given
|
382
|
-
method.
|
383
|
-
"""
|
384
|
-
|
385
|
-
# closest value in experimental data and its index in data array
|
386
|
-
_, left_idx = find_nearest_value_index(self.x, area[0])
|
387
|
-
_, right_idx = find_nearest_value_index(self.x, area[1])
|
388
|
-
|
389
|
-
if rule == "trapz":
|
390
|
-
return integrate.trapz(
|
391
|
-
self.y[left_idx : right_idx + 1], self.x[left_idx : right_idx + 1]
|
392
|
-
)
|
393
|
-
|
394
|
-
elif rule == "simps":
|
395
|
-
return integrate.simps(
|
396
|
-
self.y[left_idx : right_idx + 1], self.x[left_idx : right_idx + 1]
|
397
|
-
)
|
398
|
-
|
399
|
-
else:
|
400
|
-
raise ValueError(
|
401
|
-
'Only trapezoidal "trapz" or Simpson\'s "simps" \
|
402
|
-
rules are supported!'
|
403
|
-
)
|
404
|
-
|
405
|
-
def integrate_peak(self, peak, rule="trapz"):
|
406
|
-
"""Calculate an area for a given peak
|
407
|
-
|
408
|
-
Args:
|
409
|
-
peak (float): (rounded) peak Y coordinate. If precise peak position
|
410
|
-
was not found, closest is picked.
|
411
|
-
rule (str): Method for integration, "trapz" - trapezoidal
|
412
|
-
rule (default), "simps" - Simpson's rule.
|
413
|
-
Returns:
|
414
|
-
float: Definite integral within given area as approximated by given
|
415
|
-
method.
|
416
|
-
"""
|
417
|
-
|
418
|
-
if self.peaks is None:
|
419
|
-
self.find_peaks()
|
420
|
-
|
421
|
-
true_peak, idx = find_nearest_value_index(self.peaks[:, 0], peak)
|
422
|
-
_, _, _, left, right = self.peaks[idx]
|
423
|
-
|
424
|
-
self.logger.debug(
|
425
|
-
"Integrating peak found at %s, borders %.02f-%.02f", true_peak, left, right
|
426
|
-
)
|
427
|
-
|
428
|
-
return self.integrate_area((left, right), rule=rule)
|
429
|
-
|
430
|
-
def smooth_spectrum(self, window_length=15, polyorder=7, in_place=True):
|
431
|
-
"""Smoothes the spectrum using Savitsky-Golay filter.
|
432
|
-
|
433
|
-
For details see scipy.signal.savgol_filter.
|
434
|
-
|
435
|
-
Default values for window length and polynomial order were chosen
|
436
|
-
arbitrary based on Raman spectra.
|
437
|
-
|
438
|
-
Args:
|
439
|
-
window_length (int): The length of the filter window (i.e. the
|
440
|
-
number of coefficients). window_length must be a positive odd
|
441
|
-
integer.
|
442
|
-
polyorder (int): The order of the polynomial used to fit the
|
443
|
-
samples. polyorder must be less than window_length.
|
444
|
-
in_place (bool, optional): If smoothing happens in place, returns
|
445
|
-
smoothed spectrum if True.
|
446
|
-
"""
|
447
|
-
|
448
|
-
if in_place:
|
449
|
-
self.y = signal.savgol_filter(
|
450
|
-
self.y, window_length=window_length, polyorder=polyorder
|
451
|
-
)
|
452
|
-
return True
|
453
|
-
|
454
|
-
return signal.savgol_filter(
|
455
|
-
self.y,
|
456
|
-
window_length=window_length,
|
457
|
-
polyorder=polyorder,
|
458
|
-
)
|
459
|
-
|
460
|
-
def default_processing(self):
|
461
|
-
"""Dummy method to return spectral data.
|
462
|
-
|
463
|
-
Normally redefined in ancestor classes to include basic processing for
|
464
|
-
specific spectrum type.
|
465
|
-
|
466
|
-
Returns:
|
467
|
-
Tuple[np.array, np.array, float]: Spectral data as X and Y
|
468
|
-
coordinates and a timestamp.
|
469
|
-
"""
|
470
|
-
|
471
|
-
return self.x, self.y, self.timestamp
|
472
|
-
|
473
|
-
@classmethod
|
474
|
-
def from_data(cls, data):
|
475
|
-
"""Class method to instantiate the class from the saved data file.
|
476
|
-
|
477
|
-
Args:
|
478
|
-
data (str): Path to spectral data file (as pickle).
|
479
|
-
|
480
|
-
Returns:
|
481
|
-
New instance with all data inside.
|
482
|
-
"""
|
483
|
-
|
484
|
-
if "pickle" not in data:
|
485
|
-
raise AttributeError("Only .pickle files are supported")
|
486
|
-
|
487
|
-
path = os.path.abspath(os.path.dirname(data))
|
488
|
-
|
489
|
-
spec = cls(path)
|
490
|
-
spec.load_data(data)
|
491
|
-
|
492
|
-
return spec
|
493
|
-
|
494
|
-
def copy(self):
|
495
|
-
"""Dummy class to return a new instance with the same data as the
|
496
|
-
current.
|
497
|
-
|
498
|
-
Returns:
|
499
|
-
(:obj:SpinsolveNMRSpectrum): New object with the same data.
|
500
|
-
"""
|
501
|
-
|
502
|
-
# creating new instance
|
503
|
-
spec = self.__class__(self.path, self.autosaving)
|
504
|
-
|
505
|
-
# loading attributes
|
506
|
-
for prop in self.PUBLIC_PROPERTIES.union(self.INTERNAL_PROPERTIES):
|
507
|
-
setattr(spec, prop, getattr(self, prop))
|
508
|
-
|
509
|
-
return spec
|
1
|
+
import pickle
|
2
|
+
import os
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from abc import ABC, abstractmethod
|
6
|
+
|
7
|
+
import numpy as np
|
8
|
+
import matplotlib.pyplot as plt
|
9
|
+
|
10
|
+
from scipy import (
|
11
|
+
sparse,
|
12
|
+
signal,
|
13
|
+
interpolate,
|
14
|
+
integrate,
|
15
|
+
)
|
16
|
+
|
17
|
+
from .utils import interpolate_to_index, find_nearest_value_index
|
18
|
+
|
19
|
+
|
20
|
+
class AbstractSpectrum(ABC):
|
21
|
+
"""General class for handling spectroscopic data
|
22
|
+
|
23
|
+
Contains methods for data manipulation (load/save) and basic processing
|
24
|
+
features, such as baseline correction, smoothing, peak picking and
|
25
|
+
integration.
|
26
|
+
|
27
|
+
All data processing happens in place!
|
28
|
+
"""
|
29
|
+
|
30
|
+
# for plotting
|
31
|
+
AXIS_MAPPING = {
|
32
|
+
"x": "x_data",
|
33
|
+
"y": "y_data",
|
34
|
+
}
|
35
|
+
|
36
|
+
# list of properties to be saved
|
37
|
+
PUBLIC_PROPERTIES = {
|
38
|
+
"x",
|
39
|
+
"y",
|
40
|
+
"peaks",
|
41
|
+
"timestamp",
|
42
|
+
}
|
43
|
+
|
44
|
+
# list of internal properties to be dumped during new data loading
|
45
|
+
INTERNAL_PROPERTIES = {
|
46
|
+
"baseline",
|
47
|
+
}
|
48
|
+
|
49
|
+
def __init__(self, path=None, autosaving=True):
|
50
|
+
"""Default constructor, loads properties into instance namespace.
|
51
|
+
|
52
|
+
Can be redefined in ancestor classes.
|
53
|
+
|
54
|
+
Args:
|
55
|
+
path (Union[str, bool], optional): Valid path to save data to.
|
56
|
+
If omitted, uses ".//spectrum". If False - no folder created.
|
57
|
+
autosaving (bool, optional): If the True (default) will save the
|
58
|
+
spectrum when the new one is loaded. Will drop otherwise.
|
59
|
+
"""
|
60
|
+
|
61
|
+
self.autosaving = autosaving
|
62
|
+
|
63
|
+
# loading public properties
|
64
|
+
for prop in self.PUBLIC_PROPERTIES:
|
65
|
+
setattr(self, prop, None)
|
66
|
+
|
67
|
+
# loading internal properties
|
68
|
+
for prop in self.INTERNAL_PROPERTIES:
|
69
|
+
setattr(self, prop, None)
|
70
|
+
|
71
|
+
# creating data path
|
72
|
+
if path is None:
|
73
|
+
self.path = os.path.join(".", "spectrum")
|
74
|
+
os.makedirs(self.path, exist_ok=True)
|
75
|
+
else:
|
76
|
+
try:
|
77
|
+
os.makedirs(path, exist_ok=True)
|
78
|
+
self.path = path
|
79
|
+
except TypeError: # type(path) -> bool
|
80
|
+
self.path = "."
|
81
|
+
|
82
|
+
# creating logger
|
83
|
+
if not hasattr(self, "logger"):
|
84
|
+
self.logger = logging.getLogger(self.__class__.__name__)
|
85
|
+
|
86
|
+
def _dump(self):
|
87
|
+
"""Dummy method to dump all spectral data. Used before loading new data."""
|
88
|
+
|
89
|
+
self.__init__(path=self.path, autosaving=self.autosaving)
|
90
|
+
|
91
|
+
@abstractmethod
|
92
|
+
def load_spectrum(self, x, y, timestamp):
|
93
|
+
"""Loads the spectral data.
|
94
|
+
|
95
|
+
This method must be redefined in ancestor classes.
|
96
|
+
|
97
|
+
Args:
|
98
|
+
x (:obj: np.array): An array with data to be plotted as "x" axis.
|
99
|
+
y (:obj: np.array): An array with data to be plotted as "y" axis.
|
100
|
+
timestamp (float): Timestamp to the corresponding spectrum.
|
101
|
+
"""
|
102
|
+
|
103
|
+
try:
|
104
|
+
assert x.shape == y.shape
|
105
|
+
except AssertionError:
|
106
|
+
raise ValueError("X and Y data must have same dimension.") from None
|
107
|
+
|
108
|
+
if self.x is not None:
|
109
|
+
if self.autosaving:
|
110
|
+
self.save_data()
|
111
|
+
self._dump()
|
112
|
+
|
113
|
+
self.x = x
|
114
|
+
self.y = y
|
115
|
+
self.timestamp = timestamp
|
116
|
+
|
117
|
+
def save_data(self, filename=None, verbose=False):
|
118
|
+
"""Saves the data to given path using python pickle module.
|
119
|
+
|
120
|
+
Args:
|
121
|
+
filename (str, optional): Filename for the current spectrum. If
|
122
|
+
omitted, using current timestamp.
|
123
|
+
verbose (bool, optional): If all processed data needs to be saved as
|
124
|
+
well. Default: False.
|
125
|
+
"""
|
126
|
+
if filename is None:
|
127
|
+
filename = f"{self.timestamp}.pickle"
|
128
|
+
else:
|
129
|
+
# file extension used from python 3. documentation
|
130
|
+
filename += ".pickle"
|
131
|
+
|
132
|
+
path = os.path.join(self.path, filename)
|
133
|
+
|
134
|
+
data = {
|
135
|
+
prop: self.__dict__[prop]
|
136
|
+
for prop in self.PUBLIC_PROPERTIES
|
137
|
+
if self.__dict__[prop] is not None
|
138
|
+
}
|
139
|
+
|
140
|
+
if verbose:
|
141
|
+
data.update(
|
142
|
+
{
|
143
|
+
prop: self.__dict__[prop]
|
144
|
+
for prop in self.INTERNAL_PROPERTIES
|
145
|
+
if self.__dict__[prop] is not None
|
146
|
+
}
|
147
|
+
)
|
148
|
+
|
149
|
+
with open(path, "wb") as f:
|
150
|
+
pickle.dump(data, f)
|
151
|
+
|
152
|
+
self.logger.info("Saved in %s", path)
|
153
|
+
|
154
|
+
def load_data(self, path):
|
155
|
+
"""Loads the data from saved pickle file.
|
156
|
+
|
157
|
+
Data is loaded in place, so instance attributes are overwritten.
|
158
|
+
|
159
|
+
Args:
|
160
|
+
path (str): Valid path to pickle file.
|
161
|
+
"""
|
162
|
+
|
163
|
+
if self.x is not None:
|
164
|
+
self._dump()
|
165
|
+
|
166
|
+
# TODO add exception handling
|
167
|
+
with open(path, "rb") as f:
|
168
|
+
data = pickle.load(f)
|
169
|
+
|
170
|
+
self.__dict__.update(data)
|
171
|
+
|
172
|
+
def trim(self, xmin, xmax, in_place=True):
|
173
|
+
"""Trims the spectrum data within specific X region
|
174
|
+
|
175
|
+
Args:
|
176
|
+
xmin (int): Minimum position on the X axis to start from.
|
177
|
+
xmax (int): Maximum position on the X axis to end to.
|
178
|
+
in_place (bool): If trimming happens in place, else returns new
|
179
|
+
array as trimmed copy.
|
180
|
+
|
181
|
+
Returns:
|
182
|
+
(bool): True if trimmed in place.
|
183
|
+
(Tuple[np.array, np.array]): Trimmed copy of the original array as
|
184
|
+
tuple with X and Y points respectively.
|
185
|
+
"""
|
186
|
+
|
187
|
+
# Creating the mask to map arrays
|
188
|
+
above_ind = self.x > xmin
|
189
|
+
below_ind = self.x < xmax
|
190
|
+
full_mask = np.logical_and(above_ind, below_ind)
|
191
|
+
|
192
|
+
# Mapping arrays if they are supplied
|
193
|
+
if in_place:
|
194
|
+
self.y = self.y[full_mask]
|
195
|
+
self.x = self.x[full_mask]
|
196
|
+
if self.baseline is not None and self.baseline.shape == full_mask.shape:
|
197
|
+
self.baseline = self.baseline[full_mask]
|
198
|
+
return True
|
199
|
+
else:
|
200
|
+
return (self.x.copy()[full_mask], self.y.copy()[full_mask])
|
201
|
+
|
202
|
+
def show_spectrum(
|
203
|
+
self,
|
204
|
+
filename=None,
|
205
|
+
title=None,
|
206
|
+
label=None,
|
207
|
+
):
|
208
|
+
"""Plots the spectral data using matplotlib.pyplot module.
|
209
|
+
|
210
|
+
Args:
|
211
|
+
filename (str, optional): Filename for the current plot. If omitted,
|
212
|
+
file is not saved.
|
213
|
+
title (str, optional): Title for the spectrum plot. If omitted, no
|
214
|
+
title is set.
|
215
|
+
label (str, optional): Label for the spectrum plot. If omitted, uses
|
216
|
+
the spectrum timestamp.
|
217
|
+
"""
|
218
|
+
if label is None:
|
219
|
+
label = f"{self.timestamp}"
|
220
|
+
|
221
|
+
fig, ax = plt.subplots(figsize=(12, 8))
|
222
|
+
|
223
|
+
ax.plot(
|
224
|
+
self.x,
|
225
|
+
self.y,
|
226
|
+
color="xkcd:navy blue",
|
227
|
+
label=label,
|
228
|
+
)
|
229
|
+
|
230
|
+
ax.set_xlabel(self.AXIS_MAPPING["x"])
|
231
|
+
ax.set_ylabel(self.AXIS_MAPPING["y"])
|
232
|
+
|
233
|
+
if title is not None:
|
234
|
+
ax.set_title(title)
|
235
|
+
|
236
|
+
# plotting peaks if found
|
237
|
+
if self.peaks is not None:
|
238
|
+
plt.scatter(
|
239
|
+
self.peaks[:, 1],
|
240
|
+
self.peaks[:, 2],
|
241
|
+
label="found peaks",
|
242
|
+
color="xkcd:tangerine",
|
243
|
+
)
|
244
|
+
|
245
|
+
ax.legend()
|
246
|
+
|
247
|
+
if filename is None:
|
248
|
+
fig.show()
|
249
|
+
|
250
|
+
else:
|
251
|
+
path = os.path.join(self.path, "images")
|
252
|
+
os.makedirs(path, exist_ok=True)
|
253
|
+
fig.savefig(os.path.join(path, f"{filename}.png"), dpi=150)
|
254
|
+
|
255
|
+
def find_peaks(self, threshold=1, min_width=.1, min_dist=None, area=None):
|
256
|
+
"""Finds all peaks above the threshold with at least min_width width.
|
257
|
+
|
258
|
+
Args:
|
259
|
+
threshold (float, optional): Relative peak height with respect to
|
260
|
+
the highest peak.
|
261
|
+
min_width (int, optional): Minimum peak width.
|
262
|
+
min_dist (int, optional): Minimum distance between peaks.
|
263
|
+
area (Tuple(int, int), optional): Area to search peaks in. Supplied
|
264
|
+
as min, max X values tuple.
|
265
|
+
|
266
|
+
Return:
|
267
|
+
(:obj: np.array): An array of peaks ids as rounded peak_x coordinate
|
268
|
+
value. If searching within specified area, full peak information
|
269
|
+
matrix is returned, see below for details.
|
270
|
+
|
271
|
+
Also updates the self.peaks attrbiute (if "area" is omitted) as:
|
272
|
+
(:obj: np.array): An (n_peaks x 5) array with peak data as columns:
|
273
|
+
peak_id (float): Rounded peak_x coordinate value.
|
274
|
+
peak_x (float): X-coordinate for the peak.
|
275
|
+
peak_y (float): Y-coordinate for the peak.
|
276
|
+
peak_left_x (float): X-coordinate for the left peak border.
|
277
|
+
peak_right_x (float): X-coordinate for the right peak border.
|
278
|
+
|
279
|
+
Peak data is accessed with indexing, e.g.:
|
280
|
+
self.peaks[n] will give all data for n's peak
|
281
|
+
self.peaks[:, 2] will give Y coordinate for all found peaks
|
282
|
+
"""
|
283
|
+
|
284
|
+
# only dumping if area is omitted
|
285
|
+
if self.peaks is not None and not area:
|
286
|
+
self.peaks = None
|
287
|
+
|
288
|
+
# trimming
|
289
|
+
if area is not None:
|
290
|
+
spec_y = self.trim(area[0], area[1], False)[1]
|
291
|
+
else:
|
292
|
+
spec_y = self.y.copy()
|
293
|
+
|
294
|
+
threshold *= self.y.max() - self.y.min()
|
295
|
+
peaks, _ = signal.find_peaks(
|
296
|
+
spec_y, height=threshold, width=min_width, distance=min_dist
|
297
|
+
)
|
298
|
+
|
299
|
+
# obtaining width for full peak height
|
300
|
+
# TODO deal with intersecting peaks!
|
301
|
+
# TODO deal with incorrect peak width
|
302
|
+
pw = signal.peak_widths(spec_y, peaks, rel_height=0.95)
|
303
|
+
|
304
|
+
# converting all to column vectors by adding extra dimension along 2nd
|
305
|
+
# axis. Check documentation on np.newaxis for details
|
306
|
+
peak_xs = self.x.copy()[peaks][:, np.newaxis]
|
307
|
+
peak_ys = self.y.copy()[peaks][:, np.newaxis]
|
308
|
+
peaks_ids = np.around(peak_xs)
|
309
|
+
peaks_left_ids = interpolate_to_index(self.x, pw[2])[:, np.newaxis]
|
310
|
+
peaks_right_ids = interpolate_to_index(self.x, pw[3])[:, np.newaxis]
|
311
|
+
|
312
|
+
if area is None:
|
313
|
+
# updating only if area is not specified
|
314
|
+
self.peaks = np.hstack(
|
315
|
+
(
|
316
|
+
peaks_ids,
|
317
|
+
peak_xs,
|
318
|
+
peak_ys,
|
319
|
+
peaks_left_ids,
|
320
|
+
peaks_right_ids,
|
321
|
+
)
|
322
|
+
)
|
323
|
+
return peaks_ids
|
324
|
+
|
325
|
+
return np.hstack(
|
326
|
+
(
|
327
|
+
peaks_ids,
|
328
|
+
peak_xs,
|
329
|
+
peak_ys,
|
330
|
+
peaks_left_ids,
|
331
|
+
peaks_right_ids,
|
332
|
+
)
|
333
|
+
)
|
334
|
+
|
335
|
+
def correct_baseline(self, lmbd=1e3, p=0.01, n_iter=10):
|
336
|
+
"""Generates and subtracts the baseline for the given spectrum.
|
337
|
+
|
338
|
+
Based on Eilers, P; Boelens, H. (2005): Baseline Correction with
|
339
|
+
Asymmetric Least Squares Smoothing.
|
340
|
+
|
341
|
+
Default values chosen arbitrary based on processing Raman spectra.
|
342
|
+
|
343
|
+
Args:
|
344
|
+
lmbd (float): Arbitrary parameter to define the smoothness of the
|
345
|
+
baseline the larger lmbd is, the smoother baseline will be,
|
346
|
+
recommended value between 1e2 and 1e5.
|
347
|
+
p (float): An asymmetric least squares parameter to compute the
|
348
|
+
weights of the residuals, chosen arbitrary, recommended values
|
349
|
+
between 0.1 and 0.001.
|
350
|
+
n_iter (int, optional): Number of iterations to perform the fit,
|
351
|
+
recommended value between 5 and 10.
|
352
|
+
"""
|
353
|
+
|
354
|
+
# generating the baseline first
|
355
|
+
L = len(self.y)
|
356
|
+
D = sparse.csc_matrix(np.diff(np.eye(L), 2))
|
357
|
+
w = np.ones(L)
|
358
|
+
for _ in range(n_iter):
|
359
|
+
W = sparse.spdiags(w, 0, L, L)
|
360
|
+
Z = W + lmbd * D.dot(D.transpose())
|
361
|
+
z = sparse.linalg.spsolve(Z, w * self.y)
|
362
|
+
w = p * (self.y > z) + (1 - p) * (self.y < z)
|
363
|
+
|
364
|
+
# updating attribute for future use
|
365
|
+
self.baseline = z
|
366
|
+
|
367
|
+
# subtracting the baseline
|
368
|
+
# TODO update peak coordinates if peaks were present
|
369
|
+
self.y -= z
|
370
|
+
self.logger.info("Baseline corrected")
|
371
|
+
|
372
|
+
def integrate_area(self, area, rule="trapz"):
    """Integrate the spectrum within the given area.

    Args:
        area (Tuple[float, float]): Tuple with left and right border (on the
            X axis) of the desired area. The closest experimental X values
            are used as the actual integration limits.
        rule (str): Method for integration, "trapz" - trapezoidal
            rule (default), "simps" - Simpson's rule.

    Returns:
        float: Definite integral within the given area as approximated by the
            given method.

    Raises:
        ValueError: If ``rule`` is neither "trapz" nor "simps".
    """

    # The historical names ``trapz``/``simps`` are not available in recent
    # SciPy releases; prefer the current names and fall back to the old ones
    # so older installations keep working.
    if rule == "trapz":
        integrator = getattr(integrate, "trapezoid", None) or integrate.trapz
    elif rule == "simps":
        integrator = getattr(integrate, "simpson", None) or integrate.simps
    else:
        raise ValueError(
            'Only trapezoidal "trapz" or Simpson\'s "simps" rules are supported!'
        )

    # closest value in experimental data and its index in data array
    _, left_idx = find_nearest_value_index(self.x, area[0])
    _, right_idx = find_nearest_value_index(self.x, area[1])

    # both borders are included in the integration window
    window = slice(left_idx, right_idx + 1)

    # ``x`` is passed by keyword: newer ``simpson`` no longer accepts it
    # positionally.
    return integrator(self.y[window], x=self.x[window])
|
404
|
+
|
405
|
+
def integrate_peak(self, peak, rule="trapz"):
    """Calculate the area of a single peak.

    Peak picking is triggered first if ``self.peaks`` has not been populated
    yet. The requested value is matched against the first column of
    ``self.peaks`` and the last two columns of the matched row are used as
    the integration borders.

    Args:
        peak (float): Value identifying the peak; the closest entry in the
            first column of ``self.peaks`` is picked if there is no exact
            match. NOTE(review): presumably the rounded peak position -
            confirm against ``find_peaks``.
        rule (str): Method for integration, "trapz" - trapezoidal
            rule (default), "simps" - Simpson's rule.

    Returns:
        float: Definite integral between the peak borders as approximated by
            the given method.
    """

    # make sure the peak table exists before looking anything up
    if self.peaks is None:
        self.find_peaks()

    peak_ids = self.peaks[:, 0]
    matched_peak, row = find_nearest_value_index(peak_ids, peak)

    # each row holds five values; only the two borders are needed here
    _, _, _, left_border, right_border = self.peaks[row]

    self.logger.debug(
        "Integrating peak found at %s, borders %.02f-%.02f",
        matched_peak,
        left_border,
        right_border,
    )

    borders = (left_border, right_border)
    return self.integrate_area(borders, rule=rule)
|
429
|
+
|
430
|
+
def smooth_spectrum(self, window_length=15, polyorder=7, in_place=True):
    """Smooth the spectrum with a Savitzky-Golay filter.

    For details see scipy.signal.savgol_filter.

    Default values for window length and polynomial order were chosen
    arbitrarily based on Raman spectra.

    Args:
        window_length (int): The length of the filter window (i.e. the
            number of coefficients). window_length must be a positive odd
            integer.
        polyorder (int): The order of the polynomial used to fit the
            samples. polyorder must be less than window_length.
        in_place (bool, optional): If True (default), ``self.y`` is
            overwritten with the smoothed data; otherwise ``self.y`` is left
            untouched and the smoothed data is returned.

    Returns:
        True when the smoothing was applied in place, otherwise the smoothed
        Y data.
    """

    smoothed = signal.savgol_filter(
        self.y,
        window_length=window_length,
        polyorder=polyorder,
    )

    if not in_place:
        return smoothed

    self.y = smoothed
    return True
|
459
|
+
|
460
|
+
def default_processing(self):
    """Return the spectral data without any processing.

    Placeholder implementation, normally redefined in subclasses to include
    the basic processing for a specific spectrum type.

    Returns:
        Tuple[np.array, np.array, float]: Spectral data as X and Y
            coordinates and a timestamp.
    """

    spectrum = (self.x, self.y, self.timestamp)
    return spectrum
|
472
|
+
|
473
|
+
@classmethod
def from_data(cls, data):
    """Class method to instantiate the class from a saved data file.

    The new instance is created with the directory of the data file as its
    path, and the file is then loaded through ``load_data``.

    Args:
        data (str or os.PathLike): Path to the spectral data file; must have
            the ".pickle" extension.

    Returns:
        New instance with all data inside.

    Raises:
        AttributeError: If the file does not have the ".pickle" extension.
    """

    # accept pathlib.Path and friends as well as plain strings
    data = os.fspath(data)

    # Check the actual file extension: a plain substring test would also
    # accept e.g. "pickle_dir/run.csv".
    if os.path.splitext(data)[1] != ".pickle":
        raise AttributeError("Only .pickle files are supported")

    directory = os.path.abspath(os.path.dirname(data))

    spec = cls(directory)
    # NOTE(review): load_data presumably unpickles the file - only load data
    # from trusted sources.
    spec.load_data(data)

    return spec
|
493
|
+
|
494
|
+
def copy(self):
    """Return a new instance carrying the same data as the current one.

    The new object is built from ``self.path`` and ``self.autosaving``; every
    attribute listed in ``PUBLIC_PROPERTIES`` and ``INTERNAL_PROPERTIES`` is
    then assigned to it by reference (the values themselves are not
    duplicated).

    Returns:
        New object of the same class with the same data.
    """

    # fresh instance of whatever concrete class this is
    duplicate = self.__class__(self.path, self.autosaving)

    # transfer the stored attributes one by one
    attribute_names = self.PUBLIC_PROPERTIES.union(self.INTERNAL_PROPERTIES)
    for name in attribute_names:
        value = getattr(self, name)
        setattr(duplicate, name, value)

    return duplicate
|