pychemstation 0.5.7.dev1__py3-none-any.whl → 0.5.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
The sole change recorded in this diff removes and re-adds the package's spectrum-handling module in full: all 509 lines come out and go back in with identical visible content. The module source is therefore shown once below.
import pickle
import os
import logging

from abc import ABC, abstractmethod

import numpy as np
import matplotlib.pyplot as plt

from scipy import (
    sparse,
    signal,
    interpolate,
    integrate,
)

from .utils import interpolate_to_index, find_nearest_value_index


class AbstractSpectrum(ABC):
    """General class for handling spectroscopic data.

    Contains methods for data manipulation (load/save) and basic processing
    features, such as baseline correction, smoothing, peak picking and
    integration.

    All data processing happens in place!
    """

    # for plotting
    AXIS_MAPPING = {
        "x": "x_data",
        "y": "y_data",
    }

    # list of properties to be saved
    PUBLIC_PROPERTIES = {
        "x",
        "y",
        "peaks",
        "timestamp",
    }

    # list of internal properties to be dumped during new data loading
    INTERNAL_PROPERTIES = {
        "baseline",
    }

    def __init__(self, path=None, autosaving=True):
        """Default constructor, loads properties into instance namespace.

        Can be redefined in ancestor classes.

        Args:
            path (Union[str, bool], optional): Valid path to save data to.
                If omitted, uses "./spectrum". If False, no folder is created.
            autosaving (bool, optional): If True (default), saves the current
                spectrum when a new one is loaded. Otherwise the data is
                dropped.
        """

        self.autosaving = autosaving

        # loading public properties
        for prop in self.PUBLIC_PROPERTIES:
            setattr(self, prop, None)

        # loading internal properties
        for prop in self.INTERNAL_PROPERTIES:
            setattr(self, prop, None)

        # creating data path
        if path is None:
            self.path = os.path.join(".", "spectrum")
            os.makedirs(self.path, exist_ok=True)
        else:
            try:
                os.makedirs(path, exist_ok=True)
                self.path = path
            except TypeError:  # type(path) -> bool
                self.path = "."

        # creating logger
        if not hasattr(self, "logger"):
            self.logger = logging.getLogger(self.__class__.__name__)

    def _dump(self):
        """Dummy method to dump all spectral data. Used before loading new data."""

        self.__init__(path=self.path, autosaving=self.autosaving)

    @abstractmethod
    def load_spectrum(self, x, y, timestamp):
        """Loads the spectral data.

        This method must be redefined in ancestor classes.

        Args:
            x (:obj: np.array): An array with data to be plotted as "x" axis.
            y (:obj: np.array): An array with data to be plotted as "y" axis.
            timestamp (float): Timestamp of the corresponding spectrum.
        """

        if x.shape != y.shape:
            raise ValueError("X and Y data must have same dimension.")

        if self.x is not None:
            if self.autosaving:
                self.save_data()
            self._dump()

        self.x = x
        self.y = y
        self.timestamp = timestamp

    def save_data(self, filename=None, verbose=False):
        """Saves the data to the given path using the python pickle module.

        Args:
            filename (str, optional): Filename for the current spectrum. If
                omitted, the current timestamp is used.
            verbose (bool, optional): If True, saves the processed (internal)
                data as well. Default: False.
        """
        if filename is None:
            filename = f"{self.timestamp}.pickle"
        else:
            # ".pickle" extension as suggested by the python pickle docs
            filename += ".pickle"

        path = os.path.join(self.path, filename)

        data = {
            prop: self.__dict__[prop]
            for prop in self.PUBLIC_PROPERTIES
            if self.__dict__[prop] is not None
        }

        if verbose:
            data.update(
                {
                    prop: self.__dict__[prop]
                    for prop in self.INTERNAL_PROPERTIES
                    if self.__dict__[prop] is not None
                }
            )

        with open(path, "wb") as f:
            pickle.dump(data, f)

        self.logger.info("Saved in %s", path)

    def load_data(self, path):
        """Loads the data from a saved pickle file.

        Data is loaded in place, so instance attributes are overwritten.

        Args:
            path (str): Valid path to pickle file.
        """

        if self.x is not None:
            self._dump()

        # TODO add exception handling
        with open(path, "rb") as f:
            data = pickle.load(f)

        self.__dict__.update(data)

    def trim(self, xmin, xmax, in_place=True):
        """Trims the spectrum data within a specific X region.

        Args:
            xmin (int): Minimum position on the X axis to start from.
            xmax (int): Maximum position on the X axis to end at.
            in_place (bool): If True, trims in place, else returns the trimmed
                copy as new arrays.

        Returns:
            (bool): True if trimmed in place.
            (Tuple[np.array, np.array]): Trimmed copy of the original arrays
                as a tuple with X and Y points respectively.
        """

        # Creating the mask to map arrays
        above_ind = self.x > xmin
        below_ind = self.x < xmax
        full_mask = np.logical_and(above_ind, below_ind)

        # Mapping arrays if they are supplied
        if in_place:
            self.y = self.y[full_mask]
            self.x = self.x[full_mask]
            if self.baseline is not None and self.baseline.shape == full_mask.shape:
                self.baseline = self.baseline[full_mask]
            return True
        else:
            return (self.x.copy()[full_mask], self.y.copy()[full_mask])

    def show_spectrum(
        self,
        filename=None,
        title=None,
        label=None,
    ):
        """Plots the spectral data using the matplotlib.pyplot module.

        Args:
            filename (str, optional): Filename for the current plot. If
                omitted, the file is not saved.
            title (str, optional): Title for the spectrum plot. If omitted, no
                title is set.
            label (str, optional): Label for the spectrum plot. If omitted,
                uses the spectrum timestamp.
        """
        if label is None:
            label = f"{self.timestamp}"

        fig, ax = plt.subplots(figsize=(12, 8))

        ax.plot(
            self.x,
            self.y,
            color="xkcd:navy blue",
            label=label,
        )

        ax.set_xlabel(self.AXIS_MAPPING["x"])
        ax.set_ylabel(self.AXIS_MAPPING["y"])

        if title is not None:
            ax.set_title(title)

        # plotting peaks if found
        if self.peaks is not None:
            ax.scatter(
                self.peaks[:, 1],
                self.peaks[:, 2],
                label="found peaks",
                color="xkcd:tangerine",
            )

        ax.legend()

        if filename is None:
            fig.show()

        else:
            path = os.path.join(self.path, "images")
            os.makedirs(path, exist_ok=True)
            fig.savefig(os.path.join(path, f"{filename}.png"), dpi=150)

    def find_peaks(self, threshold=1, min_width=0.1, min_dist=None, area=None):
        """Finds all peaks above the threshold with at least min_width width.

        Args:
            threshold (float, optional): Relative peak height with respect to
                the highest peak.
            min_width (float, optional): Minimum peak width, in samples.
            min_dist (int, optional): Minimum distance between peaks.
            area (Tuple[int, int], optional): Area to search peaks in,
                supplied as a (min, max) tuple of X values.

        Returns:
            (:obj: np.array): An array of peak ids as rounded peak_x
                coordinate values. If searching within a specified area, the
                full peak information matrix is returned instead, see below
                for details.

        Also updates the self.peaks attribute (if "area" is omitted) as:
            (:obj: np.array): An (n_peaks x 5) array with peak data as columns:
                peak_id (float): Rounded peak_x coordinate value.
                peak_x (float): X-coordinate for the peak.
                peak_y (float): Y-coordinate for the peak.
                peak_left_x (float): X-coordinate for the left peak border.
                peak_right_x (float): X-coordinate for the right peak border.

        Peak data is accessed with indexing, e.g.:
            self.peaks[n] will give all data for the nth peak
            self.peaks[:, 2] will give the Y coordinates of all found peaks
        """

        # only dumping if area is omitted
        if self.peaks is not None and not area:
            self.peaks = None

        # trimming
        if area is not None:
            spec_y = self.trim(area[0], area[1], False)[1]
        else:
            spec_y = self.y.copy()

        # the relative threshold is scaled by the full-spectrum amplitude
        threshold *= self.y.max() - self.y.min()
        peaks, _ = signal.find_peaks(
            spec_y, height=threshold, width=min_width, distance=min_dist
        )

        # obtaining widths at near-full peak height
        # TODO deal with intersecting peaks!
        # TODO deal with incorrect peak width
        pw = signal.peak_widths(spec_y, peaks, rel_height=0.95)

        # converting all to column vectors by adding an extra dimension along
        # the 2nd axis; check documentation on np.newaxis for details
        peak_xs = self.x.copy()[peaks][:, np.newaxis]
        peak_ys = self.y.copy()[peaks][:, np.newaxis]
        peaks_ids = np.around(peak_xs)
        peaks_left_ids = interpolate_to_index(self.x, pw[2])[:, np.newaxis]
        peaks_right_ids = interpolate_to_index(self.x, pw[3])[:, np.newaxis]

        if area is None:
            # updating only if area is not specified
            self.peaks = np.hstack(
                (
                    peaks_ids,
                    peak_xs,
                    peak_ys,
                    peaks_left_ids,
                    peaks_right_ids,
                )
            )
            return peaks_ids

        return np.hstack(
            (
                peaks_ids,
                peak_xs,
                peak_ys,
                peaks_left_ids,
                peaks_right_ids,
            )
        )

    def correct_baseline(self, lmbd=1e3, p=0.01, n_iter=10):
        """Generates and subtracts the baseline for the given spectrum.

        Based on Eilers, P.; Boelens, H. (2005): Baseline Correction with
        Asymmetric Least Squares Smoothing.

        Default values were chosen arbitrarily based on processing Raman
        spectra.

        Args:
            lmbd (float): Smoothness parameter for the baseline: the larger
                lmbd is, the smoother the baseline will be; recommended values
                between 1e2 and 1e5.
            p (float): Asymmetric least squares parameter to compute the
                weights of the residuals; recommended values between 0.1 and
                0.001.
            n_iter (int, optional): Number of iterations to perform the fit;
                recommended values between 5 and 10.
        """

        # generating the baseline first
        L = len(self.y)
        D = sparse.csc_matrix(np.diff(np.eye(L), 2))
        w = np.ones(L)
        for _ in range(n_iter):
            W = sparse.spdiags(w, 0, L, L)
            Z = W + lmbd * D.dot(D.transpose())
            z = sparse.linalg.spsolve(Z, w * self.y)
            w = p * (self.y > z) + (1 - p) * (self.y < z)

        # updating attribute for future use
        self.baseline = z

        # subtracting the baseline
        # TODO update peak coordinates if peaks were present
        self.y -= z
        self.logger.info("Baseline corrected")

    def integrate_area(self, area, rule="trapz"):
        """Integrates the spectrum within the given area.

        Args:
            area (Tuple[float, float]): Tuple with the left and right X-axis
                borders of the desired area.
            rule (str): Method for integration: "trapz" for the trapezoidal
                rule (default), "simps" for Simpson's rule.

        Returns:
            float: Definite integral within the given area as approximated by
                the chosen method.
        """

        # closest value in experimental data and its index in data array
        _, left_idx = find_nearest_value_index(self.x, area[0])
        _, right_idx = find_nearest_value_index(self.x, area[1])

        # NOTE trapz/simps are legacy scipy names; recent SciPy exposes them
        # as integrate.trapezoid and integrate.simpson
        if rule == "trapz":
            return integrate.trapz(
                self.y[left_idx : right_idx + 1], self.x[left_idx : right_idx + 1]
            )

        elif rule == "simps":
            return integrate.simps(
                self.y[left_idx : right_idx + 1], self.x[left_idx : right_idx + 1]
            )

        else:
            raise ValueError(
                'Only trapezoidal "trapz" or Simpson\'s "simps" '
                "rules are supported!"
            )

    def integrate_peak(self, peak, rule="trapz"):
        """Calculates the area for a given peak.

        Args:
            peak (float): (Rounded) peak X coordinate. If the precise peak
                position was not found, the closest one is picked.
            rule (str): Method for integration: "trapz" for the trapezoidal
                rule (default), "simps" for Simpson's rule.

        Returns:
            float: Definite integral within the peak borders as approximated
                by the chosen method.
        """

        if self.peaks is None:
            self.find_peaks()

        true_peak, idx = find_nearest_value_index(self.peaks[:, 0], peak)
        _, _, _, left, right = self.peaks[idx]

        self.logger.debug(
            "Integrating peak found at %s, borders %.02f-%.02f", true_peak, left, right
        )

        return self.integrate_area((left, right), rule=rule)

    def smooth_spectrum(self, window_length=15, polyorder=7, in_place=True):
        """Smooths the spectrum using a Savitzky-Golay filter.

        For details see scipy.signal.savgol_filter.

        Default values for window length and polynomial order were chosen
        arbitrarily based on Raman spectra.

        Args:
            window_length (int): The length of the filter window (i.e. the
                number of coefficients); window_length must be a positive odd
                integer.
            polyorder (int): The order of the polynomial used to fit the
                samples; polyorder must be less than window_length.
            in_place (bool, optional): If True (default), smooths in place and
                returns True; otherwise returns the smoothed Y data.
        """

        if in_place:
            self.y = signal.savgol_filter(
                self.y, window_length=window_length, polyorder=polyorder
            )
            return True

        return signal.savgol_filter(
            self.y,
            window_length=window_length,
            polyorder=polyorder,
        )

    def default_processing(self):
        """Dummy method to return spectral data.

        Normally redefined in ancestor classes to include basic processing for
        a specific spectrum type.

        Returns:
            Tuple[np.array, np.array, float]: Spectral data as X and Y
                coordinates and a timestamp.
        """

        return self.x, self.y, self.timestamp

    @classmethod
    def from_data(cls, data):
        """Class method to instantiate the class from a saved data file.

        Args:
            data (str): Path to spectral data file (as pickle).

        Returns:
            New instance with all data inside.
        """

        if not data.endswith(".pickle"):
            raise AttributeError("Only .pickle files are supported")

        path = os.path.abspath(os.path.dirname(data))

        spec = cls(path)
        spec.load_data(data)

        return spec

    def copy(self):
        """Returns a new instance with the same data as the current one.

        Returns:
            (:obj:AbstractSpectrum): New object with the same data.
        """

        # creating new instance
        spec = self.__class__(self.path, self.autosaving)

        # loading attributes
        for prop in self.PUBLIC_PROPERTIES.union(self.INTERNAL_PROPERTIES):
            setattr(spec, prop, getattr(self, prop))

        return spec
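
Since load_spectrum is abstract, the class must be subclassed before use. A minimal sketch, assuming it runs in the same module as AbstractSpectrum; the subclass name and the synthetic data are illustrative, not part of the package:

    import time

    class SimpleSpectrum(AbstractSpectrum):
        """Minimal concrete spectrum: accepts the data as-is."""

        def load_spectrum(self, x, y, timestamp):
            super().load_spectrum(x, y, timestamp)

    # two synthetic Gaussian bands on a flat background
    x = np.linspace(0, 100, 1000)
    y = np.exp(-((x - 40) ** 2) / 4) + 0.5 * np.exp(-((x - 60) ** 2) / 9)

    spec = SimpleSpectrum(path=False)  # False -> no data folder; path falls back to "."
    spec.load_spectrum(x, y, time.time())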
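
The asymmetric-least-squares baseline correction and the Savitzky-Golay smoothing both mutate self.y in place. A sketch continuing from the snippet above, with noise and a sloping baseline added to the synthetic data (parameter values are illustrative):

    rng = np.random.default_rng(0)
    drifting = y + 0.02 * x + rng.normal(0.0, 0.01, x.size)

    spec = SimpleSpectrum(path=False, autosaving=False)
    spec.load_spectrum(x, drifting, time.time())

    spec.correct_baseline(lmbd=1e4, p=0.01)  # stores spec.baseline, subtracts it from spec.y
    spec.smooth_spectrum(window_length=15, polyorder=7)  # in place, returns True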
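
find_peaks scales threshold by the full amplitude of self.y, so threshold=0.3 means 30 % of the spectrum's range, and without area it also populates self.peaks. Continuing the sketch:

    ids = spec.find_peaks(threshold=0.3, min_width=2)
    print(ids.ravel())       # rounded X positions of the two bands, e.g. [40. 60.]
    print(spec.peaks.shape)  # (n_peaks, 5): id, x, y, left_x, right_x

    # searching only inside a window returns the full peak matrix
    # and leaves self.peaks untouched
    local = spec.find_peaks(threshold=0.3, min_width=2, area=(50, 70))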
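
Integration works either over an explicit X window or per detected peak; integrate_peak looks the peak up by its rounded X coordinate in self.peaks. Continuing the sketch:

    # definite integral over an explicit X window
    window_area = spec.integrate_area((35, 45))  # trapezoidal rule
    simpson_area = spec.integrate_area((35, 45), rule="simps")

    # per-peak integral between the detected peak borders
    peak_area = spec.integrate_peak(ids[0, 0])
    print(window_area, simpson_area, peak_area)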
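
save_data, from_data and copy give a pickle-based round trip. A final sketch (the filename is illustrative):

    spec.save_data(filename="demo")  # writes ./demo.pickle, since spec.path is "."

    restored = SimpleSpectrum.from_data("./demo.pickle")
    assert restored.x.shape == spec.x.shape

    # new instance of the same class, holding references to the same arrays
    duplicate = restored.copy()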