pychemstation 0.5.6__py3-none-any.whl → 0.5.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,304 +1,304 @@
1
- """
2
- Module contains various utility function for spectral data processing and
3
- analysis.
4
- """
5
-
6
- import numpy as np
7
- import scipy
8
-
9
- from .utils import find_nearest_value_index
10
-
11
-
12
- def create_binary_peak_map(data):
13
- """Return binary map of the peaks within data points.
14
-
15
- True values are assigned to potential peak points, False - to baseline.
16
-
17
- Args:
18
- data (:obj:np.array): 1D array with data points.
19
-
20
- Returns:
21
- :obj:np.array, dtype=bool: Mapping of data points, where True is
22
- potential peak region point, False - baseline.
23
- """
24
- # copying array
25
- data_c = np.copy(data)
26
-
27
- # placeholder for the peak mapping
28
- peak_map = np.full_like(data_c, False, dtype=bool)
29
-
30
- for _ in range(100500): # shouldn't take more iterations
31
-
32
- # looking for peaks
33
- peaks_found = np.logical_or(
34
- data_c > np.mean(data_c) + np.std(data_c) * 3,
35
- data_c < np.mean(data_c) - np.std(data_c) * 3,
36
- )
37
-
38
- # merging with peak mapping
39
- np.logical_or(peak_map, peaks_found, out=peak_map)
40
-
41
- # if no peaks found - break
42
- if not peaks_found.any():
43
- break
44
-
45
- # setting values to 0 and iterating again
46
- data_c[peaks_found] = 0
47
-
48
- return peak_map
49
-
50
-
51
- def combine_map_to_regions(mapping):
52
- """Combine True values into their indexes arrays.
53
-
54
- Args:
55
- mapping (:obj:np.array): Boolean mapping array to extract the indexes
56
- from.
57
-
58
- Returns:
59
- :obj:np.array: 2D array with left and right borders of regions, where
60
- mapping is True.
61
-
62
- Example:
63
- >>> combine_map_to_regions(np.array([True, True, False, True, False]))
64
- array([[0, 1],
65
- [3, 3]])
66
- """
67
-
68
- # No peaks identified, i.e. mapping is all False
69
- if not mapping.any():
70
- return np.array([], dtype="int64")
71
-
72
- # region borders
73
- region_borders = np.diff(mapping)
74
-
75
- # corresponding indexes
76
- border_indexes = np.argwhere(region_borders)
77
-
78
- lefts = border_indexes[::2] + 1 # because diff was used to get the index
79
-
80
- # edge case, where first peak doesn't have left border
81
- if mapping[border_indexes][0]:
82
- # just preppend 0 as first left border
83
- # mind the vstack, as np.argwhere produces a vector array
84
- lefts = np.vstack((0, lefts))
85
-
86
- rights = border_indexes[1::2]
87
-
88
- # another edge case, where last peak doesn't have a right border
89
- if mapping[-1]: # True if last point identified as potential peak
90
- # just append -1 as last peak right border
91
- rights = np.vstack((rights, -1))
92
-
93
- # columns as borders, rows as regions, i.e.
94
- # :output:[0] -> first peak region
95
- return np.hstack((lefts, rights))
96
-
97
-
98
- def filter_regions(x_data, peaks_regions):
99
- """Filter peak regions.
100
-
101
- Peak regions are filtered to remove potential false positives (e.g. noise
102
- spikes).
103
-
104
- Args:
105
- x_data (:obj:np.array): X data points, needed to pick up the data
106
- resolution and map the region indexes to the corresponding data
107
- points.
108
- y_data (:obj:np.array): Y data points, needed to validate if the peaks
109
- are actually present in the region and remove invalid regions.
110
- peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
111
- (rows) as left and right borders (columns).
112
-
113
- Returns:
114
- :obj:np.array: 2D Mx2 array with filtered peak regions indexes(rows) as
115
- left and right borders (columns).
116
- """
117
-
118
- # filter peaks where region is smaller than spectrum resolution
119
- # like single spikes, e.g. noise
120
- # compute the regions first
121
- x_data_regions = np.copy(x_data[peaks_regions])
122
-
123
- # get arguments where absolute difference is greater than data resolution
124
- resolution = np.absolute(np.mean(np.diff(x_data)))
125
-
126
- # (N, 1) array!
127
- valid_regions_map = np.absolute(np.diff(x_data_regions)) > resolution
128
-
129
- # get their indexes, mind the flattening of all arrays!
130
- valid_regions_indexes = np.argwhere(valid_regions_map.flatten()).flatten()
131
-
132
- # filtering!
133
- peaks_regions = peaks_regions[valid_regions_indexes]
134
-
135
- return peaks_regions
136
-
137
-
138
- def filter_noisy_regions(y_data, peaks_regions):
139
- """Remove noisy regions from given regions array.
140
-
141
- Peak regions are filtered to remove false positive noise regions, e.g.
142
- incorrectly assigned due to curvy baseline. Filtering is performed by
143
- computing average peak points/data points ratio.
144
-
145
- Args:
146
- y_data (:obj:np.array): Y data points, needed to validate if the peaks
147
- are actually present in the region and remove invalid regions.
148
- peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
149
- (rows) as left and right borders (columns).
150
-
151
- Returns:
152
- :obj:np.array: 2D Mx2 array with filtered peak regions indexes(rows) as
153
- left and right borders (columns).
154
- """
155
-
156
- # compute the actual regions data points
157
- y_data_regions = []
158
- for region in peaks_regions:
159
- y_data_regions.append(y_data[region[0] : region[-1]])
160
-
161
- # compute noise data regions, i.e. in between peak regions
162
- noise_data_regions = []
163
- for row, _ in enumerate(peaks_regions):
164
- try:
165
- noise_data_regions.append(
166
- y_data[peaks_regions[row][1] : peaks_regions[row + 1][0]]
167
- )
168
- except IndexError:
169
- # exception for the last row -> discard
170
- pass
171
-
172
- # compute average peaks/data points ratio for noisy regions
173
- noise_peaks_ratio = []
174
- for region in noise_data_regions:
175
- # protection from empty regions
176
- if region.size != 0:
177
- # minimum height is pretty low to ensure enough noise is picked
178
- peaks, _ = scipy.signal.find_peaks(region, height=region.max() * 0.2)
179
- noise_peaks_ratio.append(peaks.size / region.size)
180
-
181
- # compute average with weights equal to the region length
182
- noise_peaks_ratio = np.average(
183
- noise_peaks_ratio, weights=[region.size for region in noise_data_regions]
184
- )
185
-
186
- # filtering!
187
- valid_regions_indexes = []
188
- for row, region in enumerate(y_data_regions):
189
- peaks, _ = scipy.signal.find_peaks(region, height=region.max() * 0.2)
190
- if peaks.size != 0 and peaks.size / region.size < noise_peaks_ratio:
191
- valid_regions_indexes.append(row)
192
-
193
- # protecting from complete cleaning
194
- if not valid_regions_indexes:
195
- return peaks_regions
196
-
197
- peaks_regions = peaks_regions[np.array(valid_regions_indexes)]
198
-
199
- return peaks_regions
200
-
201
-
202
- def merge_regions(x_data, peaks_regions, d_merge, recursively=True):
203
- """Merge peak regions if distance between is less than delta.
204
-
205
- Args:
206
- x_data (:obj:np.array): X data points.
207
- peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
208
- (rows) as left and right borders (columns).
209
- d_merge (float): Minimum distance in X data points to merge two or more
210
- regions together.
211
- recursively (bool, optional): If True - will repeat the procedure until
212
- all regions with distance < than d_merge will merge.
213
-
214
- Returns:
215
- :obj:np.array: 2D Mx2 array with peak regions indexes (rows) as left and
216
- right borders (columns), merged according to predefined minimal
217
- distance.
218
-
219
- Example:
220
- >>> regions = np.array([
221
- [1, 10],
222
- [11, 20],
223
- [25, 45],
224
- [50, 75],
225
- [100, 120],
226
- [122, 134]
227
- ])
228
- >>> data = np.ones_like(regions) # ones as example
229
- >>> merge_regions(data, regions, 1)
230
- array([[ 1, 20],
231
- [ 25, 45],
232
- [ 50, 75],
233
- [100, 120],
234
- [122, 134]])
235
- >>> merge_regions(data, regions, 20, True)
236
- array([[ 1, 75],
237
- [100, 134]])
238
- """
239
- # the code is pretty ugly but works
240
- merged_regions = []
241
-
242
- # converting to list to drop the data of the fly
243
- regions = peaks_regions.tolist()
244
-
245
- for i, _ in enumerate(regions):
246
- try:
247
- # check left border of i regions with right of i+1
248
- if abs(x_data[regions[i][-1]] - x_data[regions[i + 1][0]]) <= d_merge:
249
- # if lower append merge the regions
250
- merged_regions.append([regions[i][0], regions[i + 1][-1]])
251
- # drop the merged one
252
- regions.pop(i + 1)
253
- else:
254
- # if nothing to merge, just append the current region
255
- merged_regions.append(regions[i])
256
- except IndexError:
257
- # last row
258
- merged_regions.append(regions[i])
259
-
260
- merged_regions = np.array(merged_regions)
261
-
262
- if not recursively:
263
- return merged_regions
264
-
265
- # if recursively, check for the difference
266
- if (merged_regions == regions).all():
267
- # done
268
- return merged_regions
269
-
270
- return merge_regions(x_data, merged_regions, d_merge, recursively=True)
271
-
272
-
273
- def expand_regions(x_data, peaks_regions, d_expand):
274
- """Expand the peak regions by the desired value.
275
-
276
- Args:
277
- x_data (:obj:np.array): X data points.
278
- peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
279
- (rows) as left and right borders (columns).
280
- d_expand (float): Value to expand borders to (in X data scale).
281
-
282
- Returns:
283
- :obj:np.array: 2D Nx2 array with expanded peak regions indexes (rows) as
284
- left and right borders (columns).
285
- """
286
-
287
- data_regions = np.copy(x_data[peaks_regions])
288
-
289
- # determine scale orientation, i.e. decreasing (e.g. ppm on NMR spectrum)
290
- # or increasing (e.g. wavelength on UV spectrum)
291
- if (data_regions[:, 0] - data_regions[:, 1]).mean() > 0:
292
- # ppm-like scale
293
- data_regions[:, 0] += d_expand
294
- data_regions[:, -1] -= d_expand
295
- else:
296
- # wavelength-like scale
297
- data_regions[:, 0] -= d_expand
298
- data_regions[:, -1] += d_expand
299
-
300
- # converting new values to new indexes
301
- for index_, value in np.ndenumerate(data_regions):
302
- data_regions[index_] = find_nearest_value_index(x_data, value)[1]
303
-
304
- return data_regions.astype(int)
1
+ """
2
+ Module contains various utility function for spectral data processing and
3
+ analysis.
4
+ """
5
+
6
+ import numpy as np
7
+ import scipy
8
+
9
+ from .utils import find_nearest_value_index
10
+
11
+
12
+ def create_binary_peak_map(data):
13
+ """Return binary map of the peaks within data points.
14
+
15
+ True values are assigned to potential peak points, False - to baseline.
16
+
17
+ Args:
18
+ data (:obj:np.array): 1D array with data points.
19
+
20
+ Returns:
21
+ :obj:np.array, dtype=bool: Mapping of data points, where True is
22
+ potential peak region point, False - baseline.
23
+ """
24
+ # copying array
25
+ data_c = np.copy(data)
26
+
27
+ # placeholder for the peak mapping
28
+ peak_map = np.full_like(data_c, False, dtype=bool)
29
+
30
+ for _ in range(100500): # shouldn't take more iterations
31
+
32
+ # looking for peaks
33
+ peaks_found = np.logical_or(
34
+ data_c > np.mean(data_c) + np.std(data_c) * 3,
35
+ data_c < np.mean(data_c) - np.std(data_c) * 3,
36
+ )
37
+
38
+ # merging with peak mapping
39
+ np.logical_or(peak_map, peaks_found, out=peak_map)
40
+
41
+ # if no peaks found - break
42
+ if not peaks_found.any():
43
+ break
44
+
45
+ # setting values to 0 and iterating again
46
+ data_c[peaks_found] = 0
47
+
48
+ return peak_map
49
+
50
+
51
+ def combine_map_to_regions(mapping):
52
+ """Combine True values into their indexes arrays.
53
+
54
+ Args:
55
+ mapping (:obj:np.array): Boolean mapping array to extract the indexes
56
+ from.
57
+
58
+ Returns:
59
+ :obj:np.array: 2D array with left and right borders of regions, where
60
+ mapping is True.
61
+
62
+ Example:
63
+ >>> combine_map_to_regions(np.array([True, True, False, True, False]))
64
+ array([[0, 1],
65
+ [3, 3]])
66
+ """
67
+
68
+ # No peaks identified, i.e. mapping is all False
69
+ if not mapping.any():
70
+ return np.array([], dtype="int64")
71
+
72
+ # region borders
73
+ region_borders = np.diff(mapping)
74
+
75
+ # corresponding indexes
76
+ border_indexes = np.argwhere(region_borders)
77
+
78
+ lefts = border_indexes[::2] + 1 # because diff was used to get the index
79
+
80
+ # edge case, where first peak doesn't have left border
81
+ if mapping[border_indexes][0]:
82
+ # just preppend 0 as first left border
83
+ # mind the vstack, as np.argwhere produces a vector array
84
+ lefts = np.vstack((0, lefts))
85
+
86
+ rights = border_indexes[1::2]
87
+
88
+ # another edge case, where last peak doesn't have a right border
89
+ if mapping[-1]: # True if last point identified as potential peak
90
+ # just append -1 as last peak right border
91
+ rights = np.vstack((rights, -1))
92
+
93
+ # columns as borders, rows as regions, i.e.
94
+ # :output:[0] -> first peak region
95
+ return np.hstack((lefts, rights))
96
+
97
+
98
+ def filter_regions(x_data, peaks_regions):
99
+ """Filter peak regions.
100
+
101
+ Peak regions are filtered to remove potential false positives (e.g. noise
102
+ spikes).
103
+
104
+ Args:
105
+ x_data (:obj:np.array): X data points, needed to pick up the data
106
+ resolution and map the region indexes to the corresponding data
107
+ points.
108
+ y_data (:obj:np.array): Y data points, needed to validate if the peaks
109
+ are actually present in the region and remove invalid regions.
110
+ peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
111
+ (rows) as left and right borders (columns).
112
+
113
+ Returns:
114
+ :obj:np.array: 2D Mx2 array with filtered peak regions indexes(rows) as
115
+ left and right borders (columns).
116
+ """
117
+
118
+ # filter peaks where region is smaller than spectrum resolution
119
+ # like single spikes, e.g. noise
120
+ # compute the regions first
121
+ x_data_regions = np.copy(x_data[peaks_regions])
122
+
123
+ # get arguments where absolute difference is greater than data resolution
124
+ resolution = np.absolute(np.mean(np.diff(x_data)))
125
+
126
+ # (N, 1) array!
127
+ valid_regions_map = np.absolute(np.diff(x_data_regions)) > resolution
128
+
129
+ # get their indexes, mind the flattening of all arrays!
130
+ valid_regions_indexes = np.argwhere(valid_regions_map.flatten()).flatten()
131
+
132
+ # filtering!
133
+ peaks_regions = peaks_regions[valid_regions_indexes]
134
+
135
+ return peaks_regions
136
+
137
+
138
+ def filter_noisy_regions(y_data, peaks_regions):
139
+ """Remove noisy regions from given regions array.
140
+
141
+ Peak regions are filtered to remove false positive noise regions, e.g.
142
+ incorrectly assigned due to curvy baseline. Filtering is performed by
143
+ computing average peak points/data points ratio.
144
+
145
+ Args:
146
+ y_data (:obj:np.array): Y data points, needed to validate if the peaks
147
+ are actually present in the region and remove invalid regions.
148
+ peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
149
+ (rows) as left and right borders (columns).
150
+
151
+ Returns:
152
+ :obj:np.array: 2D Mx2 array with filtered peak regions indexes(rows) as
153
+ left and right borders (columns).
154
+ """
155
+
156
+ # compute the actual regions data points
157
+ y_data_regions = []
158
+ for region in peaks_regions:
159
+ y_data_regions.append(y_data[region[0] : region[-1]])
160
+
161
+ # compute noise data regions, i.e. in between peak regions
162
+ noise_data_regions = []
163
+ for row, _ in enumerate(peaks_regions):
164
+ try:
165
+ noise_data_regions.append(
166
+ y_data[peaks_regions[row][1] : peaks_regions[row + 1][0]]
167
+ )
168
+ except IndexError:
169
+ # exception for the last row -> discard
170
+ pass
171
+
172
+ # compute average peaks/data points ratio for noisy regions
173
+ noise_peaks_ratio = []
174
+ for region in noise_data_regions:
175
+ # protection from empty regions
176
+ if region.size != 0:
177
+ # minimum height is pretty low to ensure enough noise is picked
178
+ peaks, _ = scipy.signal.find_peaks(region, height=region.max() * 0.2)
179
+ noise_peaks_ratio.append(peaks.size / region.size)
180
+
181
+ # compute average with weights equal to the region length
182
+ noise_peaks_ratio = np.average(
183
+ noise_peaks_ratio, weights=[region.size for region in noise_data_regions]
184
+ )
185
+
186
+ # filtering!
187
+ valid_regions_indexes = []
188
+ for row, region in enumerate(y_data_regions):
189
+ peaks, _ = scipy.signal.find_peaks(region, height=region.max() * 0.2)
190
+ if peaks.size != 0 and peaks.size / region.size < noise_peaks_ratio:
191
+ valid_regions_indexes.append(row)
192
+
193
+ # protecting from complete cleaning
194
+ if not valid_regions_indexes:
195
+ return peaks_regions
196
+
197
+ peaks_regions = peaks_regions[np.array(valid_regions_indexes)]
198
+
199
+ return peaks_regions
200
+
201
+
202
+ def merge_regions(x_data, peaks_regions, d_merge, recursively=True):
203
+ """Merge peak regions if distance between is less than delta.
204
+
205
+ Args:
206
+ x_data (:obj:np.array): X data points.
207
+ peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
208
+ (rows) as left and right borders (columns).
209
+ d_merge (float): Minimum distance in X data points to merge two or more
210
+ regions together.
211
+ recursively (bool, optional): If True - will repeat the procedure until
212
+ all regions with distance < than d_merge will merge.
213
+
214
+ Returns:
215
+ :obj:np.array: 2D Mx2 array with peak regions indexes (rows) as left and
216
+ right borders (columns), merged according to predefined minimal
217
+ distance.
218
+
219
+ Example:
220
+ >>> regions = np.array([
221
+ [1, 10],
222
+ [11, 20],
223
+ [25, 45],
224
+ [50, 75],
225
+ [100, 120],
226
+ [122, 134]
227
+ ])
228
+ >>> data = np.ones_like(regions) # ones as example
229
+ >>> merge_regions(data, regions, 1)
230
+ array([[ 1, 20],
231
+ [ 25, 45],
232
+ [ 50, 75],
233
+ [100, 120],
234
+ [122, 134]])
235
+ >>> merge_regions(data, regions, 20, True)
236
+ array([[ 1, 75],
237
+ [100, 134]])
238
+ """
239
+ # the code is pretty ugly but works
240
+ merged_regions = []
241
+
242
+ # converting to list to drop the data of the fly
243
+ regions = peaks_regions.tolist()
244
+
245
+ for i, _ in enumerate(regions):
246
+ try:
247
+ # check left border of i regions with right of i+1
248
+ if abs(x_data[regions[i][-1]] - x_data[regions[i + 1][0]]) <= d_merge:
249
+ # if lower append merge the regions
250
+ merged_regions.append([regions[i][0], regions[i + 1][-1]])
251
+ # drop the merged one
252
+ regions.pop(i + 1)
253
+ else:
254
+ # if nothing to merge, just append the current region
255
+ merged_regions.append(regions[i])
256
+ except IndexError:
257
+ # last row
258
+ merged_regions.append(regions[i])
259
+
260
+ merged_regions = np.array(merged_regions)
261
+
262
+ if not recursively:
263
+ return merged_regions
264
+
265
+ # if recursively, check for the difference
266
+ if (merged_regions == regions).all():
267
+ # done
268
+ return merged_regions
269
+
270
+ return merge_regions(x_data, merged_regions, d_merge, recursively=True)
271
+
272
+
273
+ def expand_regions(x_data, peaks_regions, d_expand):
274
+ """Expand the peak regions by the desired value.
275
+
276
+ Args:
277
+ x_data (:obj:np.array): X data points.
278
+ peaks_regions (:obj:np.array): 2D Nx2 array with peak regions indexes
279
+ (rows) as left and right borders (columns).
280
+ d_expand (float): Value to expand borders to (in X data scale).
281
+
282
+ Returns:
283
+ :obj:np.array: 2D Nx2 array with expanded peak regions indexes (rows) as
284
+ left and right borders (columns).
285
+ """
286
+
287
+ data_regions = np.copy(x_data[peaks_regions])
288
+
289
+ # determine scale orientation, i.e. decreasing (e.g. ppm on NMR spectrum)
290
+ # or increasing (e.g. wavelength on UV spectrum)
291
+ if (data_regions[:, 0] - data_regions[:, 1]).mean() > 0:
292
+ # ppm-like scale
293
+ data_regions[:, 0] += d_expand
294
+ data_regions[:, -1] -= d_expand
295
+ else:
296
+ # wavelength-like scale
297
+ data_regions[:, 0] -= d_expand
298
+ data_regions[:, -1] += d_expand
299
+
300
+ # converting new values to new indexes
301
+ for index_, value in np.ndenumerate(data_regions):
302
+ data_regions[index_] = find_nearest_value_index(x_data, value)[1]
303
+
304
+ return data_regions.astype(int)
@@ -12,6 +12,7 @@ Authors: Alexander Hammer, Hessam Mehr, Lucy Hao
12
12
  import logging
13
13
  import os
14
14
  import time
15
+ from typing import Optional
15
16
 
16
17
  from result import Result, Ok, Err
17
18
 
@@ -43,7 +44,7 @@ class CommunicationController:
43
44
  self.cmd_no = 0
44
45
  else:
45
46
  raise FileNotFoundError(f"comm_dir: {comm_dir} not found.")
46
- self._most_recent_hplc_status = None
47
+ self._most_recent_hplc_status: Optional[Status] = None
47
48
 
48
49
  # Create files for Chemstation to communicate with Python
49
50
  open(self.cmd_file, "a").close()
@@ -67,7 +68,7 @@ class CommunicationController:
67
68
  else:
68
69
  raise RuntimeError("Failed to get string")
69
70
 
70
- def get_status(self) -> Union[HPLCRunningStatus, HPLCAvailStatus, HPLCErrorStatus]:
71
+ def get_status(self) -> Status:
71
72
  """Get device status(es).
72
73
 
73
74
  :return: list of ChemStation's current status
@@ -130,7 +131,7 @@ class CommunicationController:
130
131
  :raises IOError: Could not read reply file.
131
132
  :return: Potential ChemStation response
132
133
  """
133
- err = None
134
+ err: Optional[Union[OSError, IndexError]] = None
134
135
  for _ in range(num_attempts):
135
136
  time.sleep(1)
136
137