pytrms 0.9.2__py3-none-any.whl → 0.9.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,472 +1,472 @@
(the hunk removes and re-adds the module byte-for-byte unchanged; its 472 lines are listed once below)
import os.path
from functools import partial, lru_cache
from itertools import islice

import h5py
import numpy as np
import pandas as pd

from .._base import itype
from ..helpers import convert_labview_to_posix

__all__ = ['IoniTOFReader', 'GroupNotFoundError']


class GroupNotFoundError(KeyError):
    pass


class IoniTOFReader:

    @property
    @lru_cache
    def time_of_meas(self):
        """The pandas.Timestamp of the 0th measurement cycle."""
        return next(self.iter_index('abs_time')) - next(self.iter_index('rel_time'))

    @property
    @lru_cache
    def time_of_file(self):
        """The pandas.Timestamp of the 0th file cycle."""
        # ..which is *not* the 1st file-cycle, but the (unrecorded) one before..
        file0 = next(self.iter_index('abs_time')) - pd.Timedelta(self.single_spec_duration_ms, 'ms')
        # ..and should never precede the measurement time:
        return max(file0, self.time_of_meas)

    @property
    @lru_cache
    def time_of_file_creation(self):
        """The pandas.Timestamp of the file creation."""
        return convert_labview_to_posix(float(self.hf.attrs['FileCreatedTime_UTC']), self.utc_offset_sec)

    @property
    @lru_cache
    def utc_offset_sec(self):
        """The UTC offset of the recording, in seconds."""
        return int(self.hf.attrs['UTC_Offset'])
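
    # The timestamps above are related as follows (a doctest-style sketch;
    # the file name is hypothetical):
    #
    #   >>> reader = IoniTOFReader('2024-01-01_measurement.h5')
    #   >>> reader.time_of_meas <= reader.time_of_file
    #   True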

    @property
    def inst_type(self):
        return str(self.hf.attrs.get('InstrumentType', [b'',])[0].decode('latin-1'))

    @property
    def sub_type(self):
        return str(self.hf.attrs.get('InstSubType', [b'',])[0].decode('latin-1'))

    @property
    def serial_nr(self):
        return str(self.hf.attrs.get('InstSerial#', [b'???',])[0].decode('latin-1'))

    @serial_nr.setter
    def serial_nr(self, number):
        path = self.filename
        self.hf.close()
        try:
            hf = h5py.File(path, 'r+')
            hf.attrs['InstSerial#'] = np.array([str(number).encode('latin-1')], dtype='S')
            hf.flush()
            hf.close()
        except OSError:
            # the file could not be re-opened for writing; leave the attribute unchanged..
            pass
        finally:
            self.hf = h5py.File(path, 'r', swmr=False)
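
    # The setter rewrites the 'InstSerial#' attribute in place and re-opens the
    # file read-only afterwards; note that a write failure is silently ignored.
    # A sketch:
    #
    #   >>> reader.serial_nr = 4242
    #   >>> reader.serial_nr
    #   '4242'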

    @property
    def number_of_timebins(self):
        return int(self.hf['SPECdata/Intensities'].shape[1])

    @property
    def timebin_width_ps(self):
        return float(self.hf.attrs.get('Timebin width (ps)'))

    @property
    def poisson_deadtime_ns(self):
        return float(self.hf.attrs.get('PoissonDeadTime (ns)'))

    @property
    def pulsing_period_ns(self):
        return float(self.hf.attrs.get('Pulsing Period (ns)'))

    @property
    def start_delay_ns(self):
        return float(self.hf.attrs.get('Start Delay (ns)'))

    @property
    def single_spec_duration_ms(self):
        return float(self.hf.attrs.get('Single Spec Duration (ms)'))

    def __init__(self, path):
        self.hf = h5py.File(path, 'r', swmr=False)
        self.filename = os.path.abspath(self.hf.filename)

    table_locs = {
        'primary_ions': '/PTR-PrimaryIons',
        'transmission': '/PTR-Transmission',
    }

    def get_table(self, table_name):
        """Return the named settings-table as a list of itype.table_setting_t."""
        try:
            grp = self.hf.get(IoniTOFReader.table_locs[table_name])
            assert grp is not None, "missing dataset in hdf5 file"
        except KeyError as exc:
            raise KeyError(str(exc) + f", possible values: {list(IoniTOFReader.table_locs.keys())}")

        rv = []
        for i, name in enumerate(s.decode('latin-1') for s in grp['Descriptions']):
            # Note: the dataset is 10 x 2 x 10 by default, but we remove all empty rows...
            if not len(name):
                continue

            dset = grp['Masses_Factors'][i]
            # ...and columns:
            filled = np.all(dset, axis=0)
            masses = dset[0, filled]
            values = dset[1, filled]
            rv.append(itype.table_setting_t(name, list(zip(masses, values))))

        return rv
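
    # Usage sketch (the file name is hypothetical; see `table_locs` above for
    # the valid table names):
    #
    #   >>> reader = IoniTOFReader('2024-01-01_measurement.h5')
    #   >>> for setting in reader.get_table('transmission'):
    #   ...     print(setting)  # an itype.table_setting_t of (name, mass/value pairs)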

    def read_addtraces(self, matches=None, index='abs_cycle'):
        """Reads all /AddTraces into a DataFrame.

        - 'matches' an optional filter: a substring of, or a callable on, the group-names
        - 'index' one of abs_cycle|abs_time|rel_cycle|rel_time
        """
        if matches is not None:
            if callable(matches):
                filter_fun = matches
            elif isinstance(matches, str):
                filter_fun = lambda x: matches.lower() in x.lower()
            else:
                raise ValueError(repr(matches))
            locs = list(filter(filter_fun, self._locate_datainfo()))
        else:
            locs = self._locate_datainfo()

        if not len(locs):
            raise ValueError(f"no match for {matches} in {self._locate_datainfo()}")

        rv = pd.concat((self._read_datainfo(loc) for loc in locs), axis='columns')
        rv.index = list(self.iter_index(index))

        # de-duplicate trace-columns to prevent issues...
        return rv.loc[:, ~rv.columns.duplicated()]
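
    # A filter sketch; 'matches' is tested against the group names returned by
    # `_locate_datainfo` (the substring below is hypothetical):
    #
    #   >>> reader.read_addtraces('Instrument')                       # substring match
    #   >>> reader.read_addtraces(lambda s: s.startswith('AddTraces'))  # callable match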

    def read_calctraces(self, index='abs_cycle'):
        """Reads the calculated traces into a DataFrame.

        - 'index' one of abs_cycle|abs_time|rel_cycle|rel_time
        """
        return self.read_addtraces('CalcTraces', index)

    @lru_cache
    def read_traces(self, kind='conc', index='abs_cycle', force_original=False):
        """Reads the peak-traces of the given 'kind' into a DataFrame.

        If the traces have been post-processed in the Ionicon Viewer,
        those will be used, unless `force_original=True`.

        - 'kind' one of raw|corr|conc
        - 'index' one of abs_cycle|abs_time|rel_cycle|rel_time
        - 'force_original' ignore the post-processed data
        """
        if force_original:
            return self._read_original_traces(kind, index)
        else:
            try:
                return self._read_processed_traces(kind, index)
            except GroupNotFoundError:
                return self._read_original_traces(kind, index)
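
    # The three trace-kinds map onto the hdf5 groups listed in
    # `_read_processed_traces` and `_read_original_traces` below; only the
    # first three letters of 'kind' are significant. A sketch:
    #
    #   >>> conc = reader.read_traces('conc')                    # concentrations
    #   >>> raw = reader.read_traces('raw', index='abs_time')
    #   >>> orig = reader.read_traces('corr', force_original=True)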

    def read_all(self, kind='conc', index='abs_cycle', force_original=False):
        """Reads all traces into a DataFrame.

        If the traces have been post-processed in the Ionicon Viewer,
        those will be used, unless `force_original=True`.

        - 'kind' one of raw|corr|conc
        - 'index' one of abs_cycle|abs_time|rel_cycle|rel_time
        - 'force_original' ignore the post-processed data
        """
        # ...and throw it all together:
        return pd.concat([
            self.read_traces(kind, index, force_original),
            self.read_addtraces(None, index),
        ], axis='columns')

    def iter_index(self, kind='abs_cycle'):
        lut = {
            'rel_cycle': (0, lambda a: iter(a.astype('int', copy=False))),
            'abs_cycle': (1, lambda a: iter(a.astype('int', copy=False))),
            'abs_time': (2, lambda a: map(partial(convert_labview_to_posix, utc_offset_sec=self.utc_offset_sec), a)),
            'rel_time': (3, lambda a: map(partial(pd.Timedelta, unit='s'), a)),
        }
        try:
            _N, convert2iterator = lut[kind.lower()]
        except KeyError as exc:
            msg = "Unknown index-type! `kind` must be one of {0}.".format(', '.join(lut.keys()))
            raise KeyError(msg) from exc

        return convert2iterator(self.hf['SPECdata/Times'][:, _N])
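
    # Each cycle thus carries four time-columns, stored column-wise in
    # 'SPECdata/Times'. A sketch of the four index flavours:
    #
    #   >>> next(reader.iter_index('abs_cycle'))  # plain int
    #   >>> next(reader.iter_index('abs_time'))   # pd.Timestamp (via convert_labview_to_posix)
    #   >>> next(reader.iter_index('rel_time'))   # pd.Timedelta in seconds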

    @lru_cache
    def make_index(self, kind='abs_cycle'):
        return pd.Index(self.iter_index(kind))

    def __len__(self):
        return self.hf['SPECdata/Intensities'].shape[0]

    def iter_specdata(self, start=None, stop=None):
        has_mc_segments = False  # self.hf.get('MassCal') is not None

        add_data_dicts = {ad_info.split('/')[1]: self.read_addtraces(ad_info)
                          for ad_info in self._locate_datainfo()
                          if ad_info.startswith('AddTraces')}

        for i in islice(range(len(self)), start, stop):
            tc = itype.timecycle_t(*self.hf['SPECdata/Times'][i])
            iy = self.hf['SPECdata/Intensities'][i]
            if has_mc_segments:
                raise NotImplementedError("new style mass-cal")
            else:
                mc_map = self.hf['CALdata/Mapping']
                mc_pars = self.hf['CALdata/Spectrum'][i]
                mc_segs = mc_pars.reshape((1, mc_pars.size))
                mc = itype.masscal_t(0, mc_map[:, 0], mc_map[:, 1], mc_pars, mc_segs)
            ad = dict()
            for ad_info, ad_frame in add_data_dicts.items():
                ad_series = ad_frame.iloc[i]
                unit = ''
                view = 1
                ad[ad_info] = [itype.add_data_item_t(val, name, unit, view)
                               for name, val in ad_series.items()]
            yield itype.fullcycle_t(tc, iy, mc, ad)
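
    # 'start' and 'stop' slice the cycle range, so spectra can be streamed
    # without loading them all at once, e.g. the first ten cycles:
    #
    #   >>> for cycle in reader.iter_specdata(stop=10):
    #   ...     pass  # an itype.fullcycle_t of (times, intensities, mass-cal, add-data)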

    def list_file_structure(self):
        """Lists all hdf5 group- and dataset-names."""
        # this walks all h5 objects in alphabetic order:
        obj_names = set()
        self.hf.visit(lambda obj_name: obj_names.add(obj_name))

        return sorted(obj_names)

    def list_addtrace_groups(self):
        """Lists the recorded additional trace-groups."""
        return sorted(self._locate_datainfo())

    def __repr__(self):
        return "<%s (%s) [no. %s] %s>" % (self.__class__.__name__,
                self.inst_type, self.serial_nr, self.hf.filename)

    @lru_cache
    def _locate_datainfo(self):
        """Look up groups with data-info traces."""
        dataloc = set()
        infoloc = set()

        def func(object_name):
            nonlocal dataloc
            nonlocal infoloc
            if object_name.endswith('/Data'):
                dataloc |= {object_name[:-5], }
            if object_name.endswith('/Info'):
                infoloc |= {object_name[:-5], }
            return None

        # visit every object with the above function, which collects the matching groups...
        self.hf.visit(func)

        # ...and return only groups with both /Data and /Info datasets:
        return dataloc.intersection(infoloc)
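
    # The reader thus treats every group holding both a 'Data' matrix and a
    # matching 'Info' label-row as a trace source, e.g. (hypothetical layout):
    #
    #   AddTraces/PTR-Instrument/Data   (n_cycles x n_columns, float)
    #   AddTraces/PTR-Instrument/Info   (column labels, bytes)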

    def traces(self):
        """Returns a 'pandas.DataFrame' with all traces concatenated."""
        return self.read_all(kind='conc', index='abs_cycle', force_original=False)

    # TODO :: optimize: return a 'smart' Series that remembers which columns
    #         were requested! It holds the entire first row of the dataset;
    #         from the second pass onwards, the Series can be reduced to
    #         those columns.
    #         And this is how it works:
    #         - defaultdict ~> the factory checks parIDs! ~> otherwise KeyError
    #           |__ gets populated on demand
    #
    # the sourcefile / measurement should "feel" like a pd.DataFrame:

    # this solves the problem of intuitively selecting a single row or a single
    # "column" from a "matrix", or everything (i.e. iterating over rows)!!

    # IDEA: the .traces "pretend to be a DataFrame"
    # (no inheritance, just a few methods replaced by more efficient ones):
    # we need:
    # 1. _len_getter ~> avoid race conditions!
    # 2. |__ index_getter
    # 3. _column_getter
    # 4. _row_getter
    # 5. parID_resolver ~> resolve keys() from ParID.txt to addtrace-group + column!

    ###################################################################################
    #                                                                                 #
    # END GOAL: send times and Automation for the "last" row to the database!         #
    #                                                                                 #
    ###################################################################################

    def __getitem__(self, key):
        index = self.make_index()
        if isinstance(key, str):
            return pd.Series(self._get_datacolumn(key), name=key, index=index)
        else:
            return pd.DataFrame({k: self._get_datacolumn(k) for k in key}, index=index)
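
    # A single label yields a Series, an iterable of labels a DataFrame; the
    # parameter names below are hypothetical and depend on the instrument:
    #
    #   >>> reader['p-Drift']               # resolves to 'p-Drift_Act', see below
    #   >>> reader[['p-Drift', 'T-Drift']]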

    @lru_cache
    def _build_datainfo(self):
        """Parse all "Data-Info" groups and build a lookup-table."""
        lut = dict()
        for group_name in self._locate_datainfo():
            info = self.hf[group_name + '/Info']
            for column, label in enumerate(info[:] if info.ndim == 1 else info[0, :]):
                if hasattr(label, 'decode'):
                    label = label.decode('latin1')
                lut[label] = group_name + '/Data', column

        return lut

    def _get_datacolumn(self, key):
        lut = self._build_datainfo()
        if key not in lut and not key.endswith('_Act') and not key.endswith('_Set'):
            # fall back to the act-value (which is typically wanted):
            key = key + '_Act'

        dset_name, column = lut[key]  # may raise KeyError

        return self.hf[dset_name][:, column]

    def loc(self, label):
        # translate a label on the cycle- resp. time-index into a positional offset:
        if isinstance(label, int):
            offset = self.make_index('abs_cycle').get_loc(label)
        else:
            offset = self.make_index('abs_time').get_loc(label)

        return self.iloc(offset)

    def iloc(self, offset):
        # build a row of all trace-data...
        lut = self._build_datainfo()
        name = self.make_index()[offset]
        data = {key: self.hf[h5_loc][offset, col] for key, [h5_loc, col] in lut.items()}

        return pd.Series(data, name=name)
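
    # Unlike their pandas namesakes, `loc` and `iloc` are methods here, not
    # indexers. A sketch (the cycle number is hypothetical):
    #
    #   >>> row = reader.iloc(0)    # the first recorded cycle, as a pd.Series
    #   >>> row = reader.loc(4711)  # the cycle labelled 4711 on the abs_cycle index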
358
-
359
- @lru_cache
360
- def _read_datainfo(self, group, prefix=''):
361
- """Parse a "Data-Info" group into a pd.DataFrame.
362
-
363
- - 'group' a hdf5 group or a string-location to a group
364
- - 'prefix' names an optional sub-group
365
- """
366
- if isinstance(group, str):
367
- group = self.hf[group]
368
- data = group[prefix + 'Data']
369
- info = group[prefix + 'Info']
370
- if info.ndim > 1:
371
- labels = info[0,:]
372
- else:
373
- labels = info[:]
374
-
375
- if hasattr(labels[0], 'decode'):
376
- labels = [b.decode('latin1') for b in labels]
377
-
378
- # TODO :: wir haben hier diese doesigen Set/Act werte drin, was wollen wir??
379
- # if keys[0].endswith('[Set]'):
380
- # rv = {key[:-5]: (value, unit)
381
- # for key, value, unit in zip(keys, values, units)
382
- # if key.endswith('[Set]')}
383
- # else:
384
- # rv = {key: (value, unit)
385
- # for key, value, unit in zip(keys, values, units)}
386
-
387
- # siehe auch:
388
-
389
- #def datainfo2df(h5group, selection=slice(None)):
390
- # """
391
- # Split a Data-Info-group into `pd.DataFrame`s for set- and act-values, respectively.
392
- #
393
- # Note, that the column names are inferred from the Info-dataset and that
394
- # the columns of act- and set-dataframe need not overlap!
395
- #
396
- # `h5group` - a HDF5-group containing datasets "Data" and "Info"
397
- # `selection` - [slice, optional] load only a part of the TimeCycle-data
398
- # """
399
- # from collections import namedtuple
400
- # _trace = namedtuple('Trace', ['set', 'act'])
401
- #
402
- # names = (info.decode('latin-1') for info in h5group['Info'][0])
403
- # units = (info.decode('latin-1') for info in h5group['Info'][1])
404
- #
405
- # df = pd.DataFrame(h5group['Data'][selection], columns=names)
406
- #
407
- # set_cols = [col for col in df.columns if col.endswith('_Set')]
408
- # act_cols = [col for col in df.columns if col.endswith('_Act')]
409
- #
410
- # set_values = df[set_cols]
411
- # act_values = df[act_cols]
412
- #
413
- # set_values.columns = [col.replace('_Set', '') for col in set_values.columns]
414
- # act_values.columns = [col.replace('_Act', '') for col in act_values.columns]
415
- #
416
- # return _trace(set_values, act_values)
417
-
418
-
419
- return pd.DataFrame(data, columns=labels)
420
-
421
- def _read_processed_traces(self, kind, index):
422
- # error conditions:
423
- # 1) 'kind' is not recognized -> ValueError
424
- # 2) no 'PROCESSED/TraceData' group -> GroupNotFoundError
425
- # 3) expected group not found -> KeyError (file is not supported yet)
426
- lut = {
427
- 'con': 'Concentrations',
428
- 'raw': 'Raw',
429
- 'cor': 'Corrected',
430
- }
431
- tracedata = self.hf.get('PROCESSED/TraceData')
432
- if tracedata is None:
433
- raise GroupNotFoundError()
434
-
435
- try:
436
- prefix = lut[kind[:3].lower()]
437
- except KeyError as exc:
438
- msg = ("Unknown trace-type! `kind` must be one of 'raw', 'corrected' or 'concentration'.")
439
- raise ValueError(msg) from exc
440
-
441
- try:
442
- data = self._read_datainfo(tracedata, prefix=prefix)
443
- pt = self._read_datainfo(tracedata, prefix='PeakTable')
444
- except KeyError as exc:
445
- raise KeyError(f'unknown group {exc}. filetype is not supported yet.') from exc
446
-
447
- labels = [b.decode('latin1') for b in pt['label']]
448
- mapper = dict(zip(data.columns, labels))
449
- data.rename(columns=mapper, inplace=True)
450
- data.index = list(self.iter_index(index))
451
-
452
- return data
453
-
454
- def _read_original_traces(self, kind, index):
455
- lut = {
456
- 'con': 'TraceConcentration',
457
- 'raw': 'TraceRaw',
458
- 'cor': 'TraceCorrected',
459
- }
460
- tracedata = self.hf['TRACEdata']
461
- try:
462
- loc = lut[kind[:3].lower()]
463
- data = tracedata[loc]
464
- except KeyError as exc:
465
- msg = ("Unknown trace-type! `kind` must be one of 'raw', 'corrected' or 'concentration'.")
466
- raise ValueError(msg) from exc
467
-
468
- info = self.hf['TRACEdata/TraceInfo']
469
- labels = [b.decode('latin1') for b in info[1,:]]
470
-
471
- return pd.DataFrame(data, columns=labels, index=list(self.iter_index(index)))
472
-