das2numpy 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,479 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Copyright (c) 2018 Silixa Ltd
4
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to use the Software for the sole purpose of private, non-commercial use and/or in-house company
6
+ research and development meaning the right to use, copy, modify, merge, share the Software, and to permit persons to whom
7
+ the Software is furnished to like-wise do so, subject to the following conditions:
8
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
9
+ For any intended commercial use then contact the copyright holder, Silixa Ltd, for permission, which shall not be unreasonably
10
+ withheld.
11
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
12
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
13
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
14
+ IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
15
+
16
+ $Rev:: 26283 $
17
+ $Date:: 2018-03-27 13:14:05 +0100 (Tue, 27 Mar 2018) $
18
+
19
+ Ref: [1] TDMS_Adv_Read.m
20
+ [2] http://www.ni.com/white-paper/5696/en#toc2
21
+
22
+
23
+ Changed by Erik Genthe, erik.genthe@desy.de
24
+ """
25
+
26
+ import os, struct, datetime
27
+ import pandas as pd
28
+ import numpy as np
29
+ import mmap
30
+
31
+ import matplotlib.pyplot as plt
32
+ from copy import deepcopy
33
+
34
+ #%%
35
def load_property_map(xls_file):
    """Load the tag-renaming table from an Excel workbook.

    Reads sheet ``Sheet1`` of *xls_file* and returns a dict that maps each
    ``CurrentTag`` entry to its ``CorrectTag`` entry. Spaces are removed
    from both keys and values.

    xls_file -- path (or file-like object) accepted by ``pandas.read_excel``
    @rtype : dict
    """
    # ``sheet_name`` is the keyword pandas accepts; the old ``sheetname``
    # spelling raises TypeError on current pandas releases.
    table = pd.read_excel(xls_file, sheet_name='Sheet1')
    # Built row by row so that, as with Series.to_dict(), a later duplicate
    # ``CurrentTag`` overrides an earlier one.
    return {
        current.replace(" ", ""): correct.replace(" ", "")
        for current, correct in zip(table['CurrentTag'], table['CorrectTag'])
    }
38
+
39
+ #prop_map = load_property_map('MetaDataTable_iDAS_TDMS_CFG_Tags.xlsx')
40
+
41
def write_property_dict(prop_dict, out_file):
    """Write *prop_dict* to *out_file* as a Python assignment.

    The file content is ``tdms_property_map=`` followed by the
    pretty-printed dictionary.

    prop_dict -- the dictionary to serialise
    out_file -- path of the file to (over)write
    """
    from pprint import pformat

    # The context manager closes the file even if formatting or writing fails.
    with open(out_file, 'w') as f:
        f.write('tdms_property_map=')
        f.write(pformat(prop_dict))
47
+
48
def type_not_supported(vargin):
    """Always raise NotImplementedError; *vargin* is accepted but ignored.

    Used as the reader entry for TDMS data types that cannot be parsed.
    """
    message = "Reading of this tdsDataType is not implemented"
    raise NotImplementedError(message)
51
+
52
+
53
def parse_time_stamp(fractions, seconds):
    """
    Convert the TDMS time representation to a datetime.
    fractions -- fractional seconds, in units of 2^-64 s
    seconds -- the number of seconds since 1/1/1904
    @rtype : datetime.datetime, or None when either argument is None or
             their sum is not positive
    """
    if fractions is None or seconds is None:
        return None
    if not (fractions + seconds > 0):
        return None

    epoch = datetime.datetime(1904, 1, 1)
    elapsed = datetime.timedelta(0, fractions * 2 ** -64 + seconds)
    return elapsed + epoch
65
+
66
+
67
# Maps each TDMS data-type code to a short type name. Where a numpy dtype of
# the same name exists the name doubles as that dtype string.
# See Ref[2] for the enum values.
TDS_DATA_TYPE = {
    0x00: 'void',                   # tdsTypeVoid
    0x01: 'int8',                   # tdsTypeI8
    0x02: 'int16',                  # tdsTypeI16
    0x03: 'int32',                  # tdsTypeI32
    0x04: 'int64',                  # tdsTypeI64
    0x05: 'uint8',                  # tdsTypeU8
    0x06: 'uint16',                 # tdsTypeU16
    0x07: 'uint32',                 # tdsTypeU32
    0x08: 'uint64',                 # tdsTypeU64
    0x09: 'float32',                # tdsTypeSingleFloat
    0x0a: 'float64',                # tdsTypeDoubleFloat
    0x0b: 'float128',               # tdsTypeExtendedFloat
    0x19: 'singleFloatWithUnit',    # tdsTypeSingleFloatWithUnit
    0x1a: 'doubleFloatWithUnit',    # tdsTypeDoubleFloatWithUnit
    0x1b: 'extendedFloatWithUnit',  # tdsTypeExtendedFloatWithUnit
    0x20: 'str',                    # tdsTypeString
    0x21: 'bool',                   # tdsTypeBoolean
    0x44: 'datetime',               # tdsTypeTimeStamp
    0xFFFFFFFF: 'raw',              # tdsTypeDAQmxRawData
}
90
+
91
def _fixed_width_reader(fmt):
    """Return a reader that unpacks one value of struct format *fmt* from a file."""
    width = struct.calcsize(fmt)
    return lambda f: struct.unpack(fmt, f.read(width))[0]


def _read_string(f):
    """Read a 4-byte little-endian length, then that many raw bytes."""
    length = struct.unpack('<i', f.read(4))[0]
    return f.read(length)


def _read_timestamp(f):
    """Read the 8-byte fractions and then the 8-byte seconds of a time stamp."""
    fractions = struct.unpack('<Q', f.read(8))[0]
    seconds = struct.unpack('<q', f.read(8))[0]
    return parse_time_stamp(fractions, seconds)


# Reader functions keyed by the type names used in TDS_DATA_TYPE. Each takes
# the open binary file and returns the parsed value.
TDS_READ_VAL = {
    'void': lambda f: None,  # tdsTypeVoid
    'int8': _fixed_width_reader('<b'),
    'int16': _fixed_width_reader('<h'),
    'int32': _fixed_width_reader('<i'),
    'int64': _fixed_width_reader('<q'),
    'uint8': _fixed_width_reader('<B'),
    'uint16': _fixed_width_reader('<H'),
    'uint32': _fixed_width_reader('<I'),
    'uint64': _fixed_width_reader('<Q'),
    'float32': _fixed_width_reader('<f'),
    'float64': _fixed_width_reader('<d'),
    'float128': type_not_supported,
    'singleFloatWithUnit': type_not_supported,
    'doubleFloatWithUnit': type_not_supported,
    'extendedFloatWithUnit': type_not_supported,
    'str': _read_string,
    'bool': _fixed_width_reader('<?'),
    'datetime': _read_timestamp,
    'raw': type_not_supported,
}
114
+
115
# Bit of the lead-in 'toc' field that TdmsReader tests to derive its
# 'decimated' flag.
DECIMATE_MASK = 0x20
# Size in bytes of the lead-in at the start of every segment.
LEAD_IN_LENGTH = 28
# Names given to the five lead-in fields, in the order they are unpacked.
FILEINFO_NAMES = (
    'file_tag',
    'toc',
    'version',
    'next_segment_offset',
    'raw_data_offset',
)
122
+
123
+
124
class TdmsReader(object):
    """A TDMS file reader object for reading properties and data.

    The file is opened in ``__init__`` and stays open until ``close()`` is
    called or the ``with`` block ends. Properties are parsed on first use;
    raw data is memory-mapped on first use.
    """

    def __init__(self, filename):
        """Open *filename* and parse the 28-byte lead-in into ``fileinfo``.

        Raises TypeError when the file does not start with the ``TDSm`` tag
        and struct.error when it is shorter than the lead-in. The file is
        closed again in both cases.
        """
        self._properties = None
        self._end_of_properties_offset = None
        self._data_type = None
        self._chunk_size = None

        self._raw_data = None
        self._raw_data2 = None  # The mapped data in the 'Next Segment'
        self._raw_last_chunk = None
        self._raw2_last_chunk = None

        self.file_size = os.path.getsize(filename)
        self._channel_length = None
        self._seg1_length = None
        self._seg2_length = None

        self._tdms_file = open(filename, 'rb')
        try:
            # lead_in is 28 bytes:
            # [string of length 4][int32][int32][int64][int64]
            lead_in = self._tdms_file.read(LEAD_IN_LENGTH)
            fields = struct.unpack('<4siiQQ', lead_in)
            # Compare the raw bytes: an exact match is required, and
            # arbitrary binary content cannot fail to decode.
            if fields[0] != b'TDSm':
                raise TypeError("Not a TDMS file (TDSm tag not found)")
        except Exception:
            # Do not leak the handle when the header is unusable.
            self._tdms_file.close()
            raise

        self.fileinfo = dict(zip(FILEINFO_NAMES, fields))
        self.fileinfo['decimated'] = not bool(self.fileinfo['toc'] &
                                              DECIMATE_MASK)
        # Make offsets relative to beginning of file:
        self.fileinfo['next_segment_offset'] += LEAD_IN_LENGTH
        self.fileinfo['raw_data_offset'] += LEAD_IN_LENGTH
        self.fileinfo['file_size'] = self.file_size

        # A next-segment offset beyond the end of the file is clamped to the
        # file size instead of being treated as an error.
        if self.fileinfo['next_segment_offset'] > self.file_size:
            self.fileinfo['next_segment_offset'] = self.file_size

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying file handle."""
        self._tdms_file.close()

    def _get_channel_length(self):
        """Return the number of samples per channel, mapping the data if needed."""
        # ``is None`` rather than truthiness: a length of 0 is a valid
        # result and must not trigger a re-mapping on every access.
        if self._channel_length is None:
            self._initialise_data()

        return self._channel_length

    channel_length = property(_get_channel_length)

    def get_properties(self, mapped=False, prop_map=None):
        """
        Return a dictionary of properties. Read from file only if necessary.
        mapped -- when True, rename the property names (spaces removed)
                  through prop_map and make duplicate names unique by
                  appending ``_1``, ``_2``, ...
        prop_map -- optional dict of renames, e.g. from load_property_map().
                    When omitted, a module-level ``prop_map`` is used if one
                    has been defined, otherwise no names are changed.
        """
        # Check if already hold properties in memory
        if self._properties is None:
            self._properties = self._read_properties()
        if not mapped:
            return self._properties.loc[:, 'Value'].to_dict()

        if prop_map is None:
            # The module does not define ``prop_map`` itself; fall back to
            # an empty mapping instead of failing with NameError.
            prop_map = globals().get('prop_map') or {}

        taken = set()
        new_index = []
        for col in self._properties.index:
            key = col.replace(" ", "")
            base = prop_map.get(key, key)
            # First free name in the sequence base, base_1, base_2, ...
            candidate = base
            suffix = 0
            while candidate in taken:
                suffix += 1
                candidate = '{0}_{1}'.format(base, suffix)
            taken.add(candidate)
            new_index.append(candidate)

        props = self._properties.copy()
        props.index = new_index
        return props.loc[:, 'Value'].to_dict()

    def _read_property(self):
        """
        Read a single property from the TDMS file.
        Return the name, type and value of the property as a tuple.
        """
        # Read length of the property name:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        # Read property name and type:
        name, data_type = struct.unpack('<{0}si'.format(var),
                                        self._tdms_file.read(var + 4))
        # Lookup function to read and parse property value based on type:
        value = TDS_READ_VAL[TDS_DATA_TYPE[data_type]](self._tdms_file)
        name = name.decode()
        if data_type == 0x20:  # string values are read as raw bytes
            value = value.decode()

        return name, data_type, value

    def _read_properties(self):
        """Read the properties from the file and return them as a DataFrame.

        Also sets ``fileinfo['n_channels']`` and, through
        ``_read_chunk_size``, the data type and chunk size.
        """
        self._tdms_file.seek(LEAD_IN_LENGTH, 0)
        # Number of channels is total objects - file objects - group objects.
        # '<i' (not native 'i'): the format is little-endian like every
        # other field read from the file.
        self.fileinfo['n_channels'] = struct.unpack(
            '<i', self._tdms_file.read(4))[0] - 2
        # Read length of object path:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        # skip over object path and raw data index:
        self._tdms_file.seek(var + 4, 1)
        # Read number of properties in this group:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]

        # loop through and read each property
        properties = [self._read_property() for _ in range(var)]
        # Passing ``columns`` up front also works when there are no properties.
        df = pd.DataFrame(properties, columns=['Property', 'Type', 'Value'])
        df.set_index('Property', inplace=True)

        self._end_of_properties_offset = self._tdms_file.tell()

        self._read_chunk_size()
        return df

    def _read_chunk_size(self):
        """Read the data type and chunk size from the TDMS file header.

        Raises Exception when the data type is not int16 or float32.
        """
        if self._end_of_properties_offset is None:
            # _read_properties() finishes by calling this method again with
            # the offset set, so nothing is left to do afterwards.
            self._properties = self._read_properties()
            return

        self._tdms_file.seek(self._end_of_properties_offset, 0)

        # skip over Group Information:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 8, 1)

        # skip over first channel path and length of index information:
        var = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._tdms_file.seek(var + 4, 1)

        type_code = struct.unpack('<i', self._tdms_file.read(4))[0]
        self._data_type = TDS_DATA_TYPE.get(type_code)
        if self._data_type not in ('int16', 'float32'):
            # An unknown code maps to None; report the code itself rather
            # than failing while building the message.
            raise Exception('Unsupported TDMS data type: {0}'.format(
                self._data_type or hex(type_code)))

        # Skip the dimension of the raw data array (has to be 1):
        self._tdms_file.read(4)

        self._chunk_size = struct.unpack('<i', self._tdms_file.read(4))[0]

    def get_data(self, first_ch=0, last_ch=None, first_s=0, last_s=None):
        """
        Get a block of data from the TDMS file.
        first_ch -- The first channel to load
        last_ch -- The last channel to load (inclusive; None = to the end)
        first_s -- The first sample to load
        last_s -- The last sample to load (inclusive; None = to the end)
        @rtype : numpy array of shape (samples, channels)
        """
        if self._raw_data is None:
            self._initialise_data()

        n_channels = self.fileinfo['n_channels']
        if first_ch is None or first_ch < 0:
            first_ch = 0
        if last_ch is None or last_ch >= n_channels:
            last_ch = n_channels
        else:
            # return data inclusive of last_ch, numpy indexing is exclusive of end index
            last_ch += 1
        if first_s is None or first_s < 0:
            first_s = 0
        # '>=': with '>' a request for last_s == channel_length produced one
        # extra, never-written row in the np.empty() output.
        if last_s is None or last_s >= self._channel_length:
            last_s = self._channel_length
        else:
            # return data inclusive of last_s, numpy indexing is exclusive of end index
            last_s += 1
        nch = int(max(last_ch - first_ch, 0))
        ns = int(max(last_s - first_s, 0))

        # Allocate output container
        data = np.empty((ns, nch), dtype=np.dtype(self._data_type))
        if data.size == 0:
            return data

        chunk = self._chunk_size

        ## 1. Complete chunks of the first segment
        n_blk_1 = self._raw_data.shape[1]
        first_blk = first_s // chunk
        last_blk = last_s // chunk
        last_full_blk = min(last_blk + 1, n_blk_1)
        nchunk = min(max(last_full_blk - first_blk, 0), n_blk_1)
        first_s_1a = max(first_s - first_blk * chunk, 0)
        last_s_1a = min(last_s - first_blk * chunk, nchunk * chunk)
        ind_s = 0
        ind_e = ind_s + max(last_s_1a - first_s_1a, 0)
        if ind_e > ind_s:
            d = self._raw_data[:, first_blk:last_full_blk, first_ch:last_ch]
            # Fortran-order reshape merges (sample-in-chunk, chunk) into one
            # sample axis. reshape() returns a new array, so the result has
            # to be kept; assigning to ``d.shape`` instead fails on this
            # non-contiguous view as soon as more than one chunk is selected.
            d = d.reshape((chunk * nchunk, nch), order='F')
            data[ind_s:ind_e, :] = d[first_s_1a:last_s_1a, :]

        ## 2. Additional samples after the complete chunks of the first segment
        first_s_1b = max(first_s - n_blk_1 * chunk, 0)
        last_s_1b = min(last_s - n_blk_1 * chunk, self._raw_last_chunk.shape[0])
        ind_s = ind_e
        ind_e = ind_s + max(last_s_1b - first_s_1b, 0)
        if ind_e > ind_s:
            data[ind_s:ind_e, :] = self._raw_last_chunk[first_s_1b:last_s_1b, first_ch:last_ch]

        ## 3. Complete chunks of the second segment
        first_s_2 = max(first_s - self._seg1_length, 0)
        last_s_2 = last_s - self._seg1_length
        if (first_s_2 > 0 or last_s_2 > 0) and self._raw_data2 is not None:
            n_blk_2 = self._raw_data2.shape[1]
            first_blk_2 = max(first_s_2 // chunk, 0)
            last_blk_2 = max(last_s_2 // chunk, 0)
            last_full_blk_2 = min(last_blk_2 + 1, n_blk_2)
            nchunk_2 = min(max(last_full_blk_2 - first_blk_2, 0), n_blk_2)
            first_s_2a = max(first_s_2 - first_blk_2 * chunk, 0)
            last_s_2a = min(last_s_2 - first_blk_2 * chunk, nchunk_2 * chunk)
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2a - first_s_2a, 0)
            if ind_e > ind_s:
                d2 = self._raw_data2[:, first_blk_2:last_full_blk_2, first_ch:last_ch]
                d2 = d2.reshape((chunk * nchunk_2, nch), order='F')
                data[ind_s:ind_e, :] = d2[first_s_2a:last_s_2a, :]

        ## 4. Additional samples of the second segment
        if (first_s_2 > 0 or last_s_2 > 0) and self._raw2_last_chunk is not None:
            n_blk_2 = self._raw_data2.shape[1]
            first_s_2b = max(first_s_2 - n_blk_2 * chunk, 0)
            last_s_2b = min(last_s_2 - n_blk_2 * chunk, self._raw2_last_chunk.shape[0])
            ind_s = ind_e
            ind_e = ind_s + max(last_s_2b - first_s_2b, 0)
            if ind_e > ind_s:
                data[ind_s:ind_e, :] = self._raw2_last_chunk[first_s_2b:last_s_2b, first_ch:last_ch]

        return data

    def get_mmap(self):
        """Return a read-only np.memmap of everything from the raw-data offset
        to the end of the file, reshaped to (-1, n_channels)."""
        if self._properties is None:
            self._properties = self._read_properties()
        # Use the data type found in the header (int16 or float32) instead
        # of assuming int16.
        arr = np.memmap(self._tdms_file, dtype=np.dtype(self._data_type), mode="r",
                        offset=self.fileinfo['raw_data_offset'])
        arr = arr.reshape((-1, self.fileinfo['n_channels']))
        return arr

    def _map_segment(self, dmap, offset, n_samples, nch):
        """Map one segment's raw data starting at byte *offset* of *dmap*.

        Returns ``(chunks, tail)``: ``chunks`` has axes
        [chunk_size, n_chunks, nch] and ``tail`` has axes [samples, nch] and
        holds the samples after the last complete chunk.
        """
        dtype = np.dtype(self._data_type)
        if self.fileinfo['decimated']:
            n_complete_blk = int(n_samples // self._chunk_size)
            ax_ord = 'C'
        else:
            n_complete_blk = 0
            ax_ord = 'F'
        chunks = np.ndarray((n_complete_blk, nch, self._chunk_size),
                            dtype=dtype,
                            buffer=dmap,
                            offset=offset)
        # Rotate the axes to [chunk_size, nblk, nch]
        chunks = np.rollaxis(chunks, 2)
        additional_samples = int(n_samples - n_complete_blk * self._chunk_size)
        additional_samples_offset = (
            offset + n_complete_blk * nch * self._chunk_size * dtype.itemsize)
        tail = np.ndarray((nch, additional_samples),
                          dtype=dtype,
                          buffer=dmap,
                          offset=additional_samples_offset,
                          order=ax_ord)
        # Rotate the axes to [samples, nch]
        tail = np.rollaxis(tail, 1)
        return chunks, tail

    def _initialise_data(self):
        """Initialise the memory map for the data array."""
        if self._chunk_size is None:
            self._read_chunk_size()

        dmap = mmap.mmap(self._tdms_file.fileno(), 0, access=mmap.ACCESS_READ)
        # Builtin int(): the np.int alias no longer exists in NumPy >= 1.24.
        rdo = int(self.fileinfo['raw_data_offset'])
        nch = int(self.fileinfo['n_channels'])
        nso = self.fileinfo['next_segment_offset']
        # Bytes taken by one sample across all channels.
        sample_bytes = nch * np.dtype(self._data_type).itemsize

        #TODO: Support streaming file type?
        #TODO: Is this a valid calculation for ChannelLength?
        self._seg1_length = int((nso - rdo) // sample_bytes)
        self._channel_length = self._seg1_length
        self._raw_data, self._raw_last_chunk = self._map_segment(
            dmap, rdo, self._seg1_length, nch)

        if self.file_size == nso:
            self._seg2_length = 0
            return

        # Skip tag, toc and version (12 bytes) of the second lead-in and
        # read its two offsets.
        self._tdms_file.seek(nso + 12, 0)
        (seg2_nso, seg2_rdo) = struct.unpack('<qq',
                                             self._tdms_file.read(2 * 8))
        # Integer division keeps the lengths integral; a float here would
        # make channel_length a float and break slicing in get_data().
        self._seg2_length = int((seg2_nso - seg2_rdo) // sample_bytes)
        self._raw_data2, self._raw2_last_chunk = self._map_segment(
            dmap, nso + LEAD_IN_LENGTH + seg2_rdo, self._seg2_length, nch)
        self._channel_length = self._seg1_length + self._seg2_length
448
+
449
+
450
if __name__ == '__main__':
    # Demo: print a few header values and load a window of data.
    print("TDMS Reader demo.")

    file_path = 'path_to_tdms_file.tdms'

    print('File: {0}'.format(file_path))

    # The context manager closes the file even if reading fails part-way.
    with TdmsReader(file_path) as tdms:
        props = tdms.get_properties()

        zero_offset = props.get('Zero Offset (m)')
        channel_spacing = props.get('SpatialResolution[m]') * props.get('Fibre Length Multiplier')
        n_channels = tdms.fileinfo['n_channels']
        depth = zero_offset + np.arange(n_channels) * channel_spacing
        fs = props.get('SamplingFrequency[Hz]')

        print('Number of channels in file: {0}'.format(n_channels))
        print('Time samples in file: {0}'.format(tdms.channel_length))
        print('Sampling frequency (Hz): {0}'.format(fs))

        first_channel = 250
        last_channel = 2275
        first_time_sample = 0
        last_time_sample = 3999

        some_data = tdms.get_data(first_channel, last_channel, first_time_sample, last_time_sample)

    print('Size of data loaded: {0}'.format(some_data.shape))
479
+
@@ -0,0 +1,91 @@
1
+ """
2
+ Deprecated
3
+ """
4
+
5
+ from os import path as P
6
+ import numpy as NP
7
+ import h5py as H5PY
8
+ import datetime as DT
9
+ from ..filefinder import FileFinder, to_posix_timestamp_ms
10
+ from ..chunk import Chunk
11
+
12
+
13
# Module configuration. The two *_AMOUNT values are passed to Chunk() in
# create_chunk(); DATA_ROOT is the directory handed to FileFinder.
FILE_TIME_SAMPLE_AMOUNT = 60_000
CHANNEL_AMOUNT = 10_000
DATA_ROOT = "/wave/seismic-rawdata/OPTA"
NUM_WORKER_THREADS = 16
CALIBRATE = True

# Importing this module fails with AssertionError when DATA_ROOT is missing.
assert P.isdir(DATA_ROOT)
19
+
20
def _filename_to_posix_timestamp(file_name:str) -> int:
    """ Parse the trailing 21 characters of file_name ("YYYY-MM-DDTHHMMSSZ.h5")
    and convert the result with to_posix_timestamp_ms. """
    stamp_text = file_name[-21:]
    parsed = DT.datetime.strptime(stamp_text, "%Y-%m-%dT%H%M%SZ.h5")
    return to_posix_timestamp_ms(parsed)
22
+
23
+
24
+
25
def _load_from_h5(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
    """ Internal helper function.

    Reads the requested channel/time window of the dataset
    'Acquisition/Raw[0]/RawData', transposes it so that the time axis comes
    first, and returns the result of _calibrate().
    """
    # Context managers close the HDF5 file and then the OS handle even if
    # the dataset lookup or the read raises.
    with open(file_path, 'rb') as file_handle, H5PY.File(file_handle, 'r') as file:
        # Data is not loaded into memory at this point! (Lazy evaluation)
        dataset = file['Acquisition']['Raw[0]']['RawData']

        # At this point the data gets loaded into memory.
        data = NP.array(dataset[
            channel_start : channel_end : channel_step,
            rel_t_start : rel_t_end : t_step
        ])

    # Transposing only swaps the strides; no data is copied.
    data = data.transpose()

    # NOTE(review): _calibrate() asserts a floating-point dtype, so this
    # fails for integer raw data - confirm the dataset dtype.
    return _calibrate(data)
46
+
47
+
48
+
49
+ def _calibrate(data:NP.ndarray) -> NP.ndarray:
50
+ """ Convert raw data to strain data.
51
+ As the resulting values are decimals, the datatype should be float. Otherwise an assertion fails. """
52
+ assert data.dtype in (NP.float, NP.float32, NP.float64), "The data should be floating point."
53
+
54
+ # The parameters and the formula are aquired from the Optasense user manual.
55
+ # If samples are stored as integer values the sample value’s unit is “rad*10430.378850470453”.
56
+ # To obtain phase shift values in “rad” divide each sample value by 10430.378850470453
57
+ # data /= 10430.378850470453
58
+ #
59
+ # delta_phase_shift_in_rad = 4 * pi * groupindex * gaugelength * scaling_factor * strain / wavelength
60
+ # wavelength = 1550 / 1000 / 1000 / 1000 # Meters. "OptaSenses ODH DAS systems operate at a wavelength of 1550 nm"
61
+ # groupindex = 1.468 #TODO inprecise # "The fiber’s refractive index can vary with fiber type, it is typically in the vicinity of 1.468"
62
+ # gaugelength = 10 # Meters
63
+ # scaling_factor = 0.78
64
+ # delta_phase_shift_in_rad = data
65
+ # strain = delta_phase_shift_in_rad * wavelength / 4 / 3.141 / groupindex / gaugelength / scaling_factor
66
+ # print("Factor: ", wavelength / 4 / 3.141 / groupindex / gaugelength / scaling_factor, 1 / (wavelength / 4 / 3.141 / groupindex / gaugelength / scaling_factor))
67
+ # data *= wavelength / 4 / 3.141 / groupindex / gaugelength / scaling_factor
68
+ # Result: Strain [Dimensionless, m/m]
69
+
70
+ GAUGELENGTH = 10
71
+ OMN_2 = 10430.378350470453
72
+ OMN_WAVELENGTH = 1550e-9
73
+ OMN_N = 1.4682
74
+ OMN_X = 0.78
75
+
76
+ OPTASENSE_CAL = OMN_WAVELENGTH / 4 / pi / OMN_N / GAUGELENGTH / OMN_X / OMN_2
77
+ return data * OPTASENSE_CAL
78
+
79
+
80
# Shared FileFinder over DATA_ROOT for files with the ".h5" suffix.
FILE_FINDER = FileFinder(DATA_ROOT, ".h5", _filename_to_posix_timestamp)


def create_chunk():
    """ Build a Chunk wired to FILE_FINDER and _load_from_h5.

    NOTE(review): the meaning of the positional arguments (True, 16, True)
    is defined by Chunk, which is not visible here. """
    chunk_args = (
        FILE_FINDER,
        CHANNEL_AMOUNT,
        FILE_TIME_SAMPLE_AMOUNT,
        True,
        16,
        True,
        _load_from_h5,
    )
    return Chunk(*chunk_args)
@@ -0,0 +1,111 @@
1
+ """ Deprecated
2
+ """
3
+
4
+ from math import ceil, floor
5
+ import mmap
6
+ from os import path as P
7
+ import numpy as NP
8
+ import h5py as H5PY
9
+ import datetime as DT
10
+ from time import time
11
+ from filefinder import FileFinder, to_posix_timestamp_ms
12
+ from chunk import Chunk
13
+
14
# Example of a cache file path:
# /wave/seismic-work/markhoff/pilot/data/cache/7wave7seismic-rawdata7OPTA7Disk27DESY-Rec-9-GL8m-Chan10000_2021-05-28T06_01_36+01007DESY-Rec-9-GL8m-Chan10000_2021-05-28T194319Z.h5.bin
FILE_TIME_SAMPLE_AMOUNT = 60_000
CHANNEL_AMOUNT = 10_000
DATA_ROOT = "/wave/seismic-work/markhoff/pilot/data/cache"

# Importing this module fails with AssertionError when DATA_ROOT is missing.
assert P.isdir(DATA_ROOT)
19
+
20
def _filename_to_posix_timestamp(file_name:str) -> int:
    """ Parse the trailing 25 characters of file_name ("YYYY-MM-DDTHHMMSSZ.h5.bin")
    and convert the result with to_posix_timestamp_ms. """
    stamp_text = file_name[-25:]
    parsed = DT.datetime.strptime(stamp_text, "%Y-%m-%dT%H%M%SZ.h5.bin")
    return to_posix_timestamp_ms(parsed)
22
+
23
+
24
+
25
def _load_from_h5(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
    """ Internal helper function.

    Reads channels channel_start..channel_end (exclusive) from a flat int32
    file in which each channel occupies FILE_TIME_SAMPLE_AMOUNT consecutive
    values, applies the channel and time strides, and returns the window
    transposed so that the time axis comes first.
    """
    n_rows = channel_end - channel_start
    bytes_per_value = NP.dtype(NP.int32).itemsize

    # Only the requested channel rows are read from disk.
    data = NP.fromfile(
        file_path,
        dtype = NP.int32,
        offset = channel_start * FILE_TIME_SAMPLE_AMOUNT * bytes_per_value,
        count = n_rows * FILE_TIME_SAMPLE_AMOUNT
    )
    # Raises ValueError when the file held fewer values than requested.
    data = data.reshape(n_rows, FILE_TIME_SAMPLE_AMOUNT)
    data = data[::channel_step, rel_t_start:rel_t_end:t_step]

    print("Args (channel):", channel_start, channel_end, channel_step)
    # Third value is the step (rel_t_end used to be printed twice).
    print("Args (time):", rel_t_start, rel_t_end, t_step)
    print("Fresh after loading: ", data.shape)
    return data.transpose()  # swaps strides only, no copy
80
+
81
def _load_from_h5_X(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
    """ Memory-mapped variant of _load_from_h5 that prints stage timings.

    Maps the whole file as int32 with shape (CHANNEL_AMOUNT,
    FILE_TIME_SAMPLE_AMOUNT), copies the requested window into memory and
    returns it transposed. """
    started = time()
    mapped = NP.memmap(file_path, dtype=NP.int32, mode='readonly')
    mapped.shape = (CHANNEL_AMOUNT, FILE_TIME_SAMPLE_AMOUNT)
    after_map = time()

    window = mapped[channel_start:channel_end:channel_step, rel_t_start:rel_t_end:t_step]
    after_slice = time()

    in_memory = NP.array(window)
    after_copy = time()

    result = in_memory.transpose()
    after_transpose = time()

    # Durations of: mapping, slicing, copying, transposing.
    print("DELTAS", after_map-started, after_slice-after_map, after_copy-after_slice, after_transpose-after_copy)
    return result
98
+
99
# Shared FileFinder over DATA_ROOT for files with the ".h5.bin" suffix.
FILE_FINDER = FileFinder(DATA_ROOT, ".h5.bin", _filename_to_posix_timestamp)


def create_chunk():
    """ Build a Chunk wired to FILE_FINDER and _load_from_h5.

    NOTE(review): the meaning of the positional arguments (True, 8, False)
    is defined by Chunk, which is not visible here. """
    chunk_args = (
        FILE_FINDER,
        CHANNEL_AMOUNT,
        FILE_TIME_SAMPLE_AMOUNT,
        True,
        8,
        False,
        _load_from_h5,
    )
    return Chunk(*chunk_args)
+ )