das2numpy 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,87 @@
1
+ """ Universal setup file for silixa, that detects sampling rate and number of channels by itself.
2
+ The root directory shall be supplied by the user via an argument
3
+ """
4
+
5
+ import sys as SYS
6
+ from os import path as P
7
+ import datetime as DT
8
+ import numpy as NP
9
+ from ..filefinder import FileFinder, to_posix_timestamp_ms
10
+ from ..chunk import Chunk
11
+ from .light_tdms_reader import TdmsReader
12
+
13
+
14
# When true, load_file() passes every loaded slice through calibrate().
CALIBRATE = True


def init(root_path, num_worker_threads):
    """Build a Chunk for the ``.tdms`` files found below *root_path*.

    The first file reported by the file finder is opened once and the shape
    of its memory-mapped data is used as (samples per file, channel count)
    for the whole data set.

    :param root_path: Existing directory containing the ``.tdms`` files.
    :param num_worker_threads: Forwarded unchanged to ``Chunk``.
    :return: The configured ``Chunk`` instance.
    """
    assert P.isdir(root_path)

    finder = FileFinder(root_path, ".tdms", filename_to_posix_timestamp)

    # Probe a single file; get_first() yields a sequence whose element [1]
    # is the file path.
    probe_path = finder.get_first()[1]
    dims = TdmsReader(probe_path).get_mmap().shape
    samples_per_file = dims[0]
    n_channels = dims[1]
    # TODO read sampling freq from file (props 'SamplingFrequency[Hz]')

    # The positional True/False flags are defined by Chunk's signature,
    # which is not visible from this module.
    return Chunk(
        finder,
        n_channels,
        samples_per_file,
        True,
        num_worker_threads,
        False,
        load_file,
    )
41
+
42
def filename_to_posix_timestamp(file_name: str) -> int:
    """Parse the timestamp at the end of a ``.tdms`` file name.

    The last 24 characters must have the form
    ``YYYYMMDD_HHMMSS.fff.tdms`` (e.g. ``20210530_135950.619.tdms``).

    :param file_name: File name or path ending in the pattern above.
    :return: Result of ``to_posix_timestamp_ms`` for the parsed datetime.
    :raises ValueError: If the suffix does not match the pattern.
    """
    stamp_suffix = file_name[-24:]
    parsed = DT.datetime.strptime(stamp_suffix, "%Y%m%d_%H%M%S.%f.tdms")
    return to_posix_timestamp_ms(parsed)
44
+
45
+
46
+
47
def load_file(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
    """Load a time/channel window from one ``.tdms`` file.

    The window ``[rel_t_start:rel_t_end, channel_start:channel_end]`` is cut
    from the file's memory-mapped data first; the time and channel steps are
    applied to that window afterwards.

    :param file_path: Path of the ``.tdms`` file to read.
    :param rel_t_start: First time index (inclusive), must be >= 0.
    :param rel_t_end: Last time index (exclusive), must be > rel_t_start.
    :param t_step: Stride along the time axis.
    :param channel_start: First channel index (inclusive).
    :param channel_end: Last channel index (exclusive).
    :param channel_step: Stride along the channel axis.
    :return: The selected data, passed through ``calibrate`` when the
        module-level ``CALIBRATE`` flag is set.
    """
    assert 0 <= rel_t_start < rel_t_end

    mapped = TdmsReader(file_path).get_mmap()
    window = mapped[rel_t_start:rel_t_end, channel_start:channel_end]

    # Strides are applied to the already-cut window, not to the raw mapping.
    if t_step != 1:
        window = window[::t_step]
    if channel_step != 1:
        window = window[:, ::channel_step]
    assert len(window) > 0

    return calibrate(window) if CALIBRATE else window
66
+
67
+
68
+
69
+
70
+
71
def calibrate(data: NP.ndarray) -> NP.ndarray:
    """Convert raw data to strain-rate data (result unit: 1/s).

    Input that is not already ``float32`` or ``float64`` is converted to
    ``float32`` first, because the scaled values are fractional.

    :param data: Raw samples.
    :return: A new array holding ``data * factor``.
    """
    # NP.float was only an alias for the builtin float (== float64) and was
    # removed in NumPy 1.24, so it must not be referenced here.
    if data.dtype not in (NP.float32, NP.float64):
        data = data.astype(NP.float32)

    SAMPLE_FREQ = 1000.0  # hard-coded; not yet read from the file
    EICHLAENGE = 10.0  # German for gauge/calibration length - unit presumably metres, TODO confirm
    factor = 116.0 * 10.0**(-9.0) / 8192.0 * SAMPLE_FREQ / EICHLAENGE
    return data * factor  # Result: 1 / s
84
+
85
+
86
+
87
+
das2numpy/test.py ADDED
@@ -0,0 +1,158 @@
1
+ """
2
+ Deprecated
3
+
4
+ Unittests for this dataloader-module
5
+ by Erik Genthe
6
+ 05.01.2022
7
+ """
8
+ from math import ceil, floor
9
+ import sys as SYS
10
+ from os import path as P
11
+ import datetime as DT
12
+ import h5py as H5PY
13
+ import numpy as NP
14
+
15
+ try:
16
+ import dataloader as D
17
+ except ModuleNotFoundError as e:
18
+ raise RuntimeError("TO RUN THIS TEST, MOVE IT INTO THE PARENT DIR FIRST!") from e
19
+ from dataloader.filefinder import to_posix_timestamp_ms
20
+
21
+
22
+
23
def test_silixa_filefinder():
    """Check the Silixa file finder for a single instant and for the full range."""
    # Reference file on the original system:
    # /wave/seismic-rawdata/desy_12km_1m_P7gauss/desy_UTC_20210522_155121.950.tdms
    # ls /wave/seismic-rawdata/desy_12km_1m_P7gauss -l | grep -n --invert-match 504946688
    finder = D.silixa.FILE_FINDER

    # A zero-length range must resolve to exactly the file covering that instant.
    instant = DT.datetime(2021, 5, 30, 14, 0, 0)
    hits = finder.get_range(instant, instant)
    assert len(hits) == 1
    assert hits[0][1].endswith('/desy_UTC_20210530_135950.619.tdms')

    # From the epoch until now: every file of the data set.
    now_ms = D.to_posix_timestamp_ms(DT.datetime.now())
    everything = finder.get_range_posix(0, now_ms)
    assert len(everything) > 9000
36
+
37
+
38
def test_optasense_filefinder():
    """Check the Optasense file finder for a single instant and for the full range."""
    finder = D.optasense.FILE_FINDER

    # A zero-length range must resolve to exactly the file covering that instant.
    instant = DT.datetime(2021, 5, 30, 14, 0, 0)
    hits = finder.get_range(instant, instant)
    assert len(hits) == 1
    assert hits[0][1].endswith('2021-05-30T135924Z.h5')

    # From the epoch until now: every file of the data set.
    now_ms = D.to_posix_timestamp_ms(DT.datetime.now())
    everything = finder.get_range_posix(0, now_ms)
    assert len(everything) > 9000
48
+
49
+
50
def test_fast_optasense_filefinder():
    """Check the fast Optasense file finder for a single instant and for the full range."""
    # Find one specific file...
    time = DT.datetime(2021, 5, 30, 14, 00, 00)
    filelist = D.fast_optasense.FILE_FINDER.get_range(time, time)
    assert len(filelist) == 1
    assert filelist[0][1].endswith('2021-05-30T135924Z.h5.bin')

    # Find all files...
    # Query the fast_optasense finder here as well; the previous version
    # queried D.optasense, which only repeated test_optasense_filefinder.
    filelist = D.fast_optasense.FILE_FINDER.get_range_posix(0, D.to_posix_timestamp_ms(DT.datetime.now()))
    assert len(filelist) > 9000
60
+
61
+
62
+
63
def test_chunk(chunk, MAX_CHANNEL):
    """Load several windows through *chunk*, check the shapes and print timings.

    The shape assertions imply 1000 samples per second (one second of data
    must yield 1000 rows).

    :param chunk: Object offering ``load(t_start, t_end, t_step,
        channel_start, channel_end, channel_step)`` and a ``data`` array.
    :param MAX_CHANNEL: Number of channels of the data set behind *chunk*.
    """
    import time as TIME
    #MAX_CHANNEL = 12608
    #chunk = D.silixa.create_chunk()
    t_start: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 00, 00))
    t_end1: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 00, 1))
    t_end2: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 1, 30))
    t_end3: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 10, 00))
    t_end_one_hour: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 15, 00, 00))
    print()

    # One second, all channels, no striding.
    chunk.load(t_start, t_end1, 1, 0, MAX_CHANNEL, 1)
    assert chunk.data.shape == (1000, MAX_CHANNEL)
    print()

    # 90 seconds with time step 3 and channel step 9.
    chunk.load(t_start, t_end2, 3, 0, MAX_CHANNEL, 9)
    assert chunk.data.shape == (30000, ceil(MAX_CHANNEL / 9))
    print()

    # Now some benchmarks...
    # t_end3 is ten minutes after t_start (600000 samples), so the message
    # says "ten minutes" rather than the former, incorrect "one hour".
    bench_start = TIME.time()
    chunk.load(t_start, t_end3, 1, 0, 1000, 1)
    print("Time for loading the first 1000 sensors of ten minutes of data: %4f\n" % (TIME.time() - bench_start))
    assert chunk.data.shape == (600000, 1000)

    bench_start = TIME.time()
    chunk.load(t_start, t_end_one_hour, 1, 0, MAX_CHANNEL, 10)
    print("Time for loading one hour of data with sensor_step=10: %4f\n" % (TIME.time() - bench_start))
    assert chunk.data.shape == (1000*60*60, ceil(MAX_CHANNEL/10))

    bench_start = TIME.time()
    chunk.load(t_start, t_end_one_hour, 1, 0, 100, 1)
    print("Time for loading 100 sensors with 1 hour of data: %4f\n" % (TIME.time() - bench_start))

    bench_start = TIME.time()
    chunk.load(t_start, t_end_one_hour, 1, 0, 1000, 1)
    print("Time for loading 1000 sensors with 1 hour of data: %4f\n" % (TIME.time() - bench_start))

    bench_start = TIME.time()
    chunk.load(t_start, t_end_one_hour, 1, 0, MAX_CHANNEL, 1)
    print("Time for loading 1 hour completely: %4f\n" % (TIME.time() - bench_start))
111
+
112
+
113
+
114
def test_equalness_of_fast_opta_simple():
    """Fast and regular Optasense loaders must agree on a small, unstrided window."""
    begin_ms: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 0, 0))
    end_ms: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 0, 1))

    # One second of the first ten channels, loaded through both back ends.
    fast = D.fast_optasense.create_chunk()
    fast.load(begin_ms, end_ms, 1, 0, 10, 1)

    reference = D.optasense.create_chunk()
    reference.load(begin_ms, end_ms, 1, 0, 10, 1)

    assert fast.data.shape == reference.data.shape
    assert NP.array_equiv(fast.data, reference.data)
126
+
127
+
128
def test_equalness_of_fast_opta():
    """Fast and regular Optasense loaders must agree on a strided window."""
    begin_ms: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 0, 0))
    end_ms: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 0, 1))

    # One second, time step 3, channels 2000..7000 with channel step 9.
    fast = D.fast_optasense.create_chunk()
    fast.load(begin_ms, end_ms, 3, 2000, 7000, 9)

    reference = D.optasense.create_chunk()
    reference.load(begin_ms, end_ms, 3, 2000, 7000, 9)

    assert fast.data.shape == reference.data.shape
    assert NP.array_equiv(fast.data, reference.data)
140
+
141
+
142
if __name__ == '__main__':
    # Individual checks, currently disabled:
    #test_equalness_of_fast_opta_simple()
    #test_equalness_of_fast_opta()
    #test_fast_optasense_filefinder()
    #test_silixa_filefinder()
    #test_optasense_filefinder()

    # Benchmarks run in this order; each loader module is looked up only
    # when its turn comes.
    for title, module_name, max_channel in (
        ("\nSilixa benchmark:", "silixa", 12608),
        ("\nFast Optasense benchmark:", "fast_optasense", 10000),
    ):
        print(title)
        test_chunk(getattr(D, module_name).create_chunk(), max_channel)

    #print("\nOptasense benchmark:")
    #test_chunk(D.optasense.create_chunk(), 10000)
das2numpy/utils.py ADDED
@@ -0,0 +1,136 @@
1
+ """
2
+ Everything, that modifies the signal.
3
+ author: Erik Genthe
4
+ """
5
+
6
+ import numpy as NP
7
+ from numba import njit
8
+ import scipy.signal as SS
9
+
10
# Index of the time axis in the 2-d arrays handled by this module.
TIME_AXIS = 0


def remove_channel_offset(data: NP.ndarray):
    """Subtract each channel's mean from that channel, in place.

    Deprecated: emits a ``DeprecationWarning`` on every call.
    Expects the time axis to be the first axis, so the mean is taken over
    axis 0 and one constant per column is removed.

    :param data: 2-d array, modified in place.
    :return: None.
    """
    # ``@DeprecationWarning`` used as a decorator replaced this function with
    # an exception instance, making every call fail with TypeError. Warn
    # explicitly instead so the function stays callable.
    import warnings
    warnings.warn(
        "remove_channel_offset is deprecated.",
        DeprecationWarning,
        stacklevel=2,
    )

    print("Warning! Untested function!") #TODO
    data -= data.mean(axis=0)
23
+
24
@njit
def differentiate(data: NP.ndarray, axis: int) -> NP.ndarray:
    """Forward-difference a 2-dimensional signal along one axis.

    The input is copied; the copy is modified and returned, the input stays
    untouched. Element ``i`` along *axis* becomes ``x[i+1] - x[i]``.
    The last slice along *axis* is never assigned and therefore keeps its
    original values.

    :param data: 2-d input array.
    :param axis: 0 or 1.
    :return: differentiated array (same shape as the input)
    :raises Exception: If there are fewer than two samples along *axis*.
    """
    assert axis == 0 or axis == 1
    data = data.copy()
    if data.shape[axis] < 2:
        raise Exception("Differentiation with less than two samples makes no sense.")
    if axis == 0:
        for i in range(0, data.shape[0]-1):
            data[i] = data[i+1] - data[i]
    elif axis == 1:
        for i in range(0, data.shape[1]-1):
            data[:,i] = data[:,i+1] - data[:,i]
    return data
42
+
43
+ #@njit
44
+ #def integrate(data: NP.ndarray, axis: int) -> NP.ndarray:
45
+ # """Integrate the 2-dimensional signal over one axis
46
+ # A 2-d array is expected as input
47
+ # The return-value is None. The array is copied, modified and returned.
48
+ # :return: integrated array
49
+ # """
50
+ # assert axis == 0 or axis == 1
51
+ # data = data.copy()
52
+ #
53
+ # if data.shape[axis] < 2:
54
+ # raise Exception("Integration with less then two samples makes no sense.")
55
+ # if axis == 0:
56
+ # for i in range(1, data.shape[0]):
57
+ # data[i] = data[i] + data[i-1]
58
+ # elif axis == 1:
59
+ # for i in range(1, data.shape[1]):
60
+ # data[:, i] = data[:, i] + data[:, i-1]
61
+ # return data
62
def integrate(data: NP.ndarray, axis: int, sample_rate_hz: float) -> NP.ndarray:
    """Integrate the signal along one axis as a scaled cumulative sum.

    The input is not modified; a new array is returned.

    :param data: Input array (2-d expected).
    :param axis: Axis along which the cumulative sum is taken.
    :param sample_rate_hz: Divisor applied to the cumulative sum.
    :return: integrated array
    """
    running_total = NP.cumsum(data, axis=axis)
    return running_total / sample_rate_hz
70
+
71
+
72
def butterworth_filter(
        array : NP.ndarray,
        freq : float,
        order : int,
        btype, #: {'lowpass', 'highpass'}
        fs : float) -> NP.ndarray:
    """
    Apply a zero-phase Butterworth filter along the time axis.
    :array: The input data. Two dimensions expected. First dimension is expected to be the time dimension.
    :freq: Critical frequency, forwarded to scipy.signal.iirfilter.
    :order: Filter order.
    :btype: Filter type, e.g. 'lowpass' or 'highpass'.
    :fs: Sampling frequency of the data.
    return: The filtered array
    """
    # Design as second-order sections; rp, rs and analog stay at their
    # SciPy defaults (None, None, False).
    sections = SS.iirfilter(order, freq, btype=btype, ftype='butter', output='sos', fs=fs)
    # Forward-backward filtering; padtype='odd' and padlen=None are the
    # SciPy defaults.
    return SS.sosfiltfilt(sections, array, axis=TIME_AXIS)
86
+
87
+ #https://numpy.org/doc/stable/reference/generated/numpy.fft.fft.html#numpy.fft.fft
88
def fft(array):
    """Placeholder for a Fourier transform; always raises.

    :param array: Unused.
    :raises NotImplementedError: Always.
    """
    # The former ``return None`` after the raise was unreachable.
    raise NotImplementedError("Not implemented yet")
91
+
92
+
93
+
94
def bin(arr: NP.ndarray, bin_factors:tuple):
    """Return *arr* averaged over blocks of ``bin_factors`` elements.

    If both factors are 1 the original array object is returned. The output
    shape is the input shape floor-divided by the factors, so trailing rows
    or columns that do not fill a whole block are dropped.

    :param arr: 2-d input array.
    :param bin_factors: Two positive block sizes, one per axis.
    :return: Binned array with the dtype of *arr*.
    """
    assert len(bin_factors) == len(arr.shape)
    #assert arr.dtype == NP.float32 or arr.dtype == NP.float64
    assert len(arr.shape) == 2
    assert all(factor > 0 for factor in bin_factors)

    # Nothing to average: hand back the input itself.
    if bin_factors[0] == 1 and bin_factors[1] == 1:
        return arr

    target_shape = NP.array(arr.shape) // NP.array(bin_factors)
    binned = NP.empty(target_shape, dtype=arr.dtype)
    _bin_helper(arr, binned, bin_factors)
    return binned
109
+
110
@njit
def _bin_helper(arr, newarr, bin_factors):
    """Fill *newarr* with the block means of *arr* (compiled with numba).

    Cell ``(x, y)`` of *newarr* receives the mean of the
    ``bin_factors[0] x bin_factors[1]`` block of *arr* that starts at
    ``(x * bin_factors[0], y * bin_factors[1])``.
    """
    rows_per_bin = bin_factors[0]
    cols_per_bin = bin_factors[1]
    for x in range(newarr.shape[0]):
        row0 = x * rows_per_bin
        for y in range(newarr.shape[1]):
            col0 = y * cols_per_bin
            newarr[x, y] = NP.mean(arr[row0 : row0 + rows_per_bin, col0 : col0 + cols_per_bin])
117
+
118
+
119
def log_scale_symmetric(arr:NP.ndarray) -> NP.ndarray:
    """Symmetric base-2 logarithmic scaling.

    Non-negative values map to ``log2(x + 1)``; negative values are scaled
    as if they were positive and get their sign back: ``-log2(-x + 1)``.

    :param arr: Input array.
    :return: float32 array of the same shape.
    """
    baseline = NP.zeros(arr.shape)

    # Positive half: negatives are clamped to 0 and contribute log2(1) == 0.
    upper = NP.maximum(baseline, arr)
    upper = NP.log2(upper + 1, dtype=NP.float32)

    # Negative half: flip the sign, scale, and subtract it again below.
    lower = NP.minimum(baseline, arr)
    lower = NP.log2(lower * -1 + 1, dtype=NP.float32)

    return upper - lower