das2numpy 0.0.4__tar.gz → 1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {das2numpy-0.0.4/src/das2numpy.egg-info → das2numpy-1.0}/PKG-INFO +2 -1
- {das2numpy-0.0.4 → das2numpy-1.0}/README.md +1 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/pyproject.toml +1 -1
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/__init__.py +7 -6
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/chunk.py +49 -48
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/filefinder.py +1 -1
- das2numpy-1.0/src/das2numpy/setups/flac_200hz.py +110 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/silixa.py +40 -26
- das2numpy-1.0/src/das2numpy/setups/silixa_200hz.py +105 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/utils.py +68 -7
- {das2numpy-0.0.4 → das2numpy-1.0/src/das2numpy.egg-info}/PKG-INFO +2 -1
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/SOURCES.txt +4 -1
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/top_level.txt +1 -0
- das2numpy-1.0/src/test_downsampled.py +54 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/LICENSE +0 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/setup.cfg +0 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/__main__.py +0 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/light_tdms_reader.py +0 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/optasense_b35idefix.py +0 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/optasense_b35idefix_fast.py +0 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/test.py +0 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/dependency_links.txt +0 -0
- {das2numpy-0.0.4 → das2numpy-1.0}/src/example.py +0 -0

{das2numpy-0.0.4/src/das2numpy.egg-info → das2numpy-1.0}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: das2numpy
-Version: 0.0.4
+Version: 1.0
 Summary: A simple and universal package for loading large amounts of distributed acoustic sensing (DAS) data.
 Author-email: Erik Genthe <erik.genthe@desy.de>
 Project-URL: Homepage, https://git.physnet.uni-hamburg.de/wave/das2numpy
@@ -49,6 +49,7 @@ Returns:
 ```
 
 
+
 #### More detailed interface
 ```python
 def load_array(t_start:datetime, t_end:datetime, t_step:int, channel_start:int, channel_end:int, channel_step:int) -> NP.ndarray:

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/__init__.py
@@ -4,16 +4,20 @@
 
 import os as OS
 import numpy as NP
-from
+from . import utils
 
 
-
-#@dispatch(str, str, int)
 def loader(root_path:str, predefined_setup:str, num_worker_threads):
 
     if predefined_setup.upper() == "SILIXA":
         from .setups import silixa
         chunk = silixa.init(root_path, num_worker_threads)
+    elif predefined_setup.upper() == "SILIXA_200HZ":
+        from .setups import silixa_200hz
+        chunk = silixa_200hz.init(root_path, num_worker_threads)
+    elif predefined_setup.upper() == "FLAC_200HZ":
+        from .setups import flac_200hz
+        chunk = flac_200hz.init(root_path, num_worker_threads)
     elif predefined_setup.upper() == "OPTASENSE":
         from .setups import optasense_b35idefix
         chunk = optasense_b35idefix.init()
@@ -23,6 +27,3 @@ def loader(root_path:str, predefined_setup:str, num_worker_threads):
     return chunk
 
 
-#@dispatch(str, str, int)
-#def loader(self, root_path:str, predefined_setup:str, num_worker_threads):
-#    return chunk
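
The loader dispatch above now recognises two additional setup keys, "SILIXA_200HZ" and "FLAC_200HZ". A minimal usage sketch, mirroring the new src/test_downsampled.py further down (the root path here is a placeholder, not a path from the package):

```python
from datetime import datetime
from das2numpy import loader

# Placeholder root directory; "SILIXA_200HZ" selects the new 200 Hz TDMS setup.
chunk = loader("/data/das/downsampled-200hz/", "SILIXA_200HZ", 1)

# load_array is dispatched on argument types; this variant takes
# (t_start, t_end, channel_start, channel_end), as in src/test_downsampled.py.
data = chunk.load_array(datetime(2025, 10, 14, 2, 58, 59),
                        datetime(2025, 10, 14, 2, 59, 1),
                        1000, 3000)
```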

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/chunk.py
@@ -16,6 +16,7 @@ from random import shuffle
 from multipledispatch import dispatch
 import concurrent.futures as CF
 from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
 from multiprocessing import Pool
 import numpy as NP
 from .filefinder import FileFinder, to_posix_timestamp_ms
@@ -23,7 +24,7 @@ from .filefinder import FileFinder, to_posix_timestamp_ms
 
 SHUFFLE_TASKS = False
 
-def
+def _calc_size(start: int, end: int, step: int) -> int:
     diff = end - start
     return int(((diff-1) - (diff-1)%step) / step + 1)
 
@@ -37,25 +38,25 @@ class Chunk():
     the data and the meta information can be accessed directly by accessing the following fields:
     data, timestamps, geo_positions, channel.
     TODO implement geo_positions, channel, timestamps
-    author:
+    author: Erik genthe
     """
 
 
     def __init__(self,
-            file_finder:FileFinder,
-
-            file_time_sample_amount:int,
+            file_finder:FileFinder,
+            sample_rate,
             multithreaded:bool,
             workers:int,
             workerprocess:bool,
             loading_function:Callable[[str, int, int, int, int, int, int], NP.ndarray]
         ):
         self.__file_finder = file_finder
-        self.
-        self.__file_time_sample_amount = file_time_sample_amount
+        self.__sample_rate = sample_rate
         self.__multithreaded = multithreaded
         self.__workerprocess = workerprocess
         self.__loading_function = loading_function
+        self.__lock = Lock()
+        assert type(sample_rate) == int
         if multithreaded:
             self.__executor = ThreadPoolExecutor(workers)
         if not self.__multithreaded:
@@ -64,7 +65,7 @@ class Chunk():
 
 
     def __load_from_file_into_data(self,
-
+            file_timestamp:int, # The timestamp retrieved from the filename
             file_path:str,
             t_start:int,
             t_end:int,
@@ -73,50 +74,44 @@ class Chunk():
             channel_end:int,
             channel_step:int
         ) -> None:
-        #print("Args: ",
+        #print("Args: ", file_timestamp, file_path, t_start, t_end, t_step, channel_start, channel_end, channel_step)
         # Check if the whole file shall be loaded. Especially the first and last file could be cut...
         print("das2numpy: Loading from", file_path)
-
-        rel_t_end = self.__file_time_sample_amount
-        if t_start > start_timestamp:
-            rel_t_start = t_start - start_timestamp
-        if t_end < start_timestamp + self.__file_time_sample_amount: #TODO magicnumber
-            rel_t_end = t_end - start_timestamp
-        if rel_t_start == rel_t_end:
-            return # Do nothing
-        #print("relative start, relative end", rel_t_start, rel_t_end)
-        if start_timestamp + self.__file_time_sample_amount <= t_start:
-            print("Warning: File does not contain any parts of the requested data.",
-                "This can happen if there are leaks in the data. The corresponding output will be left filled with zeros.\n",
-                f" Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
-                f" Filepath: {file_path}.")
-            return
-        assert rel_t_end > rel_t_start, f"rel_t_start={rel_t_start}, rel_t_end={rel_t_end}."
-
+
 
         # Load h5-data using a different process... There is no other way to make h5py work parallel :(
         data = None
         if self.__workerprocess:
             pool = Pool(1)
             result = pool.apply_async(self.__loading_function,
-                (file_path,
+                (file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step))
             pool.close()
-
-            data = result
+            data = result.get() # Blocks!
         else:
-            data = self.__loading_function(file_path,
+            data = self.__loading_function(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step)
 
         # Store loaded data part into all_data
-        start_index =
+        start_index = int((file_timestamp - t_start) * self.__sample_rate / 1000 / t_step)
         #print(start_index)
         if start_index < 0:
             start_index = 0
         #print("Shape: ", data.shape)
 
+        if data.shape[1] != self.data.shape[1]:
+            print(f"Amount of channels detected in file {file_path} is {data.shape[1]}. The buffer has {self.data.shape[1]}")
+
+        if channel_end == -1:
+            with self.__lock:
+                # If number of channels increased, reallocate the target array.
+                if self.data.shape[1] < data.shape[1]:
+                    print("Reallocating buffer")
+                    old = self.data
+                    self.data = NP.zeros((self.data.shape[0], data.shape[1]), dtype=data.dtype)
+                    self.data[:, :old.shape[1]] = old[:,:]
+                    del old
+
         # To make this a little bit tolerant to a changing amount of channels per file, also the number of channels is given!
         n_channels = min(data.shape[1], self.data.shape[1])
-        if data.shape[1] != self.data.shape[1]:
-            print(f"Warning: Incosistend amount of channels detected in file {file_path}. Expected={self.data.shape[1]}, file={data.shape[1]}. Cropping to fit.")
         self.data[start_index : start_index + data.shape[0], 0:n_channels] = data[:,:n_channels]
 
     @dispatch(int, int, int, int, int, int)
@@ -140,31 +135,34 @@
         """
 
         assert channel_start >= 0
-        assert channel_start <= self.__file_channel_amount
-        if channel_end == -1:
-
-
-
+        #assert channel_start <= self.__file_channel_amount
+        #if channel_end == -1: channel_end = self.__file_channel_amount
+        assert channel_end == -1 or channel_end > 0
+        if channel_end != -1:
+            assert channel_end >= channel_start
+        #assert channel_end <= self.__file_channel_amount, "channel_end has to be less or equal than self.__file_channel_amount"
         assert t_step > 0
         assert channel_step > 0
 
         file_pathes = self.__file_finder.get_range_posix(t_start, t_end)
         print(f"Loading data from {len(file_pathes)} files.")
         #print("file_pathes", file_pathes)
-        data_shape =
-
-
-
+        data_shape = [
+            _calc_size(t_start * self.__sample_rate / 1000, t_end * self.__sample_rate / 1000, t_step),
+            _calc_size(channel_start, channel_end, channel_step)
+        ]
+        if channel_end == -1:
+            data_shape[1] = 1
         self.data = NP.zeros(shape=data_shape, dtype=NP.float32)
         if self.__multithreaded:
             futures = []
             if SHUFFLE_TASKS:
                 shuffle(file_pathes)
-            for
+            for file_timestamp, file_path in file_pathes:
                 futures.append(
                     self.__executor.submit(
                         self.__load_from_file_into_data,
-
+                        file_timestamp,
                         file_path,
                         t_start,
                         t_end,
@@ -179,9 +177,9 @@
                 future.result() # Raises possible exceptions
 
         else:
-            for
+            for file_timestamp, file_path in file_pathes:
                 self.__load_from_file_into_data(
-
+                    file_timestamp,
                     file_path,
                     t_start,
                     t_end,
@@ -189,8 +187,11 @@
                     channel_start,
                     channel_end,
                     channel_step)
-
-
+
+        # The following is weird, but it solves issues with garbage collection. Otherwise this behaves like a memory leak.
+        data = self.data
+        del self.data
+        return data
 
 
 
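
The new `_calc_size` helper computes the number of samples a strided slice yields, i.e. the ceiling of `(end - start) / step`; `start_index` then places each file's block at `(file_timestamp - t_start) * sample_rate / 1000 / t_step` in the shared buffer. A quick check of the size formula (illustrative only):

```python
import math

def _calc_size(start: int, end: int, step: int) -> int:
    # Same formula as in chunk.py: floor((diff-1)/step) + 1 == ceil(diff/step).
    diff = end - start
    return int(((diff - 1) - (diff - 1) % step) / step + 1)

for diff, step in [(10, 3), (10, 5), (7, 2), (1, 4)]:
    assert _calc_size(0, diff, step) == math.ceil(diff / step) == len(range(0, diff, step))
```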

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/filefinder.py
@@ -73,7 +73,7 @@ class FileFinder():
         tuple: A triple (internal_index, posix timestamp in millis of the file start, file path)
         None: If the given time was before any recording was done.
         """
-        for i in range(len(self.__file_pathes)-1,
+        for i in range(len(self.__file_pathes)-1, -1, -1): # Iterate reverse
             key, value = self.__file_pathes[i]
             if key < posix_timestamp_ms:
                 return (i, key, value)
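
The corrected loop walks the sorted (timestamp, path) list from the end, so the first entry whose key lies below the requested timestamp is the latest file starting before it. For reference, `range(len(xs) - 1, -1, -1)` yields indices from the last element down to 0:

```python
xs = ["a", "b", "c"]
print([xs[i] for i in range(len(xs) - 1, -1, -1)])  # ['c', 'b', 'a']
```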

das2numpy-1.0/src/das2numpy/setups/flac_200hz.py
@@ -0,0 +1,110 @@
+""" Univsersal setup file for silixa, that detects sampling rate and number of channels by itself.
+The root directory shall be supplied by the user via an argument
+"""
+
+import sys as SYS
+import ast as AST
+from os import path as P
+import datetime as DT
+import numpy as NP
+import ffmpeg as FFMPEG
+from ..filefinder import FileFinder, to_posix_timestamp_ms
+from ..chunk import Chunk
+from ..utils import bin
+
+CALIBRATE = True
+
+
+
+sample_rate = 200
+
+
+def init(root_path, num_worker_threads):
+    assert P.isdir(root_path)
+    file_finder = FileFinder(root_path, ".flac", filename_to_posix_timestamp)
+    assert num_worker_threads >= 1
+    multithreaded = num_worker_threads > 1
+    return Chunk(
+        file_finder,
+        sample_rate,
+        multithreaded,
+        num_worker_threads,
+        False,
+        load_file
+    )
+
+
+def filename_to_posix_timestamp(file_name:str) -> int:
+    timestamp_str = file_name.split(".flac")[0]
+    timestamp_dt = DT.datetime.strptime(timestamp_str, "%Y%m%dT%H%M%S")
+    timestamp_ms = to_posix_timestamp_ms(timestamp_dt)
+    return timestamp_ms
+
+
+
+def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
+    """ Loads a single file, trims it. And returns the trimmed data as a numpy array. Downsampling (t_step, channel_step) is also possible!
+    """
+
+    try:
+        probe = FFMPEG.probe(file_path, v='error', select_streams='a:0', show_entries='stream=channels,sample_rate', of='json')
+        shape = AST.literal_eval(probe['format']['tags']['shape'])
+        calibration_factor = float(probe['format']['tags']['calibration_factor'])
+        info = probe['streams'][0]
+
+
+        idx_start = 0
+        if t_start > file_timestamp: # Check if beginning should be trimmed.
+            rel_t_start = t_start - file_timestamp
+            idx_start = int(rel_t_start * sample_rate / 1000.0)
+        idx_end = shape[0]
+        if t_end < file_timestamp + (shape[0] * 1000 / sample_rate): # Check if end should be trimmed
+            rel_t_end = t_end - file_timestamp
+            idx_end = int(rel_t_end * sample_rate / 1000.0)
+        if idx_start == idx_end:
+            return NP.zeros(shape=[0, 0]) # No data should be loaded. Do nothing
+        if file_timestamp + (shape[0] * 1000 / sample_rate) <= t_start:
+            print("Warning: File does not contain any parts of the requested data.",
+                "This can happen if there are leaks in the data or if there are no files for the requested time in the given directory.",
+                "The corresponding output will be left filled with zeros.\n",
+                f" Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
+                f" Filepath: {file_path}.")
+            return NP.zeros(shape=[0, 0])
+        assert idx_end == shape[0] or idx_end > idx_start, f"idx_start={idx_start}, idx_end={idx_end}."
+        assert idx_start < idx_end
+
+        out, err = (
+            FFMPEG
+            .input(file_path)
+            .filter('atrim', start_sample=idx_start*shape[1], end_sample=idx_end*shape[1])
+            .output('pipe:', format='s16le', acodec='pcm_s16le', ac=int(info['channels']), ar=int(info['sample_rate']))
+            .run(capture_stdout=True, capture_stderr=True)
+        )
+        data = NP.frombuffer(out, dtype=NP.int16)
+        data = data.reshape([-1, shape[1]])
+    except FFMPEG.Error as e:
+        raise Exception(e.stderr.decode("utf-8"))
+    except FileNotFoundError as e:
+        raise Exception("ffmpeg not found. Please install ffmpeg."
+            + "If you are working on desys maxwell cluster, "
+            + "execute 'module load maxwell ffmpeg'")
+
+    # Trim data
+    data = data[:, channel_start:channel_end]
+
+    data = data.astype(NP.float32) #This needs to hapen before the binning step!
+
+    # Downsample data
+    if t_step != 1 or channel_step != 1:
+        data = bin(data, (t_step, channel_step))
+    #if t_step != 1:
+    #    data = data[::t_step]
+    #if channel_step != 1:
+    #    data = data[:, ::channel_step]
+    assert len(data) > 0
+
+    if CALIBRATE:
+        data *= calibration_factor
+
+    return data
+
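
The trim logic above converts millisecond offsets into sample indices at the fixed 200 Hz rate via `idx = rel_ms * sample_rate / 1000`. A worked example with a made-up file timestamp:

```python
sample_rate = 200  # Hz, as in flac_200hz.py

file_timestamp = 1_760_000_000_000   # file start in posix ms (hypothetical)
t_start = file_timestamp + 2_500     # request begins 2.5 s into the file
t_end = file_timestamp + 4_000       # request ends 4.0 s into the file

idx_start = int((t_start - file_timestamp) * sample_rate / 1000.0)
idx_end = int((t_end - file_timestamp) * sample_rate / 1000.0)
assert (idx_start, idx_end) == (500, 800)  # 300 samples = 1.5 s at 200 Hz
```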

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/silixa.py
@@ -20,17 +20,12 @@ CALIBRATE = True
 def init(root_path, num_worker_threads):
     assert P.isdir(root_path)
     file_finder = FileFinder(root_path, ".tdms", filename_to_posix_timestamp)
-    example_file_path = file_finder.get_elem(10)[1] # TODO get first instead of 10th
-    tdms = TdmsReader(example_file_path)
-    shape = tdms.get_mmap().shape
-    file_time_sample_amount = shape[0]
-    channel_amount = shape[1]
     assert num_worker_threads >= 1
     multithreaded = num_worker_threads > 1
+    sample_rate = 1000
     return Chunk(
         file_finder,
-
-        file_time_sample_amount,
+        sample_rate,
         multithreaded,
         num_worker_threads,
         False,
@@ -44,25 +39,44 @@ def filename_to_posix_timestamp(file_name:str) -> int:
     return timestamp_ms
 
 
-def load_file(file_path,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
+    """ Loads a single file, trims it. And returns the trimmed data as a numpy array. Downsampling (t_step, channel_step) is also possible!
+    """
+
+    with TdmsReader(file_path) as tdms:
+        data = tdms.get_mmap()
+
+
+    # Trim data
+    rel_t_start = 0
+    if t_start > file_timestamp: # Check if beginning should be trimmed.
+        rel_t_start = t_start - file_timestamp
+    rel_t_end = -1
+    if t_end < file_timestamp + data.shape[0]: # Check if end should be trimmed
+        rel_t_end = t_end - file_timestamp
+    if rel_t_start == rel_t_end:
+        return NP.zeros(shape=[0, 0]) # No data should be loaded. Do nothing
+    if file_timestamp + data.shape[0] <= t_start:
+        print("Warning: File does not contain any parts of the requested data.",
+            "This can happen if there are leaks in the data. The corresponding output will be left filled with zeros.\n",
+            f" Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
+            f" Filepath: {file_path}.")
+        return NP.zeros(shape=[0, 0])
+    assert rel_t_end == -1 or rel_t_end > rel_t_start, f"rel_t_start={rel_t_start}, rel_t_end={rel_t_end}."
+    data = data[rel_t_start:rel_t_end, channel_start:channel_end]
+
+
+    # Downsample data
+    if t_step != 1 or channel_step != 1:
+        data = bin(data, (t_step, channel_step))
+    #if t_step != 1:
+    #    data = data[::t_step]
+    #if channel_step != 1:
+    #    data = data[:, ::channel_step]
+    assert len(data) > 0
+
+    if CALIBRATE:
+        data = calibrate(data)
 
     return data
 
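
Both setups now delegate downsampling to `utils.bin`, which the test script below describes as mean averaging over blocks of `bin_factors` samples. A reshape-and-mean sketch of that operation (an illustration, not the package's implementation):

```python
import numpy as NP

def bin_like(arr: NP.ndarray, bin_factors: tuple) -> NP.ndarray:
    # Average over t x c blocks; surplus rows/columns are dropped.
    t, c = bin_factors
    trimmed = arr[:arr.shape[0] // t * t, :arr.shape[1] // c * c]
    return trimmed.reshape(trimmed.shape[0] // t, t,
                           trimmed.shape[1] // c, c).mean(axis=(1, 3))

data = NP.arange(20.0).reshape(10, 2)
print(bin_like(data, (5, 1)).shape)  # (2, 2)
```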

das2numpy-1.0/src/das2numpy/setups/silixa_200hz.py
@@ -0,0 +1,105 @@
+""" Univsersal setup file for silixa, that detects sampling rate and number of channels by itself.
+The root directory shall be supplied by the user via an argument
+"""
+
+import sys as SYS
+from os import path as P
+import datetime as DT
+import numpy as NP
+from ..filefinder import FileFinder, to_posix_timestamp_ms
+from ..chunk import Chunk
+from .light_tdms_reader import TdmsReader
+from ..utils import bin
+
+CALIBRATE = True
+
+
+
+sample_rate = 200
+
+
+def init(root_path, num_worker_threads):
+    assert P.isdir(root_path)
+    file_finder = FileFinder(root_path, ".tdms", filename_to_posix_timestamp)
+    assert num_worker_threads >= 1
+    multithreaded = num_worker_threads > 1
+    return Chunk(
+        file_finder,
+        sample_rate,
+        multithreaded,
+        num_worker_threads,
+        False,
+        load_file
+    )
+
+
+def filename_to_posix_timestamp(file_name:str) -> int:
+    timestamp_str = file_name.split("_UTC_")[1][:19]
+    timestamp_dt = DT.datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S.%f")
+    timestamp_ms = to_posix_timestamp_ms(timestamp_dt)
+    return timestamp_ms
+
+
+
+def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
+    """ Loads a single file, trims it. And returns the trimmed data as a numpy array. Downsampling (t_step, channel_step) is also possible!
+    """
+
+    with TdmsReader(file_path) as tdms:
+        data = tdms.get_mmap()
+
+
+    # Trim data
+    idx_start = 0
+    if t_start > file_timestamp: # Check if beginning should be trimmed.
+        rel_t_start = t_start - file_timestamp
+        idx_start = int(rel_t_start * sample_rate / 1000.0)
+    idx_end = data.shape[0]
+    if t_end < file_timestamp + (data.shape[0] * 1000 / sample_rate): # Check if end should be trimmed
+        rel_t_end = t_end - file_timestamp
+        idx_end = int(rel_t_end * sample_rate / 1000.0)
+    if idx_start == idx_end:
+        return NP.zeros(shape=[0, 0]) # No data should be loaded. Do nothing
+    if file_timestamp + (data.shape[0] * 1000 / sample_rate) <= t_start:
+        print("Warning: File does not contain any parts of the requested data.",
+            "This can happen if there are leaks in the data. The corresponding output will be left filled with zeros.\n",
+            f" Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
+            f" Filepath: {file_path}.")
+        return NP.zeros(shape=[0, 0])
+    assert idx_end == data.shape[0] or idx_end > idx_start, f"idx_start={idx_start}, idx_end={idx_end}."
+    print(idx_start, idx_end)
+    data = data[idx_start:idx_end, channel_start:channel_end]
+
+
+    # Downsample data
+    if t_step != 1 or channel_step != 1:
+        data = bin(data, (t_step, channel_step))
+    #if t_step != 1:
+    #    data = data[::t_step]
+    #if channel_step != 1:
+    #    data = data[:, ::channel_step]
+    assert len(data) > 0
+
+    if CALIBRATE:
+        data = calibrate(data)
+
+    return data
+
+
+def calibrate(data:NP.ndarray) -> NP.ndarray:
+    """ Convert raw data to strain rate data.
+    As the resulting values are decimals, the datatype should be float. Otherwise an assertion fails. """
+    #assert data.dtype in (NP.float, NP.float32, NP.float64), f"The data should be floating point. It is {data.dtype}"
+    if data.dtype not in (float, NP.float32, NP.float64):
+        NEW_TYPE = NP.float32
+        #print("Warning: For calibration the data has to be of type float. Converting from {data.dtype} to {NEW_TYPE}")
+        data = data.astype(NEW_TYPE)
+
+    SAMPLE_FREQ = 1000.0 # This remains 1000.0 and not 200 Hz because the original sample rate of the device is relevant here!
+    EICHLAENGE = 10.0
+    factor = 116.0 * 10.0**(-9.0) / 8192.0 * SAMPLE_FREQ / EICHLAENGE
+    return data * factor # Result: 1 / s
+
+
+
+
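
The `calibrate` helper reduces to multiplication by a single scalar; note the in-code comment that `SAMPLE_FREQ` stays at the device's original 1000 Hz even for the 200 Hz files. The constant works out to:

```python
SAMPLE_FREQ = 1000.0  # original device sample rate in Hz
EICHLAENGE = 10.0     # gauge length in metres ("Eichlaenge" is German for calibration length)
factor = 116.0 * 10.0**(-9.0) / 8192.0 * SAMPLE_FREQ / EICHLAENGE
print(factor)  # 1.416015625e-09, in 1/s per raw count
```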

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/utils.py
@@ -7,6 +7,7 @@ import math as M
 import numpy as NP
 from numba import njit
 import scipy.signal as SS
+import scipy.stats
 
 TIME_AXIS = 0
 
@@ -69,7 +70,22 @@ def butterworth_filter(
 
 
 
-def
+def mean_confidence_interval(data, confidence=0.95, min_samples=10):
+    """
+    Calculates the confidence interval for a student-t distribution.
+    From https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
+    Returns: [mean, lower-confidence-limit, upper-confidence-limit]
+    """
+    n = len(data)
+    m = NP.mean(data)
+    if n < min_samples:
+        return m, None, None
+    se = scipy.stats.sem(data)
+    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
+    return m, m-h, m+h
+
+
+def spectrum_smoothing(frequencies:NP.ndarray, psd:NP.ndarray, n:int, mode="median", error_calculation=False):
     """
     Perform 1/n decade smoothing on the power spectral density (PSD) data.
     See also: https://dsp.stackexchange.com/questions/9967/1-n-octave-smoothing
@@ -81,10 +97,17 @@ def spectrum_smoothing(frequencies:NP.ndarray, psd:NP.ndarray, n:int):
         Array containing the power spectral density values corresponding to the frequencies.
     n : int
         The number of divisions per decade (e.g., n=10 for 1/10 decade smoothing).
+    mode : "mean" or "median"
+        How the data points of one bin should be reduced to one point.
+    error_calculation : False, "std", or float.
+        If false, the function returns only two arrays.
+        If "std", the third array contains the standard deviation of the original data points per frequency bin.
+        If "stderr", the third array contains the standard error or the original data points per frequency bin.
+        If float [0.0 until 1.0], the third array contains the confidence intervall for each frequency bin (EXPERIMENTAL).
 
     Returns:
-    numpy.ndarray, numpy.ndarray
-        Smoothed frequencies and
+    numpy.ndarray, numpy.ndarray, numpy:ndarray
+        Smoothed frequencies, the PSD, and the Standard deviation for each bin.
     """
     frequencies = NP.array(frequencies)
     psd = NP.array(psd)
@@ -105,6 +128,7 @@ def spectrum_smoothing(frequencies:NP.ndarray, psd:NP.ndarray, n:int):
 
     freq_new_actual = []
     psd_new = []
+    error = []
     for i in range(len(freq_new)):
         f_log = freq_new_log[i]
         f_lower = 10**(f_log - step_log / 2)
@@ -112,12 +136,49 @@
 
         # Find the indices within this log decade interval
         mask = (frequencies >= f_lower) & (frequencies < f_higher)
-        #print(freq_new[i], f_lower, f_higher, mask)
         if NP.any(mask):
             freq_new_actual.append(NP.mean(frequencies[mask]))
-
-
-
+            if mode == "mean":
+                mean = NP.mean(psd[mask])
+                psd_new.append(mean)
+            elif mode == "median":
+                psd_new.append(NP.median(psd[mask]))
+            else:
+                raise Exception("Mode should be 'mean' or 'median'!")
+            if error_calculation == False:
+                pass
+            elif error_calculation == "std":
+                if len(psd[mask]) <= 1:
+                    error.append(float("NaN"))
+                else:
+                    error.append(NP.std(psd[mask]))
+            elif error_calculation == "stderr":
+                if len(psd[mask]) <= 1:
+                    error.append(float("NaN"))
+                else:
+                    error.append(NP.std(psd[mask]) / NP.sqrt(len(psd[mask])))
+            elif type(error_calculation) == float:
+                confidence_level = error_calculation
+                assert confidence_level >= 0.5
+                #samples = psd[mask]
+                #n = len(samples)
+                ##h = scipy.stats.sem(psd[mask]) * scipy.stats.t.ppf((1 + confidence_level) / 2., n-1) # From https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
+                #z_low = scipy.stats.rayleigh.ppf((1 - confidence_level) / 2.0) # Rayleigh should be the correct distribution for ASD values
+                #z_high = scipy.stats.rayleigh.ppf(confidence_level / 2.0) # Rayleigh should be the correct distribution for ASD values
+                #mean_or_median = psd_new[-1]
+                #standard_error = samples.std() / NP.sqrt(n)
+                #confidence_interval = [ mean_or_median - standard_error * z_low,
+                #    mean_or_median + standard_error * z_high]
+                #print(f"-----------> m={mean_or_median} stderr={standard_error} z_low={z_low} stderr*zlow={standard_error * z_low} z+={z_high}")
+                m, lower, upper = mean_confidence_interval(psd[mask], confidence_level)
+                error.append([lower, upper])
+            else:
+                raise Exception(f"Error calculation type {error_calculation} is invalid.")
+
+    if error_calculation:
+        return NP.array(freq_new_actual), NP.array(psd_new), NP.array(error)
+    else:
+        return NP.array(freq_new_actual), NP.array(psd_new)
 
 
 def bin(arr: NP.ndarray, bin_factors:tuple):
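
A short usage sketch for the two new utils helpers, assuming both are importable from das2numpy.utils and using synthetic data:

```python
import numpy as NP
from das2numpy.utils import mean_confidence_interval, spectrum_smoothing

rng = NP.random.default_rng(0)
samples = rng.normal(loc=5.0, scale=2.0, size=100)
m, lo, hi = mean_confidence_interval(samples, confidence=0.95)
print(f"mean={m:.3f}, 95% CI=[{lo:.3f}, {hi:.3f}]")

# With error_calculation set, spectrum_smoothing returns a third array,
# here the standard error of the points in each 1/10-decade bin.
freqs = NP.linspace(1.0, 100.0, 4096)
psd = rng.rayleigh(scale=1.0, size=freqs.shape)
f_s, psd_s, err = spectrum_smoothing(freqs, psd, 10, mode="median",
                                     error_calculation="stderr")
```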

{das2numpy-0.0.4 → das2numpy-1.0/src/das2numpy.egg-info}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: das2numpy
-Version: 0.0.4
+Version: 1.0
 Summary: A simple and universal package for loading large amounts of distributed acoustic sensing (DAS) data.
 Author-email: Erik Genthe <erik.genthe@desy.de>
 Project-URL: Homepage, https://git.physnet.uni-hamburg.de/wave/das2numpy
@@ -49,6 +49,7 @@ Returns:
 ```
 
 
+
 #### More detailed interface
 ```python
 def load_array(t_start:datetime, t_end:datetime, t_step:int, channel_start:int, channel_end:int, channel_step:int) -> NP.ndarray:

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/SOURCES.txt
@@ -2,6 +2,7 @@ LICENSE
 README.md
 pyproject.toml
 src/example.py
+src/test_downsampled.py
 src/das2numpy/__init__.py
 src/das2numpy/__main__.py
 src/das2numpy/chunk.py
@@ -12,7 +13,9 @@ src/das2numpy.egg-info/PKG-INFO
 src/das2numpy.egg-info/SOURCES.txt
 src/das2numpy.egg-info/dependency_links.txt
 src/das2numpy.egg-info/top_level.txt
+src/das2numpy/setups/flac_200hz.py
 src/das2numpy/setups/light_tdms_reader.py
 src/das2numpy/setups/optasense_b35idefix.py
 src/das2numpy/setups/optasense_b35idefix_fast.py
-src/das2numpy/setups/silixa.py
+src/das2numpy/setups/silixa.py
+src/das2numpy/setups/silixa_200hz.py

das2numpy-1.0/src/test_downsampled.py
@@ -0,0 +1,54 @@
+import numpy as NP
+import sys
+from datetime import datetime
+import matplotlib.pyplot as PP
+from das2numpy import loader, utils
+
+USE_DOWNSAMPLED = False
+
+print("Load data to numpy-array")
+t_start = datetime(2025, 10, 14, 2, 58, 59)
+t_end = datetime(2025, 10, 14, 2, 59, 1)
+channel_start = 1000
+channel_end = 3000
+
+if USE_DOWNSAMPLED:
+    loader = loader("/pnfs/desy.de/m/project/iDAS/work/derived-data/DOWNSAMPLED_200HZ/2025-10/", "SILIXA_200HZ", 1)
+else:
+    loader = loader("/pnfs/desy.de/m/project/iDAS/raw/2025-DESY/2025-10-14-desy", "SILIXA", 1)
+data = loader.load_array(t_start, t_end, channel_start, channel_end)
+
+print("Reduce data by binning (mean averaging)")
+if USE_DOWNSAMPLED:
+    bin_factors = (1, 1)
+    data = utils.bin(data, bin_factors) # Reduce time sampling and spatial sampling by averaging.
+    sampling_hz = 200.0 / bin_factors[0]
+else:
+    bin_factors = (5, 1)
+    data = utils.bin(data, bin_factors) # Reduce time sampling and spatial sampling by averaging.
+    sampling_hz = 1000.0 / bin_factors[0]
+channel_spacing = 1.0 * bin_factors[1]
+
+NP.save("data.npy", data)
+
+print("Create plot with pyplot")
+PP.title(f"{t_start.isoformat()}")
+PP.imshow(
+    data,
+    cmap = "seismic",
+    aspect = "auto",
+    interpolation = "nearest",
+    vmin = -1e-7,
+    vmax = +1e-7,
+    extent = (
+        channel_start, channel_start + (data.shape[1] * channel_spacing),
+        data.shape[0] / sampling_hz, 0
+    )
+)
+PP.xlabel("Position [m]")
+PP.ylabel("Time [s]")
+PP.colorbar(label="Strain-rate [$\\frac{m}{m \\cdot s}$]")
+if USE_DOWNSAMPLED:
+    PP.savefig("waterfall_downsampled.png")
+else:
+    PP.savefig("waterfall.png")