PyPI - das2numpy - Versions diffs - 0.0.1__py3-none-any.whl - Mend

das2numpy 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

das2numpy/__init__.py +28 -0
das2numpy/__main__.py +99 -0
das2numpy/chunk.py +239 -0
das2numpy/filefinder.py +115 -0
das2numpy/setups/light_tdms_reader.py +479 -0
das2numpy/setups/optasense_b35idefix.py +91 -0
das2numpy/setups/optasense_b35idefix_fast.py +111 -0
das2numpy/setups/silixa.py +87 -0
das2numpy/test.py +158 -0
das2numpy/utils.py +136 -0
das2numpy-0.0.1.dist-info/LICENSE +674 -0
das2numpy-0.0.1.dist-info/METADATA +89 -0
das2numpy-0.0.1.dist-info/RECORD +16 -0
das2numpy-0.0.1.dist-info/WHEEL +5 -0
das2numpy-0.0.1.dist-info/top_level.txt +2 -0
example.py +39 -0

das2numpy/setups/silixa.py ADDED Viewed

@@ -0,0 +1,87 @@
+""" Universal setup file for Silixa that detects sampling rate and number of channels by itself.
+The root directory shall be supplied by the user via an argument.
+"""
+import sys as SYS
+from os import path as P
+import datetime as DT
+import numpy as NP
+from ..filefinder import FileFinder, to_posix_timestamp_ms
+from ..chunk import Chunk
+from .light_tdms_reader import TdmsReader
+# When True, load_file() passes the selected samples through calibrate() before returning them.
+CALIBRATE = True
+def init(root_path, num_worker_threads):
+    """Build a Chunk for the ``.tdms`` files found by a FileFinder rooted at *root_path*.
+    The number of time samples per file and the number of channels are not
+    hard-coded: both are taken from the shape of the memory map of the first
+    file reported by the FileFinder.
+    :param root_path: existing directory handed to the FileFinder.
+    :param num_worker_threads: passed through to Chunk unchanged.
+    :return: the configured Chunk instance.
+    :raises AssertionError: if *root_path* is not a directory.
+    """
+    assert P.isdir(root_path)
+    file_finder = FileFinder(root_path, ".tdms", filename_to_posix_timestamp)
+    # Probe one file: axis 0 of its memory map is time, axis 1 is the channel.
+    example_file_path = file_finder.get_first()[1]
+    tdms = TdmsReader(example_file_path)
+    shape = tdms.get_mmap().shape
+    file_time_sample_amount = shape[0]
+    channel_amount = shape[1]
+    # TODO read sampling freq from file (tdms.get_properties().get('SamplingFrequency[Hz]'))
+    # NOTE(review): the meaning of the two boolean flags is defined by the
+    # Chunk constructor, which is not visible here - confirm before changing.
+    return Chunk(
+                file_finder,
+                channel_amount,
+                file_time_sample_amount,
+                True,
+                num_worker_threads,
+                False,
+                load_file
+            )
+def filename_to_posix_timestamp(file_name:str) -> int:
+    """Parse the recording time encoded at the end of a file name.
+    The last 24 characters must match ``%Y%m%d_%H%M%S.%f.tdms``,
+    e.g. ``20210530_135950.619.tdms``.
+    :param file_name: file name or path ending in the time stamp.
+    :return: the result of to_posix_timestamp_ms for the parsed datetime.
+    """
+    stamp_format = "%Y%m%d_%H%M%S.%f.tdms"
+    stamp_text = file_name[-24:]
+    recorded_at = DT.datetime.strptime(stamp_text, stamp_format)
+    return to_posix_timestamp_ms(recorded_at)
+def load_file(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
+    """Read a rectangular, optionally strided window out of one ``.tdms`` file.
+    :param file_path: file opened with TdmsReader.
+    :param rel_t_start: first time sample inside the file (inclusive, >= 0).
+    :param rel_t_end: end time sample inside the file (exclusive, > rel_t_start).
+    :param t_step: stride applied to the time axis after the window is cut.
+    :param channel_start: first channel (inclusive).
+    :param channel_end: end channel (exclusive).
+    :param channel_step: stride applied to the channel axis after the window is cut.
+    :return: the selected samples, passed through calibrate() when CALIBRATE is set.
+    :raises AssertionError: on an empty or negative time window, or an empty result.
+    """
+    # The time window must be non-empty and must not start before the file.
+    assert 0 <= rel_t_start < rel_t_end
+    # Keep the reader referenced for the whole call while its memory map is in use.
+    reader = TdmsReader(file_path)
+    window = reader.get_mmap()[rel_t_start:rel_t_end, channel_start:channel_end]
+    if t_step != 1:
+        window = window[::t_step]
+    if channel_step != 1:
+        window = window[:, ::channel_step]
+    assert len(window) > 0
+    if not CALIBRATE:
+        return window
+    return calibrate(window)
+def calibrate(data:NP.ndarray) -> NP.ndarray:
+    """ Convert raw data to strain rate data.
+    As the resulting values are decimals, input that is neither float32 nor
+    float64 is converted to float32 before it is scaled.
+    :param data: raw samples; the array passed in is not modified.
+    :return: a new array holding ``data * factor`` (Result: 1 / s).
+    """
+    # NP.float was just an alias of the builtin float (float64). It was removed
+    # in NumPy 1.24 and referencing it raises AttributeError, so it is not
+    # listed here; the accepted dtypes are unchanged.
+    if data.dtype not in (NP.float32, NP.float64):
+        NEW_TYPE = NP.float32
+        data = data.astype(NEW_TYPE)
+    # NOTE(review): the sampling frequency is hard-coded here, presumably in Hz
+    # - confirm, and see the TODO in init() about reading it from the file.
+    SAMPLE_FREQ = 1000.0
+    # Gauge length (the original name was the German "Eichlaenge"); unit
+    # presumably metres - TODO confirm.
+    GAUGE_LENGTH = 10.0
+    factor = 116.0 * 10.0**(-9.0) / 8192.0 * SAMPLE_FREQ / GAUGE_LENGTH
+    return data * factor # Result: 1 / s

das2numpy/test.py ADDED Viewed

@@ -0,0 +1,158 @@
+"""
+    Deprecated
+    Unittests for this dataloader-module
+    by Erik Genthe
+    05.01.2022
+"""
+from math import ceil, floor
+import sys as SYS
+from os import path as P
+import datetime as DT
+import h5py as H5PY
+import numpy as NP
+try:
+    import dataloader as D
+except ModuleNotFoundError as e:
+    raise RuntimeError("TO RUN THIS TEST, MOVE IT INTO THE PARENT DIR FIRST!") from e
+from dataloader.filefinder import to_posix_timestamp_ms
+def test_silixa_filefinder():
+    """The Silixa file finder must resolve one instant to one file and list more than 9000 files in total."""
+    #file_path = '/wave/seismic-rawdata/desy_12km_1m_P7gauss/desy_UTC_20210522_155121.950.tdms'
+    #ls /wave/seismic-rawdata/desy_12km_1m_P7gauss -l | grep -n --invert-match 504946688
+    finder = D.silixa.FILE_FINDER
+    # A zero-length range at one instant must hit exactly the file covering it.
+    instant = DT.datetime(2021, 5, 30, 14, 0, 0)
+    hits = finder.get_range(instant, instant)
+    assert len(hits) == 1
+    assert hits[0][1].endswith('/desy_UTC_20210530_135950.619.tdms')
+    # From the epoch until now: every file must be reported.
+    now_ms = D.to_posix_timestamp_ms(DT.datetime.now())
+    all_files = finder.get_range_posix(0, now_ms)
+    assert len(all_files) > 9000
+def test_optasense_filefinder():
+    """The Optasense file finder must resolve one instant to one file and list more than 9000 files in total."""
+    finder = D.optasense.FILE_FINDER
+    # A zero-length range at one instant must hit exactly the file covering it.
+    instant = DT.datetime(2021, 5, 30, 14, 0, 0)
+    hits = finder.get_range(instant, instant)
+    assert len(hits) == 1
+    assert hits[0][1].endswith('2021-05-30T135924Z.h5')
+    # From the epoch until now: every file must be reported.
+    now_ms = D.to_posix_timestamp_ms(DT.datetime.now())
+    all_files = finder.get_range_posix(0, now_ms)
+    assert len(all_files) > 9000
+def test_fast_optasense_filefinder():
+    """The fast Optasense file finder must resolve one instant to one file and list more than 9000 files in total."""
+    # Find one specific file...
+    time = DT.datetime(2021, 5, 30, 14, 00, 00)
+    filelist = D.fast_optasense.FILE_FINDER.get_range(time, time)
+    assert len(filelist) == 1
+    assert filelist[0][1].endswith('2021-05-30T135924Z.h5.bin')
+    # Find all files...
+    # This has to query the fast finder as well; the previous version asked
+    # D.optasense here and therefore never checked the fast file list.
+    filelist = D.fast_optasense.FILE_FINDER.get_range_posix(0, D.to_posix_timestamp_ms(DT.datetime.now()))
+    assert len(filelist) > 9000
+def test_chunk(chunk, MAX_CHANNEL):
+    """Check the shapes produced by chunk.load() and print a few load-time benchmarks.
+    The expected shapes correspond to 1000 time samples per second.
+    :param chunk: object offering ``load(...)`` and a ``data`` array. The six load
+        arguments are presumably (t_start, t_end, t_step, channel_start,
+        channel_end, channel_step) - confirm against Chunk.load.
+    :param MAX_CHANNEL: upper channel bound used for the full-width loads.
+    """
+    import time as TIME
+    #MAX_CHANNEL = 12608
+    #chunk = D.silixa.create_chunk()
+    def timed_load(message, *load_args):
+        # Run one load and print *message* (a %-format with one float slot)
+        # filled with the elapsed seconds. perf_counter is monotonic and
+        # high-resolution, which suits benchmarking better than time().
+        bench_start = TIME.perf_counter()
+        chunk.load(*load_args)
+        print(message % (TIME.perf_counter() - bench_start))
+    t_start: int =          to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 00, 00))
+    t_end1: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 00,  1))
+    t_end2: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14,  1, 30))
+    t_end3: int = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 10, 00))
+    t_end_one_hour: int =   to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 15, 00, 00))
+    print()
+    # One second, every sample, every channel.
+    chunk.load(t_start, t_end1, 1, 0, MAX_CHANNEL, 1)
+    assert chunk.data.shape == (1000, MAX_CHANNEL)
+    print()
+    # 90 seconds with every 3rd sample and every 9th channel.
+    chunk.load(t_start, t_end2, 3, 0, MAX_CHANNEL, 9)
+    assert chunk.data.shape == (30000, ceil(MAX_CHANNEL / 9))
+    print()
+    # Now some benchmarks...
+    #bench_start = TIME.time()
+    #file_handle = open("/wave/seismic-rawdata/OPTA/Disk2/DESY-Rec-11-GL8m-Chan10000_2021-05-30T07_55_42+0100/DESY-Rec-11-GL8m-Chan10000_2021-05-30T135924Z.h5", 'rb')
+    #file:H5PY.File = H5PY.File(file_handle, 'r')
+    #data = file['Acquisition']['Raw[0]']['RawData'] # Data is not loaded into memory at this point! (Lazy evaluation)
+    #data = NP.array(data)
+    #print("TIME for loading one whole file using h5py:", TIME.time() - bench_start, "\n")
+    # t_end3 is ten minutes after t_start, hence 600000 samples.
+    timed_load("Time for loading the first 1000 sensors of ten minutes of data: %4f\n", t_start, t_end3, 1, 0, 1000, 1)
+    assert chunk.data.shape == (600000, 1000)
+    timed_load("Time for loading one hour of data with sensor_step=10: %4f\n", t_start, t_end_one_hour, 1, 0, MAX_CHANNEL, 10)
+    assert chunk.data.shape == (1000*60*60, ceil(MAX_CHANNEL/10))
+    timed_load("Time for loading 100 sensors with 1 hour of data: %4f\n", t_start, t_end_one_hour, 1, 0, 100, 1)
+    timed_load("Time for loading 1000 sensors with 1 hour of data: %4f\n", t_start, t_end_one_hour, 1, 0, 1000, 1)
+    timed_load("Time for loading 1 hour completely: %4f\n", t_start, t_end_one_hour, 1, 0, MAX_CHANNEL, 1)
+def test_equalness_of_fast_opta_simple():
+    """The fast and the normal Optasense chunk must load identical data for one second of channels 0-9."""
+    first_ms = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 0, 0))
+    last_ms = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 0, 1))
+    # Same request for both loaders: no striding, channels 0 up to 10.
+    request = (first_ms, last_ms, 1, 0, 10, 1)
+    fast = D.fast_optasense.create_chunk()
+    fast.load(*request)
+    normal = D.optasense.create_chunk()
+    normal.load(*request)
+    assert fast.data.shape == normal.data.shape
+    assert NP.array_equiv(fast.data, normal.data)
+def test_equalness_of_fast_opta():
+    """The fast and the normal Optasense chunk must load identical data for a strided request."""
+    first_ms = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 0, 0))
+    last_ms = to_posix_timestamp_ms(DT.datetime(2021, 5, 30, 14, 0, 1))
+    # Same request for both loaders: time step 3, channels 2000 up to 7000 with step 9.
+    request = (first_ms, last_ms, 3, 2000, 7000, 9)
+    fast = D.fast_optasense.create_chunk()
+    fast.load(*request)
+    normal = D.optasense.create_chunk()
+    normal.load(*request)
+    assert fast.data.shape == normal.data.shape
+    assert NP.array_equiv(fast.data, normal.data)
+if __name__ == '__main__':
+    # Script entry point: only the two test_chunk benchmark runs are active;
+    # the individual tests and the plain Optasense benchmark are commented out.
+    #test_equalness_of_fast_opta_simple()
+    #test_equalness_of_fast_opta()
+    #test_fast_optasense_filefinder()
+    #test_silixa_filefinder()
+    #test_optasense_filefinder()
+    print("\nSilixa benchmark:")
+    test_chunk(D.silixa.create_chunk(), 12608)
+    print("\nFast Optasense benchmark:")
+    test_chunk(D.fast_optasense.create_chunk(), 10000)
+    #print("\nOptasense benchmark:")
+    #test_chunk(D.optasense.create_chunk(), 10000)

das2numpy/utils.py ADDED Viewed

@@ -0,0 +1,136 @@
+"""
+Everything that modifies the signal.
+author: Erik Genthe
+"""
+import numpy as NP
+from numba import njit
+import scipy.signal as SS
+# Index of the time axis; butterworth_filter() filters along this axis.
+TIME_AXIS = 0
+def remove_channel_offset(data:NP.ndarray):
+    """Deprecated. Removes a constant value (the mean over time) from each channel of the data.
+        Expecting the time-axis to be the first axis!
+        The array is modified in place, so it has to be a floating point array.
+        :param data: array whose first axis is time; modified in place.
+        :return: None
+    """
+    # This function used to be decorated with ``@DeprecationWarning``. That does
+    # not mark it as deprecated: it replaces the function with an exception
+    # instance, so every call failed with a TypeError. Emit a real deprecation
+    # warning at call time instead.
+    import warnings
+    warnings.warn("remove_channel_offset is deprecated", DeprecationWarning, stacklevel=2)
+    print("Warning! Untested function!") #TODO
+    #for i in range(data.shape[1]):
+    #    data[:,i] -= data[:,i].mean(dtype=data.dtype)
+    data -= data.mean(axis=0)
+@njit
+def differentiate(data: NP.ndarray, axis: int) -> NP.ndarray:
+    """Differentiate the 2-dimensional signal over one axis (forward difference).
+     A 2-d array is expected as input.
+     The input is not modified: the array is copied, the copy is modified and returned.
+     Element i along the axis becomes ``data[i+1] - data[i]``; the last element
+     along the axis has no successor and keeps its original value.
+     :param data: 2-d input array
+     :param axis: 0 or 1
+     :return: differentiated array with the same shape as the input
+    """
+    assert axis == 0 or axis == 1
+    data = data.copy()
+    if data.shape[axis] < 2:
+        raise Exception("Differentiation with less than two samples makes no sense.")
+    if axis == 0:
+        # Going upwards is safe in place: row i+1 is still unmodified when row i is written.
+        for i in range(0, data.shape[0]-1):
+            data[i] = data[i+1] - data[i]
+    elif axis == 1:
+        for i in range(0, data.shape[1]-1):
+            data[:,i] = data[:,i+1] - data[:,i]
+    return data
+#@njit
+#def integrate(data: NP.ndarray, axis: int) -> NP.ndarray:
+#    """Integrate the 2-dimensional signal over one axis
+#     A 2-d array is expected as input
+#     The return-value is None. The array is copied, modified and returned.
+#     :return: integrated array
+#    """
+#    assert axis == 0 or axis == 1
+#    data = data.copy()
+#
+#    if data.shape[axis] < 2:
+#        raise Exception("Integration with less then two samples makes no sense.")
+#    if axis == 0:
+#        for i in range(1, data.shape[0]):
+#            data[i] = data[i] + data[i-1]
+#    elif axis == 1:
+#        for i in range(1, data.shape[1]):
+#            data[:, i] = data[:, i] + data[:, i-1]
+#    return data
+def integrate(data: NP.ndarray, axis: int, sample_rate_hz:float) -> NP.ndarray:
+    """Integrate the signal over one axis as a running sum divided by the sample rate.
+       The input array is left untouched; a new array is returned.
+       :param data: input array (2-d expected)
+       :param axis: axis to accumulate along
+       :param sample_rate_hz: divisor applied to the running sum
+       :return: integrated array
+    """
+    running_sum = NP.cumsum(data, axis=axis)
+    return running_sum / sample_rate_hz
+def butterworth_filter(
+            array : NP.ndarray,
+            freq : float,
+            order : int,
+            btype, #: {‘lowpass’, ‘highpass’}
+            fs : float) -> NP.ndarray:
+    """
+    Apply a digital Butterworth filter along the time axis (TIME_AXIS).
+    The filter is designed as second-order sections and applied with sosfiltfilt.
+    :array: The input data. Two dimensions expected. First dimension is expected to be the time dimension.
+    :freq: Critical frequency, handed to scipy.signal.iirfilter together with fs.
+    :order: Filter order.
+    :btype: Band type handed to scipy.signal.iirfilter, e.g. 'lowpass' or 'highpass'.
+    :fs: Sampling frequency.
+    return: The filtered array
+    """
+    design_options = dict(rp=None, rs=None, btype=btype, analog=False, ftype='butter', output='sos', fs=fs)
+    sections = SS.iirfilter(order, freq, **design_options)
+    return SS.sosfiltfilt(sections, array, axis=TIME_AXIS, padtype='odd', padlen=None)
+#https://numpy.org/doc/stable/reference/generated/numpy.fft.fft.html#numpy.fft.fft
+def fft(array):
+    """Placeholder for an FFT helper; always raises NotImplementedError.
+    :param array: unused.
+    :raises NotImplementedError: on every call.
+    """
+    raise NotImplementedError("Not implemented yet")
+def bin(arr: NP.ndarray, bin_factors:tuple):
+    """ Returns a binned version of arr: every block of bin_factors[0] x bin_factors[1]
+    values is replaced by its mean, stored with the dtype of arr.
+    Trailing rows / columns that do not fill a complete block are dropped.
+    If both factors are 1, the original array itself is returned (no copy).
+    :param arr: 2-d input array
+    :param bin_factors: two positive bin sizes, one per axis
+    :return: the binned array
+    """
+    dimensions = len(arr.shape)
+    assert len(bin_factors) == dimensions
+    #assert arr.dtype == NP.float32 or arr.dtype == NP.float64
+    assert dimensions == 2
+    assert all(factor > 0 for factor in bin_factors)
+    # Nothing to do when every bin holds exactly one value.
+    if (bin_factors[0], bin_factors[1]) == (1, 1):
+        return arr
+    binned_shape = NP.array(arr.shape) // NP.array(bin_factors)
+    binned = NP.empty(binned_shape, dtype=arr.dtype)
+    _bin_helper(arr, binned, bin_factors)
+    return binned
+@njit
+def _bin_helper(arr, newarr, bin_factors):
+    # Fill newarr in place: each cell receives the mean of the matching
+    # bin_factors[0] x bin_factors[1] block of arr.
+    rows_per_bin = bin_factors[0]
+    cols_per_bin = bin_factors[1]
+    for row in range(newarr.shape[0]):
+        top = row * rows_per_bin
+        for col in range(newarr.shape[1]):
+            left = col * cols_per_bin
+            newarr[row][col] = NP.mean(arr[top : top + rows_per_bin, left : left + cols_per_bin])
+def log_scale_symmetric(arr:NP.ndarray) -> NP.ndarray:
+    """ Symmetric logarithmic scaling: log2(1 + x) for the positive part of arr and
+    -log2(1 + |x|) for the negative part, so that zero maps to zero.
+    The input is not modified; the result is computed as float32.
+    """
+    baseline = NP.zeros(arr.shape)
+    # Split into the two half-signals; the other half is zero in each of them.
+    upper = NP.maximum(baseline, arr) + 1
+    lower = NP.minimum(baseline, arr) * -1 + 1
+    upper_log = NP.log2(upper, dtype=NP.float32)
+    # Scale the mirrored negative half like a positive one, then mirror it back.
+    lower_log = NP.log2(lower, dtype=NP.float32) * -1
+    return lower_log + upper_log