das2numpy 0.0.4__tar.gz → 1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {das2numpy-0.0.4/src/das2numpy.egg-info → das2numpy-1.0}/PKG-INFO +2 -1
  2. {das2numpy-0.0.4 → das2numpy-1.0}/README.md +1 -0
  3. {das2numpy-0.0.4 → das2numpy-1.0}/pyproject.toml +1 -1
  4. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/__init__.py +7 -6
  5. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/chunk.py +49 -48
  6. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/filefinder.py +1 -1
  7. das2numpy-1.0/src/das2numpy/setups/flac_200hz.py +110 -0
  8. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/silixa.py +40 -26
  9. das2numpy-1.0/src/das2numpy/setups/silixa_200hz.py +105 -0
  10. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/utils.py +68 -7
  11. {das2numpy-0.0.4 → das2numpy-1.0/src/das2numpy.egg-info}/PKG-INFO +2 -1
  12. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/SOURCES.txt +4 -1
  13. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/top_level.txt +1 -0
  14. das2numpy-1.0/src/test_downsampled.py +54 -0
  15. {das2numpy-0.0.4 → das2numpy-1.0}/LICENSE +0 -0
  16. {das2numpy-0.0.4 → das2numpy-1.0}/setup.cfg +0 -0
  17. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/__main__.py +0 -0
  18. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/light_tdms_reader.py +0 -0
  19. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/optasense_b35idefix.py +0 -0
  20. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/optasense_b35idefix_fast.py +0 -0
  21. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/test.py +0 -0
  22. {das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/dependency_links.txt +0 -0
  23. {das2numpy-0.0.4 → das2numpy-1.0}/src/example.py +0 -0
{das2numpy-0.0.4/src/das2numpy.egg-info → das2numpy-1.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: das2numpy
-Version: 0.0.4
+Version: 1.0
 Summary: A simple and universal package for loading large amounts of distributed acoustic sensing (DAS) data.
 Author-email: Erik Genthe <erik.genthe@desy.de>
 Project-URL: Homepage, https://git.physnet.uni-hamburg.de/wave/das2numpy
@@ -49,6 +49,7 @@ Returns:
 ```
 
 
+
 #### More detailed interface
 ```python
 def load_array(t_start:datetime, t_end:datetime, t_step:int, channel_start:int, channel_end:int, channel_step:int) -> NP.ndarray:

{das2numpy-0.0.4 → das2numpy-1.0}/README.md

@@ -35,6 +35,7 @@ Returns:
 ```
 
 
+
 #### More detailed interface
 ```python
 def load_array(t_start:datetime, t_end:datetime, t_step:int, channel_start:int, channel_end:int, channel_step:int) -> NP.ndarray:

{das2numpy-0.0.4 → das2numpy-1.0}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "das2numpy"
-version = "0.0.4"
+version = "1.0"
 authors = [
   { name="Erik Genthe", email="erik.genthe@desy.de" },
 ]

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/__init__.py

@@ -4,16 +4,20 @@
 
 import os as OS
 import numpy as NP
-from multipledispatch import dispatch
+from . import utils
 
 
-
-#@dispatch(str, str, int)
 def loader(root_path:str, predefined_setup:str, num_worker_threads):
 
     if predefined_setup.upper() == "SILIXA":
         from .setups import silixa
         chunk = silixa.init(root_path, num_worker_threads)
+    elif predefined_setup.upper() == "SILIXA_200HZ":
+        from .setups import silixa_200hz
+        chunk = silixa_200hz.init(root_path, num_worker_threads)
+    elif predefined_setup.upper() == "FLAC_200HZ":
+        from .setups import flac_200hz
+        chunk = flac_200hz.init(root_path, num_worker_threads)
     elif predefined_setup.upper() == "OPTASENSE":
         from .setups import optasense_b35idefix
         chunk = optasense_b35idefix.init()
@@ -23,6 +27,3 @@ def loader(root_path:str, predefined_setup:str, num_worker_threads):
     return chunk
 
 
-#@dispatch(str, str, int)
-#def loader(self, root_path:str, predefined_setup:str, num_worker_threads):
-#    return chunk
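
Note: a minimal usage sketch of the setup names added in 1.0 (the root path below is a placeholder; the 4-argument `load_array` call with datetimes mirrors `src/test_downsampled.py`):

```python
from datetime import datetime
import das2numpy

# "SILIXA_200HZ" and "FLAC_200HZ" are the new predefined_setup values.
chunk = das2numpy.loader("/path/to/downsampled/data", "FLAC_200HZ", 1)
data = chunk.load_array(
    datetime(2025, 10, 14, 2, 58, 59),  # t_start
    datetime(2025, 10, 14, 2, 59, 1),   # t_end
    1000,                               # channel_start
    3000,                               # channel_end
)
```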

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/chunk.py

@@ -16,6 +16,7 @@ from random import shuffle
 from multipledispatch import dispatch
 import concurrent.futures as CF
 from concurrent.futures import ThreadPoolExecutor
+from threading import Lock
 from multiprocessing import Pool
 import numpy as NP
 from .filefinder import FileFinder, to_posix_timestamp_ms
@@ -23,7 +24,7 @@ from .filefinder import FileFinder, to_posix_timestamp_ms
 
 SHUFFLE_TASKS = False
 
-def _predict_size(start: int, end: int, step: int) -> int:
+def _calc_size(start: int, end: int, step: int) -> int:
     diff = end - start
     return int(((diff-1) - (diff-1)%step) / step + 1)
 
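
Note: the renamed helper computes how many samples a strided slice yields, i.e. ceil((end - start) / step); a quick check of the formula:

```python
def _calc_size(start: int, end: int, step: int) -> int:
    diff = end - start
    return int(((diff - 1) - (diff - 1) % step) / step + 1)

assert _calc_size(0, 10, 3) == 4   # indices 0, 3, 6, 9
assert _calc_size(0, 10, 1) == 10  # every sample
assert _calc_size(5, 6, 2) == 1    # a single sample
```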
@@ -37,25 +38,25 @@ class Chunk():
     the data and the meta information can be accessed directly by accessing the following fields:
     data, timestamps, geo_positions, channel.
     TODO implement geo_positions, channel, timestamps
-    author: ingrabarbosa, Erik genthe
+    author: Erik genthe
     """
 
 
     def __init__(self,
-            file_finder:FileFinder,
-            file_channel_amount:int,
-            file_time_sample_amount:int,
+            file_finder:FileFinder,
+            sample_rate,
             multithreaded:bool,
             workers:int,
             workerprocess:bool,
             loading_function:Callable[[str, int, int, int, int, int, int], NP.ndarray]
             ):
         self.__file_finder = file_finder
-        self.__file_channel_amount = file_channel_amount
-        self.__file_time_sample_amount = file_time_sample_amount
+        self.__sample_rate = sample_rate
         self.__multithreaded = multithreaded
         self.__workerprocess = workerprocess
         self.__loading_function = loading_function
+        self.__lock = Lock()
+        assert type(sample_rate) == int
         if multithreaded:
             self.__executor = ThreadPoolExecutor(workers)
         if not self.__multithreaded:
@@ -64,7 +65,7 @@ class Chunk():
 
 
     def __load_from_file_into_data(self,
-            start_timestamp:int,
+            file_timestamp:int, # The timestamp retrieved from the filename
             file_path:str,
             t_start:int,
             t_end:int,
@@ -73,50 +74,44 @@ class Chunk():
             channel_end:int,
             channel_step:int
             ) -> None:
-        #print("Args: ", start_timestamp, file_path, t_start, t_end, t_step, channel_start, channel_end, channel_step)
+        #print("Args: ", file_timestamp, file_path, t_start, t_end, t_step, channel_start, channel_end, channel_step)
         # Check if the whole file shall be loaded. Especially the first and last file could be cut...
         print("das2numpy: Loading from", file_path)
-        rel_t_start = 0
-        rel_t_end = self.__file_time_sample_amount
-        if t_start > start_timestamp:
-            rel_t_start = t_start - start_timestamp
-        if t_end < start_timestamp + self.__file_time_sample_amount: #TODO magicnumber
-            rel_t_end = t_end - start_timestamp
-        if rel_t_start == rel_t_end:
-            return # Do nothing
-        #print("relative start, relative end", rel_t_start, rel_t_end)
-        if start_timestamp + self.__file_time_sample_amount <= t_start:
-            print("Warning: File does not contain any parts of the requested data.",
-                "This can happen if there are leaks in the data. The corresponding output will be left filled with zeros.\n",
-                f"    Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
-                f"    Filepath: {file_path}.")
-            return
-        assert rel_t_end > rel_t_start, f"rel_t_start={rel_t_start}, rel_t_end={rel_t_end}."
-
+
 
         # Load h5-data using a different process... There is no other way to make h5py work parallel :(
         data = None
         if self.__workerprocess:
             pool = Pool(1)
             result = pool.apply_async(self.__loading_function,
-                (file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step))
+                (file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step))
             pool.close()
-            result = result.get() # Blocks!
-            data = result
+            data = result.get() # Blocks!
         else:
-            data = self.__loading_function(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step)
+            data = self.__loading_function(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step)
 
         # Store loaded data part into all_data
-        start_index = floor((start_timestamp - t_start) / t_step)
+        start_index = int((file_timestamp - t_start) * self.__sample_rate / 1000 / t_step)
         #print(start_index)
         if start_index < 0:
             start_index = 0
         #print("Shape: ", data.shape)
 
+        if data.shape[1] != self.data.shape[1]:
+            print(f"Amount of channels detected in file {file_path} is {data.shape[1]}. The buffer has {self.data.shape[1]}")
+
+        if channel_end == -1:
+            with self.__lock:
+                # If number of channels increased, reallocate the target array.
+                if self.data.shape[1] < data.shape[1]:
+                    print("Reallocating buffer")
+                    old = self.data
+                    self.data = NP.zeros((self.data.shape[0], data.shape[1]), dtype=data.dtype)
+                    self.data[:, :old.shape[1]] = old[:,:]
+                    del old
+
         # To make this a little bit tolerant to a changing amount of channels per file, also the number of channels is given!
         n_channels = min(data.shape[1], self.data.shape[1])
-        if data.shape[1] != self.data.shape[1]:
-            print(f"Warning: Incosistend amount of channels detected in file {file_path}. Expected={self.data.shape[1]}, file={data.shape[1]}. Cropping to fit.")
         self.data[start_index : start_index + data.shape[0], 0:n_channels] = data[:,:n_channels]
 
     @dispatch(int, int, int, int, int, int)
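
Note: the new `start_index` formula converts the millisecond offset between the file's start and the requested start into a sample index via the chunk's sample rate; a quick check of the arithmetic with illustrative values:

```python
sample_rate = 200                  # Hz, as in the new 200 Hz setups
t_start = 1_700_000_000_000        # requested start, posix ms (illustrative)
file_timestamp = t_start + 30_000  # this file begins 30 s after the request
t_step = 1

start_index = int((file_timestamp - t_start) * sample_rate / 1000 / t_step)
assert start_index == 6000  # 30 s * 200 Hz
```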
@@ -140,31 +135,34 @@ class Chunk():
         """
 
         assert channel_start >= 0
-        assert channel_start <= self.__file_channel_amount
-        if channel_end == -1:
-            channel_end = self.__file_channel_amount
-        assert channel_end >= channel_start
-        assert channel_end <= self.__file_channel_amount, "channel_end has to be less or equal than self.__file_channel_amount"
+        #assert channel_start <= self.__file_channel_amount
+        #if channel_end == -1: channel_end = self.__file_channel_amount
+        assert channel_end == -1 or channel_end > 0
+        if channel_end != -1:
+            assert channel_end >= channel_start
+            #assert channel_end <= self.__file_channel_amount, "channel_end has to be less or equal than self.__file_channel_amount"
         assert t_step > 0
         assert channel_step > 0
 
         file_pathes = self.__file_finder.get_range_posix(t_start, t_end)
         print(f"Loading data from {len(file_pathes)} files.")
         #print("file_pathes", file_pathes)
-        data_shape = (
-            _predict_size(t_start, t_end, t_step),
-            _predict_size(channel_start, channel_end, channel_step)
-        )
+        data_shape = [
+            _calc_size(t_start * self.__sample_rate / 1000, t_end * self.__sample_rate / 1000, t_step),
+            _calc_size(channel_start, channel_end, channel_step)
+        ]
+        if channel_end == -1:
+            data_shape[1] = 1
         self.data = NP.zeros(shape=data_shape, dtype=NP.float32)
         if self.__multithreaded:
             futures = []
             if SHUFFLE_TASKS:
                 shuffle(file_pathes)
-            for start_timestamp, file_path in file_pathes:
+            for file_timestamp, file_path in file_pathes:
                 futures.append(
                     self.__executor.submit(
                         self.__load_from_file_into_data,
-                        start_timestamp,
+                        file_timestamp,
                         file_path,
                         t_start,
                         t_end,
@@ -179,9 +177,9 @@ class Chunk():
                 future.result() # Raises possible exceptions
 
         else:
-            for start_timestamp, file_path in file_pathes:
+            for file_timestamp, file_path in file_pathes:
                 self.__load_from_file_into_data(
-                    start_timestamp,
+                    file_timestamp,
                     file_path,
                     t_start,
                     t_end,
@@ -189,8 +187,11 @@ class Chunk():
                     channel_start,
                     channel_end,
                     channel_step)
-
-        return self.data
+
+        # The following is weird, but it solves issues with garbage collection. Otherwise this behaves like a memory leak.
+        data = self.data
+        del self.data
+        return data
 
 
 

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/filefinder.py

@@ -73,7 +73,7 @@ class FileFinder():
             tuple: A triple (internal_index, posix timestamp in millis of the file start, file path)
             None: If the given time was before any recording was done.
         """
-        for i in range(len(self.__file_pathes)-1, 0, -1): # Iterate reverse
+        for i in range(len(self.__file_pathes)-1, -1, -1): # Iterate reverse
            key, value = self.__file_pathes[i]
            if key < posix_timestamp_ms:
                return (i, key, value)
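
Note: this fixes an off-by-one in the reverse scan; `range()` excludes its stop value, so with a stop of 0 the first (oldest) file at index 0 was never examined:

```python
# Old bound: index 0 is skipped.
assert list(range(3 - 1, 0, -1)) == [2, 1]
# New bound: the scan reaches index 0 as well.
assert list(range(3 - 1, -1, -1)) == [2, 1, 0]
```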

das2numpy-1.0/src/das2numpy/setups/flac_200hz.py (new file)

@@ -0,0 +1,110 @@
+""" Universal setup file for silixa, that detects sampling rate and number of channels by itself.
+    The root directory shall be supplied by the user via an argument
+"""
+
+import sys as SYS
+import ast as AST
+from os import path as P
+import datetime as DT
+import numpy as NP
+import ffmpeg as FFMPEG
+from ..filefinder import FileFinder, to_posix_timestamp_ms
+from ..chunk import Chunk
+from ..utils import bin
+
+CALIBRATE = True
+
+
+
+sample_rate = 200
+
+
+def init(root_path, num_worker_threads):
+    assert P.isdir(root_path)
+    file_finder = FileFinder(root_path, ".flac", filename_to_posix_timestamp)
+    assert num_worker_threads >= 1
+    multithreaded = num_worker_threads > 1
+    return Chunk(
+        file_finder,
+        sample_rate,
+        multithreaded,
+        num_worker_threads,
+        False,
+        load_file
+    )
+
+
+def filename_to_posix_timestamp(file_name:str) -> int:
+    timestamp_str = file_name.split(".flac")[0]
+    timestamp_dt = DT.datetime.strptime(timestamp_str, "%Y%m%dT%H%M%S")
+    timestamp_ms = to_posix_timestamp_ms(timestamp_dt)
+    return timestamp_ms
+
+
+
+def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
+    """ Loads a single file, trims it, and returns the trimmed data as a numpy array. Downsampling (t_step, channel_step) is also possible!
+    """
+
+    try:
+        probe = FFMPEG.probe(file_path, v='error', select_streams='a:0', show_entries='stream=channels,sample_rate', of='json')
+        shape = AST.literal_eval(probe['format']['tags']['shape'])
+        calibration_factor = float(probe['format']['tags']['calibration_factor'])
+        info = probe['streams'][0]
+
+
+        idx_start = 0
+        if t_start > file_timestamp: # Check if beginning should be trimmed.
+            rel_t_start = t_start - file_timestamp
+            idx_start = int(rel_t_start * sample_rate / 1000.0)
+        idx_end = shape[0]
+        if t_end < file_timestamp + (shape[0] * 1000 / sample_rate): # Check if end should be trimmed
+            rel_t_end = t_end - file_timestamp
+            idx_end = int(rel_t_end * sample_rate / 1000.0)
+        if idx_start == idx_end:
+            return NP.zeros(shape=[0, 0]) # No data should be loaded. Do nothing
+        if file_timestamp + (shape[0] * 1000 / sample_rate) <= t_start:
+            print("Warning: File does not contain any parts of the requested data.",
+                "This can happen if there are leaks in the data or if there are no files for the requested time in the given directory.",
+                "The corresponding output will be left filled with zeros.\n",
+                f"    Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
+                f"    Filepath: {file_path}.")
+            return NP.zeros(shape=[0, 0])
+        assert idx_end == shape[0] or idx_end > idx_start, f"idx_start={idx_start}, idx_end={idx_end}."
+        assert idx_start < idx_end
+
+        out, err = (
+            FFMPEG
+            .input(file_path)
+            .filter('atrim', start_sample=idx_start*shape[1], end_sample=idx_end*shape[1])
+            .output('pipe:', format='s16le', acodec='pcm_s16le', ac=int(info['channels']), ar=int(info['sample_rate']))
+            .run(capture_stdout=True, capture_stderr=True)
+        )
+        data = NP.frombuffer(out, dtype=NP.int16)
+        data = data.reshape([-1, shape[1]])
+    except FFMPEG.Error as e:
+        raise Exception(e.stderr.decode("utf-8"))
+    except FileNotFoundError as e:
+        raise Exception("ffmpeg not found. Please install ffmpeg. "
+            + "If you are working on the DESY Maxwell cluster, "
+            + "execute 'module load maxwell ffmpeg'")
+
+    # Trim data
+    data = data[:, channel_start:channel_end]
+
+    data = data.astype(NP.float32) # This needs to happen before the binning step!
+
+    # Downsample data
+    if t_step != 1 or channel_step != 1:
+        data = bin(data, (t_step, channel_step))
+    #if t_step != 1:
+    #    data = data[::t_step]
+    #if channel_step != 1:
+    #    data = data[:, ::channel_step]
+    assert len(data) > 0
+
+    if CALIBRATE:
+        data *= calibration_factor
+
+    return data
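
Note: the FLAC setup expects file names that are bare UTC timestamps with a `.flac` suffix; a quick check of the parsing in `filename_to_posix_timestamp` (the file name is a hypothetical example):

```python
import datetime as DT

file_name = "20251014T025859.flac"
timestamp_str = file_name.split(".flac")[0]
timestamp_dt = DT.datetime.strptime(timestamp_str, "%Y%m%dT%H%M%S")
assert timestamp_dt == DT.datetime(2025, 10, 14, 2, 58, 59)
```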

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/setups/silixa.py

@@ -20,17 +20,12 @@ CALIBRATE = True
 def init(root_path, num_worker_threads):
     assert P.isdir(root_path)
     file_finder = FileFinder(root_path, ".tdms", filename_to_posix_timestamp)
-    example_file_path = file_finder.get_elem(10)[1] # TODO get first instead of 10th
-    tdms = TdmsReader(example_file_path)
-    shape = tdms.get_mmap().shape
-    file_time_sample_amount = shape[0]
-    channel_amount = shape[1]
     assert num_worker_threads >= 1
     multithreaded = num_worker_threads > 1
+    sample_rate = 1000
     return Chunk(
         file_finder,
-        channel_amount,
-        file_time_sample_amount,
+        sample_rate,
         multithreaded,
         num_worker_threads,
         False,
@@ -44,25 +39,44 @@ def filename_to_posix_timestamp(file_name:str) -> int:
     return timestamp_ms
 
 
-def load_file(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
-    #assert rel_t_end <= FILE_TIME_SAMPLE_AMOUNT
-    assert rel_t_start != rel_t_end
-    assert rel_t_start < rel_t_end
-    assert rel_t_start >= 0
-
-    tdms = TdmsReader(file_path)
-    data = tdms.get_mmap()
-    data = data[rel_t_start:rel_t_end, channel_start:channel_end]
-    if t_step != 1 or channel_step != 1:
-        data = bin(data, (t_step, channel_step))
-    #if t_step != 1:
-    #    data = data[::t_step]
-    #if channel_step != 1:
-    #    data = data[:, ::channel_step]
-    assert len(data) > 0
-
-    if CALIBRATE:
-        data = calibrate(data)
+def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
+    """ Loads a single file, trims it, and returns the trimmed data as a numpy array. Downsampling (t_step, channel_step) is also possible!
+    """
+
+    with TdmsReader(file_path) as tdms:
+        data = tdms.get_mmap()
+
+
+    # Trim data
+    rel_t_start = 0
+    if t_start > file_timestamp: # Check if beginning should be trimmed.
+        rel_t_start = t_start - file_timestamp
+    rel_t_end = -1
+    if t_end < file_timestamp + data.shape[0]: # Check if end should be trimmed
+        rel_t_end = t_end - file_timestamp
+    if rel_t_start == rel_t_end:
+        return NP.zeros(shape=[0, 0]) # No data should be loaded. Do nothing
+    if file_timestamp + data.shape[0] <= t_start:
+        print("Warning: File does not contain any parts of the requested data.",
+            "This can happen if there are leaks in the data. The corresponding output will be left filled with zeros.\n",
+            f"    Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
+            f"    Filepath: {file_path}.")
+        return NP.zeros(shape=[0, 0])
+    assert rel_t_end == -1 or rel_t_end > rel_t_start, f"rel_t_start={rel_t_start}, rel_t_end={rel_t_end}."
+    data = data[rel_t_start:rel_t_end, channel_start:channel_end]
+
+
+    # Downsample data
+    if t_step != 1 or channel_step != 1:
+        data = bin(data, (t_step, channel_step))
+    #if t_step != 1:
+    #    data = data[::t_step]
+    #if channel_step != 1:
+    #    data = data[:, ::channel_step]
+    assert len(data) > 0
+
+    if CALIBRATE:
+        data = calibrate(data)
 
     return data
 

das2numpy-1.0/src/das2numpy/setups/silixa_200hz.py (new file)

@@ -0,0 +1,105 @@
+""" Universal setup file for silixa, that detects sampling rate and number of channels by itself.
+    The root directory shall be supplied by the user via an argument
+"""
+
+import sys as SYS
+from os import path as P
+import datetime as DT
+import numpy as NP
+from ..filefinder import FileFinder, to_posix_timestamp_ms
+from ..chunk import Chunk
+from .light_tdms_reader import TdmsReader
+from ..utils import bin
+
+CALIBRATE = True
+
+
+
+sample_rate = 200
+
+
+def init(root_path, num_worker_threads):
+    assert P.isdir(root_path)
+    file_finder = FileFinder(root_path, ".tdms", filename_to_posix_timestamp)
+    assert num_worker_threads >= 1
+    multithreaded = num_worker_threads > 1
+    return Chunk(
+        file_finder,
+        sample_rate,
+        multithreaded,
+        num_worker_threads,
+        False,
+        load_file
+    )
+
+
+def filename_to_posix_timestamp(file_name:str) -> int:
+    timestamp_str = file_name.split("_UTC_")[1][:19]
+    timestamp_dt = DT.datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S.%f")
+    timestamp_ms = to_posix_timestamp_ms(timestamp_dt)
+    return timestamp_ms
+
+
+
+def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
+    """ Loads a single file, trims it, and returns the trimmed data as a numpy array. Downsampling (t_step, channel_step) is also possible!
+    """
+
+    with TdmsReader(file_path) as tdms:
+        data = tdms.get_mmap()
+
+
+    # Trim data
+    idx_start = 0
+    if t_start > file_timestamp: # Check if beginning should be trimmed.
+        rel_t_start = t_start - file_timestamp
+        idx_start = int(rel_t_start * sample_rate / 1000.0)
+    idx_end = data.shape[0]
+    if t_end < file_timestamp + (data.shape[0] * 1000 / sample_rate): # Check if end should be trimmed
+        rel_t_end = t_end - file_timestamp
+        idx_end = int(rel_t_end * sample_rate / 1000.0)
+    if idx_start == idx_end:
+        return NP.zeros(shape=[0, 0]) # No data should be loaded. Do nothing
+    if file_timestamp + (data.shape[0] * 1000 / sample_rate) <= t_start:
+        print("Warning: File does not contain any parts of the requested data.",
+            "This can happen if there are leaks in the data. The corresponding output will be left filled with zeros.\n",
+            f"    Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
+            f"    Filepath: {file_path}.")
+        return NP.zeros(shape=[0, 0])
+    assert idx_end == data.shape[0] or idx_end > idx_start, f"idx_start={idx_start}, idx_end={idx_end}."
+    print(idx_start, idx_end)
+    data = data[idx_start:idx_end, channel_start:channel_end]
+
+
+    # Downsample data
+    if t_step != 1 or channel_step != 1:
+        data = bin(data, (t_step, channel_step))
+    #if t_step != 1:
+    #    data = data[::t_step]
+    #if channel_step != 1:
+    #    data = data[:, ::channel_step]
+    assert len(data) > 0
+
+    if CALIBRATE:
+        data = calibrate(data)
+
+    return data
+
+
+def calibrate(data:NP.ndarray) -> NP.ndarray:
+    """ Convert raw data to strain rate data.
+        As the resulting values are decimals, the datatype should be float. Otherwise an assertion fails. """
+    #assert data.dtype in (NP.float, NP.float32, NP.float64), f"The data should be floating point. It is {data.dtype}"
+    if data.dtype not in (float, NP.float32, NP.float64):
+        NEW_TYPE = NP.float32
+        #print("Warning: For calibration the data has to be of type float. Converting from {data.dtype} to {NEW_TYPE}")
+        data = data.astype(NEW_TYPE)
+
+    SAMPLE_FREQ = 1000.0 # This remains 1000.0 and not 200 Hz because the original sample rate of the device is relevant here!
+    EICHLAENGE = 10.0
+    factor = 116.0 * 10.0**(-9.0) / 8192.0 * SAMPLE_FREQ / EICHLAENGE
+    return data * factor # Result: 1 / s
+
+
+
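
Note: plugging the constants from `calibrate()` together gives a fixed per-count scale factor; the arithmetic below only confirms the value implied by the code ("EICHLAENGE" is presumably the gauge length in metres):

```python
SAMPLE_FREQ = 1000.0  # native device rate, per the comment in calibrate()
EICHLAENGE = 10.0
factor = 116.0 * 10.0**(-9.0) / 8192.0 * SAMPLE_FREQ / EICHLAENGE
assert abs(factor - 1.416015625e-9) < 1e-18  # strain rate (1/s) per raw count
```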

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy/utils.py

@@ -7,6 +7,7 @@ import math as M
 import numpy as NP
 from numba import njit
 import scipy.signal as SS
+import scipy.stats
 
 TIME_AXIS = 0
 
@@ -69,7 +70,22 @@ def butterworth_filter(
 
 
 
-def spectrum_smoothing(frequencies:NP.ndarray, psd:NP.ndarray, n:int):
+def mean_confidence_interval(data, confidence=0.95, min_samples=10):
+    """
+    Calculates the confidence interval for a student-t distribution.
+    From https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
+    Returns: [mean, lower-confidence-limit, upper-confidence-limit]
+    """
+    n = len(data)
+    m = NP.mean(data)
+    if n < min_samples:
+        return m, None, None
+    se = scipy.stats.sem(data)
+    h = se * scipy.stats.t.ppf((1 + confidence) / 2., n-1)
+    return m, m-h, m+h
+
+
+def spectrum_smoothing(frequencies:NP.ndarray, psd:NP.ndarray, n:int, mode="median", error_calculation=False):
     """
     Perform 1/n decade smoothing on the power spectral density (PSD) data.
     See also: https://dsp.stackexchange.com/questions/9967/1-n-octave-smoothing
@@ -81,10 +97,17 @@ def spectrum_smoothing(frequencies:NP.ndarray, psd:NP.ndarray, n:int):
         Array containing the power spectral density values corresponding to the frequencies.
     n : int
         The number of divisions per decade (e.g., n=10 for 1/10 decade smoothing).
+    mode : "mean" or "median"
+        How the data points of one bin should be reduced to one point.
+    error_calculation : False, "std", "stderr", or float
+        If False, the function returns only two arrays.
+        If "std", the third array contains the standard deviation of the original data points per frequency bin.
+        If "stderr", the third array contains the standard error of the original data points per frequency bin.
+        If a float in [0.0, 1.0], the third array contains the confidence interval for each frequency bin (EXPERIMENTAL).
 
     Returns:
-        numpy.ndarray, numpy.ndarray
-            Smoothed frequencies and PSD.
+        numpy.ndarray, numpy.ndarray, numpy.ndarray
+            Smoothed frequencies, the PSD, and the error for each bin (the third array only if error_calculation is set).
     """
     frequencies = NP.array(frequencies)
     psd = NP.array(psd)
@@ -105,6 +128,7 @@
 
     freq_new_actual = []
     psd_new = []
+    error = []
     for i in range(len(freq_new)):
         f_log = freq_new_log[i]
         f_lower = 10**(f_log - step_log / 2)
@@ -112,12 +136,49 @@
 
         # Find the indices within this log decade interval
         mask = (frequencies >= f_lower) & (frequencies < f_higher)
-        #print(freq_new[i], f_lower, f_higher, mask)
         if NP.any(mask):
             freq_new_actual.append(NP.mean(frequencies[mask]))
-            psd_new.append(NP.mean(psd[mask]))
-
-    return NP.array(freq_new_actual), NP.array(psd_new)
+            if mode == "mean":
+                mean = NP.mean(psd[mask])
+                psd_new.append(mean)
+            elif mode == "median":
+                psd_new.append(NP.median(psd[mask]))
+            else:
+                raise Exception("Mode should be 'mean' or 'median'!")
+            if error_calculation == False:
+                pass
+            elif error_calculation == "std":
+                if len(psd[mask]) <= 1:
+                    error.append(float("NaN"))
+                else:
+                    error.append(NP.std(psd[mask]))
+            elif error_calculation == "stderr":
+                if len(psd[mask]) <= 1:
+                    error.append(float("NaN"))
+                else:
+                    error.append(NP.std(psd[mask]) / NP.sqrt(len(psd[mask])))
+            elif type(error_calculation) == float:
+                confidence_level = error_calculation
+                assert confidence_level >= 0.5
+                #samples = psd[mask]
+                #n = len(samples)
+                ##h = scipy.stats.sem(psd[mask]) * scipy.stats.t.ppf((1 + confidence_level) / 2., n-1) # From https://stackoverflow.com/questions/15033511/compute-a-confidence-interval-from-sample-data
+                #z_low = scipy.stats.rayleigh.ppf((1 - confidence_level) / 2.0) # Rayleigh should be the correct distribution for ASD values
+                #z_high = scipy.stats.rayleigh.ppf(confidence_level / 2.0) # Rayleigh should be the correct distribution for ASD values
+                #mean_or_median = psd_new[-1]
+                #standard_error = samples.std() / NP.sqrt(n)
+                #confidence_interval = [ mean_or_median - standard_error * z_low,
+                #                        mean_or_median + standard_error * z_high]
+                #print(f"-----------> m={mean_or_median} stderr={standard_error} z_low={z_low} stderr*zlow={standard_error * z_low} z+={z_high}")
+                m, lower, upper = mean_confidence_interval(psd[mask], confidence_level)
+                error.append([lower, upper])
+            else:
+                raise Exception(f"Error calculation type {error_calculation} is invalid.")
+
+    if error_calculation:
+        return NP.array(freq_new_actual), NP.array(psd_new), NP.array(error)
+    else:
+        return NP.array(freq_new_actual), NP.array(psd_new)
 
 
 def bin(arr: NP.ndarray, bin_factors:tuple):
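
Note: a short usage sketch of the extended `spectrum_smoothing` signature, fed from `scipy.signal.welch` (the signal is synthetic, and the import path assumes the function lives in `das2numpy.utils` as above):

```python
import numpy as NP
import scipy.signal as SS
from das2numpy.utils import spectrum_smoothing

rng = NP.random.default_rng(0)
signal = rng.standard_normal(200_000)  # synthetic white noise at 1 kHz
freqs, psd = SS.welch(signal, fs=1000.0, nperseg=4096)
freqs, psd = freqs[1:], psd[1:]  # drop the DC bin before log-spaced smoothing

# 1/10-decade smoothing; "stderr" returns a third array with the per-bin error.
f_s, psd_s, err = spectrum_smoothing(freqs, psd, n=10, mode="median",
                                     error_calculation="stderr")
```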

{das2numpy-0.0.4 → das2numpy-1.0/src/das2numpy.egg-info}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: das2numpy
-Version: 0.0.4
+Version: 1.0
 Summary: A simple and universal package for loading large amounts of distributed acoustic sensing (DAS) data.
 Author-email: Erik Genthe <erik.genthe@desy.de>
 Project-URL: Homepage, https://git.physnet.uni-hamburg.de/wave/das2numpy
@@ -49,6 +49,7 @@ Returns:
 ```
 
 
+
 #### More detailed interface
 ```python
 def load_array(t_start:datetime, t_end:datetime, t_step:int, channel_start:int, channel_end:int, channel_step:int) -> NP.ndarray:

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/SOURCES.txt

@@ -2,6 +2,7 @@ LICENSE
 README.md
 pyproject.toml
 src/example.py
+src/test_downsampled.py
 src/das2numpy/__init__.py
 src/das2numpy/__main__.py
 src/das2numpy/chunk.py
@@ -12,7 +13,9 @@ src/das2numpy.egg-info/PKG-INFO
 src/das2numpy.egg-info/SOURCES.txt
 src/das2numpy.egg-info/dependency_links.txt
 src/das2numpy.egg-info/top_level.txt
+src/das2numpy/setups/flac_200hz.py
 src/das2numpy/setups/light_tdms_reader.py
 src/das2numpy/setups/optasense_b35idefix.py
 src/das2numpy/setups/optasense_b35idefix_fast.py
-src/das2numpy/setups/silixa.py
+src/das2numpy/setups/silixa.py
+src/das2numpy/setups/silixa_200hz.py

{das2numpy-0.0.4 → das2numpy-1.0}/src/das2numpy.egg-info/top_level.txt

@@ -1,2 +1,3 @@
 das2numpy
 example
+test_downsampled

das2numpy-1.0/src/test_downsampled.py (new file)

@@ -0,0 +1,54 @@
+import numpy as NP
+import sys
+from datetime import datetime
+import matplotlib.pyplot as PP
+from das2numpy import loader, utils
+
+USE_DOWNSAMPLED = False
+
+print("Load data to numpy-array")
+t_start = datetime(2025, 10, 14, 2, 58, 59)
+t_end = datetime(2025, 10, 14, 2, 59, 1)
+channel_start = 1000
+channel_end = 3000
+
+if USE_DOWNSAMPLED:
+    loader = loader("/pnfs/desy.de/m/project/iDAS/work/derived-data/DOWNSAMPLED_200HZ/2025-10/", "SILIXA_200HZ", 1)
+else:
+    loader = loader("/pnfs/desy.de/m/project/iDAS/raw/2025-DESY/2025-10-14-desy", "SILIXA", 1)
+data = loader.load_array(t_start, t_end, channel_start, channel_end)
+
+print("Reduce data by binning (mean averaging)")
+if USE_DOWNSAMPLED:
+    bin_factors = (1, 1)
+    data = utils.bin(data, bin_factors) # Reduce time sampling and spatial sampling by averaging.
+    sampling_hz = 200.0 / bin_factors[0]
+else:
+    bin_factors = (5, 1)
+    data = utils.bin(data, bin_factors) # Reduce time sampling and spatial sampling by averaging.
+    sampling_hz = 1000.0 / bin_factors[0]
+channel_spacing = 1.0 * bin_factors[1]
+
+NP.save("data.npy", data)
+
+print("Create plot with pyplot")
+PP.title(f"{t_start.isoformat()}")
+PP.imshow(
+    data,
+    cmap = "seismic",
+    aspect = "auto",
+    interpolation = "nearest",
+    vmin = -1e-7,
+    vmax = +1e-7,
+    extent = (
+        channel_start, channel_start + (data.shape[1] * channel_spacing),
+        data.shape[0] / sampling_hz, 0
+    )
+)
+PP.xlabel("Position [m]")
+PP.ylabel("Time [s]")
+PP.colorbar(label="Strain-rate [$\\frac{m}{m \\cdot s}$]")
+if USE_DOWNSAMPLED:
+    PP.savefig("waterfall_downsampled.png")
+else:
+    PP.savefig("waterfall.png")