das2numpy 1.1.1__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {das2numpy-1.1.1/src/das2numpy.egg-info → das2numpy-1.2.0}/PKG-INFO +2 -2
  2. {das2numpy-1.1.1 → das2numpy-1.2.0}/pyproject.toml +2 -2
  3. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/__init__.py +3 -0
  4. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/setups/flac_200hz.py +2 -1
  5. das2numpy-1.1.1/src/das2numpy/setups/silixa_200hz.py → das2numpy-1.2.0/src/das2numpy/setups/hdas.py +30 -29
  6. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/setups/optasense_b35idefix.py +2 -0
  7. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/setups/silixa.py +2 -0
  8. {das2numpy-1.1.1 → das2numpy-1.2.0/src/das2numpy.egg-info}/PKG-INFO +2 -2
  9. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy.egg-info/SOURCES.txt +2 -3
  10. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/example.py +1 -0
  11. das2numpy-1.1.1/src/das2numpy/setups/optasense_b35idefix_fast.py +0 -111
  12. {das2numpy-1.1.1 → das2numpy-1.2.0}/LICENSE +0 -0
  13. {das2numpy-1.1.1 → das2numpy-1.2.0}/README.md +0 -0
  14. {das2numpy-1.1.1 → das2numpy-1.2.0}/setup.cfg +0 -0
  15. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/__main__.py +0 -0
  16. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/chunk.py +0 -0
  17. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/filefinder.py +0 -0
  18. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/setups/light_tdms_reader.py +0 -0
  19. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy/utils.py +0 -0
  20. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy.egg-info/dependency_links.txt +0 -0
  21. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy.egg-info/requires.txt +0 -0
  22. {das2numpy-1.1.1 → das2numpy-1.2.0}/src/das2numpy.egg-info/top_level.txt +0 -0
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: das2numpy
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: A simple and universal package for loading large amounts of distributed acoustic sensing (DAS) data.
5
5
  Author-email: Erik Genthe <erik.genthe@desy.de>
6
- Project-URL: Homepage, https://git.physnet.uni-hamburg.de/wave/das2numpy
6
+ Project-URL: Homepage, https://gitlab.desy.de/wave/tools/das2numpy
7
7
  Classifier: Programming Language :: Python :: 3
8
8
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
9
9
  Classifier: Operating System :: OS Independent
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "das2numpy"
7
- version = "1.1.1"
7
+ version = "1.2.0"
8
8
  authors = [
9
9
  { name="Erik Genthe", email="erik.genthe@desy.de" },
10
10
  ]
@@ -20,4 +20,4 @@ dependencies = [
20
20
  "numpy", "ffmpeg-python", "h5py", "scipy", "numba",
21
21
  ]
22
22
  [project.urls]
23
- Homepage = "https://git.physnet.uni-hamburg.de/wave/das2numpy"
23
+ Homepage = "https://gitlab.desy.de/wave/tools/das2numpy"
@@ -26,6 +26,9 @@ def loader(root_path:str, predefined_setup:str, num_worker_threads):
26
26
  elif predefined_setup.upper() == "FLAC_200HZ":
27
27
  from .setups import flac_200hz
28
28
  chunk = flac_200hz.init(root_path, num_worker_threads)
29
+ elif predefined_setup.upper() == "HDAS":
30
+ from .setups import hdas
31
+ chunk = hdas.init(root_path, num_worker_threads)
29
32
  elif predefined_setup.upper() == "OPTASENSE":
30
33
  from .setups import optasense_b35idefix
31
34
  chunk = optasense_b35idefix.init()
@@ -90,7 +90,8 @@ def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start,
90
90
  + "execute 'module load maxwell ffmpeg'")
91
91
 
92
92
  # Trim data
93
- data = data[:, channel_start:channel_end]
93
+ channel_stop = None if channel_end == -1 else channel_end # -1 means "through the last channel"; a literal -1 stop would drop it, and channel_start must still be applied
94
+ data = data[:, channel_start:channel_stop]
94
95
 
95
96
  data = data.astype(NP.float32) #This needs to happen before the binning step!
96
97
 
@@ -1,28 +1,28 @@
1
- """ Univsersal setup file for silixa, that detects sampling rate and number of channels by itself.
2
- The root directory shall be supplied by the user via an argument
1
+ """ Setup file for hDAS.
3
2
  """
4
3
 
5
4
  import sys as SYS
6
5
  from os import path as P
7
6
  import datetime as DT
8
7
  import numpy as NP
8
+ import h5py as H5PY
9
9
  from ..filefinder import FileFinder, to_posix_timestamp_ms
10
10
  from ..chunk import Chunk
11
11
  from .light_tdms_reader import TdmsReader
12
12
  from ..utils import bin
13
13
 
14
- CALIBRATE = True
15
14
 
15
+ CALIBRATE = True
16
16
 
17
17
 
18
- sample_rate = 200
19
18
 
20
19
 
21
20
  def init(root_path, num_worker_threads):
22
21
  assert P.isdir(root_path)
23
- file_finder = FileFinder(root_path, ".tdms", filename_to_posix_timestamp)
22
+ file_finder = FileFinder(root_path, ".h5", filename_to_posix_timestamp)
24
23
  assert num_worker_threads >= 1
25
24
  multithreaded = num_worker_threads > 1
25
+ sample_rate = 1000
26
26
  return Chunk(
27
27
  file_finder,
28
28
  sample_rate,
@@ -32,43 +32,46 @@ def init(root_path, num_worker_threads):
32
32
  load_file
33
33
  )
34
34
 
35
-
36
35
  def filename_to_posix_timestamp(file_name:str) -> int:
37
- timestamp_str = file_name.split("_UTC_")[1][:19]
38
- timestamp_dt = DT.datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S.%f")
36
+ # Example path: 2026_05_08_15h02m22s_HDAS_StrainRate_ch1.h5
37
+ # TODO Known bug: The file name only has a resolution of seconds, so the milliseconds are missing here! The timestamps array in the file does contain them: h5['Timestamps']['Timestamps_Data']
38
+ timestamp_dt = DT.datetime.strptime(file_name.split("s_")[0], "%Y_%m_%d_%Hh%Mm%S")
39
39
  timestamp_ms = to_posix_timestamp_ms(timestamp_dt)
40
40
  return timestamp_ms
41
41
 
42
42
 
43
-
44
43
  def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
45
44
  """ Loads a single file, trims it. And returns the trimmed data as a numpy array. Downsampling (t_step, channel_step) is also possible!
46
45
  """
47
46
 
48
- with TdmsReader(file_path) as tdms:
49
- data = tdms.get_mmap()
50
-
47
+ with H5PY.File(file_path, "r") as h5:
48
+ # Known bug: The file_timestamp is not accurate to milliseconds, because milliseconds are missing in the file name
49
+ #timestamps_unix_sec = h5['Timestamps']['Timestamps_Data']
50
+ #print(file_timestamp)
51
+ #file_timestamp = int(timestamps_unix_sec[0][0] * 1000.0) # Overwrite inaccurate file_timestamp
52
+ #file_timestamp -= 7200000 # TODO: This is a dirty fix: Internal timestamp is CEST, when hDAS system time is CEST.
53
+ data = NP.array(h5["StrainRate"]["StrainRate_Data"])
54
+ #print(file_timestamp)
51
55
 
52
56
  # Trim data
53
- idx_start = 0
57
+ rel_t_start = 0
54
58
  if t_start > file_timestamp: # Check if beginning should be trimmed.
55
59
  rel_t_start = t_start - file_timestamp
56
- idx_start = int(rel_t_start * sample_rate / 1000.0)
57
- idx_end = data.shape[0]
58
- if t_end < file_timestamp + (data.shape[0] * 1000 / sample_rate): # Check if end should be trimmed
60
+ rel_t_end = -1
61
+ if t_end < file_timestamp + data.shape[0]: # Check if end should be trimmed
59
62
  rel_t_end = t_end - file_timestamp
60
- idx_end = int(rel_t_end * sample_rate / 1000.0)
61
- if idx_start == idx_end:
63
+ if rel_t_start == rel_t_end:
62
64
  return NP.zeros(shape=[0, 0]) # No data should be loaded. Do nothing
63
- if file_timestamp + (data.shape[0] * 1000 / sample_rate) <= t_start:
65
+ if file_timestamp + data.shape[0] <= t_start:
64
66
  print("Warning: File does not contain any parts of the requested data.",
65
67
  "This can happen if there are leaks in the data. The corresponding output will be left filled with zeros.\n",
66
68
  f" Requested range (Posixtimestamps in ms): [{t_start}, {t_end}[\n",
67
69
  f" Filepath: {file_path}.")
68
70
  return NP.zeros(shape=[0, 0])
69
- assert idx_end == data.shape[0] or idx_end > idx_start, f"idx_start={idx_start}, idx_end={idx_end}."
70
- print(idx_start, idx_end)
71
- data = data[idx_start:idx_end, channel_start:channel_end]
71
+ assert rel_t_end == -1 or rel_t_end > rel_t_start, f"rel_t_start={rel_t_start}, rel_t_end={rel_t_end}."
72
+ if channel_end == -1: # Avoids that last channel is missing when channel_end == -1
73
+ channel_end = None
74
+ data = data[rel_t_start:(None if rel_t_end == -1 else rel_t_end), channel_start:channel_end] # rel_t_end == -1 means "until end of file"; a literal -1 stop would drop the last sample
72
75
 
73
76
 
74
77
  # Downsample data
@@ -86,18 +89,16 @@ def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start,
86
89
  return data
87
90
 
88
91
 
92
+
93
+
94
+
89
95
  def calibrate(data:NP.ndarray) -> NP.ndarray:
90
- """ Convert raw data to strain rate data.
91
- As the resulting values are decimals, the datatype should be float. Otherwise an assertion fails. """
92
- #assert data.dtype in (NP.float, NP.float32, NP.float64), f"The data should be floating point. It is {data.dtype}"
96
+ """ Convert raw data to strain rate data. """
93
97
  if data.dtype not in (float, NP.float32, NP.float64):
94
98
  NEW_TYPE = NP.float32
95
99
  #print("Warning: For calibration the data has to be of type float. Converting from {data.dtype} to {NEW_TYPE}")
96
100
  data = data.astype(NEW_TYPE)
97
-
98
- SAMPLE_FREQ = 1000.0 # This remains 1000.0 and not 200 Hz because the original sample rate of the device is relevant here!
99
- EICHLAENGE = 10.0
100
- factor = 116.0 * 10.0**(-9.0) / 8192.0 * SAMPLE_FREQ / EICHLAENGE
101
+ factor = 10 * 1e-9
101
102
  return data * factor # Result: 1 / s
102
103
 
103
104
 
@@ -29,6 +29,8 @@ def _load_from_h5(file_path, rel_t_start, rel_t_end, t_step, channel_start, chan
29
29
  data = file['Acquisition']['Raw[0]']['RawData'] # Data is not loaded into memory at this point! (Lazy evaluation)
30
30
 
31
31
  # At this point the data gets loaded into memory.
32
+ if channel_end == -1: # Avoids that last channel is missing when channel_end == -1
33
+ channel_end = None
32
34
  data = data[
33
35
  channel_start : channel_end : channel_step,
34
36
  rel_t_start : rel_t_end : t_step
@@ -63,6 +63,8 @@ def load_file(file_path, file_timestamp, t_start, t_end, t_step, channel_start,
63
63
  f" Filepath: {file_path}.")
64
64
  return NP.zeros(shape=[0, 0])
65
65
  assert rel_t_end == -1 or rel_t_end > rel_t_start, f"rel_t_start={rel_t_start}, rel_t_end={rel_t_end}."
66
+ if channel_end == -1: # Avoids that last channel is missing when channel_end == -1
67
+ channel_end = None
66
68
  data = data[rel_t_start:rel_t_end, channel_start:channel_end]
67
69
 
68
70
 
@@ -1,9 +1,9 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: das2numpy
3
- Version: 1.1.1
3
+ Version: 1.2.0
4
4
  Summary: A simple and universal package for loading large amounts of distributed acoustic sensing (DAS) data.
5
5
  Author-email: Erik Genthe <erik.genthe@desy.de>
6
- Project-URL: Homepage, https://git.physnet.uni-hamburg.de/wave/das2numpy
6
+ Project-URL: Homepage, https://gitlab.desy.de/wave/tools/das2numpy
7
7
  Classifier: Programming Language :: Python :: 3
8
8
  Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
9
9
  Classifier: Operating System :: OS Independent
@@ -13,8 +13,7 @@ src/das2numpy.egg-info/dependency_links.txt
13
13
  src/das2numpy.egg-info/requires.txt
14
14
  src/das2numpy.egg-info/top_level.txt
15
15
  src/das2numpy/setups/flac_200hz.py
16
+ src/das2numpy/setups/hdas.py
16
17
  src/das2numpy/setups/light_tdms_reader.py
17
18
  src/das2numpy/setups/optasense_b35idefix.py
18
- src/das2numpy/setups/optasense_b35idefix_fast.py
19
- src/das2numpy/setups/silixa.py
20
- src/das2numpy/setups/silixa_200hz.py
19
+ src/das2numpy/setups/silixa.py
@@ -11,6 +11,7 @@ t_end = datetime(2024, 7, 23, 1, 1, 0)
11
11
  channel_start = 0
12
12
  channel_end = -1
13
13
  #loader = loader("/pnfs/desy.de/m/project/iDAS/raw/2024-DESY/2024-07-23-desy", "SILIXA", 1) # 1000 Hz
14
+ #loader = loader("/pnfs/desy.de/m/project/iDAS/raw/2026-DESY/2026_das_comparison/hdas", "HDAS", 1) # 1000 Hz
14
15
  loader = loader("/pnfs/desy.de/m/project/iDAS/work/IDAS_200HZ/", "FLAC_200HZ", 1) # 200 Hz
15
16
  data = loader.load_array(t_start, t_end, channel_start, channel_end)
16
17
 
@@ -1,111 +0,0 @@
1
- """ Deprecated
2
- """
3
-
4
- from math import ceil, floor
5
- import mmap
6
- from os import path as P
7
- import numpy as NP
8
- import h5py as H5PY
9
- import datetime as DT
10
- from time import time
11
- from filefinder import FileFinder, to_posix_timestamp_ms
12
- from chunk import Chunk
13
-
14
- #/wave/seismic-work/markhoff/pilot/data/cache/7wave7seismic-rawdata7OPTA7Disk27DESY-Rec-9-GL8m-Chan10000_2021-05-28T06_01_36+01007DESY-Rec-9-GL8m-Chan10000_2021-05-28T194319Z.h5.bin
15
- FILE_TIME_SAMPLE_AMOUNT = 60000
16
- CHANNEL_AMOUNT = 10000
17
- DATA_ROOT = "/wave/seismic-work/markhoff/pilot/data/cache"
18
- assert P.isdir(DATA_ROOT)
19
-
20
- def _filename_to_posix_timestamp(file_name:str) -> int:
21
- return to_posix_timestamp_ms(DT.datetime.strptime(file_name[-25:], "%Y-%m-%dT%H%M%SZ.h5.bin"))
22
-
23
-
24
-
25
- def _load_from_h5(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
26
- """ Internal helper function """
27
- #file_handle = open(file_path, 'rb')
28
- #file:H5PY.File = H5PY.File(file_handle, 'r')
29
- #data = file['Acquisition']['Raw[0]']['RawData'] # Data is not loaded into memory at this point! (Lazy evaluation)
30
- #
31
- ## At this point the data gets loaded into memory.
32
- #data = data[
33
- # channel_start : channel_end : channel_step,
34
- # rel_t_start : rel_t_end : t_step
35
- #]
36
-
37
- DTYPE_SIZE = 4
38
- data = None
39
- #if channel_step == 1:
40
- # data = NP.fromfile(
41
- # file_path,
42
- # dtype = NP.int32,
43
- # offset = channel_start * FILE_TIME_SAMPLE_AMOUNT * DTYPE_SIZE,
44
- # count = (channel_end-channel_start) * FILE_TIME_SAMPLE_AMOUNT
45
- # )
46
- # data.shape = (channel_end-channel_start, FILE_TIME_SAMPLE_AMOUNT)
47
- # data = data[:, rel_t_start:rel_t_end:t_step]
48
- #else:
49
- # data = NP.ndarray(
50
- # shape=(
51
- # ceil((channel_end - channel_start) / channel_step),
52
- # FILE_TIME_SAMPLE_AMOUNT
53
- # ),
54
- # dtype=NP.int32
55
- # )
56
- # file_handle = open(file_path, 'rb')
57
- # data_index = 0
58
- # for channel_index in range(channel_start, channel_end, channel_step):
59
- # file_handle.seek(channel_index * FILE_TIME_SAMPLE_AMOUNT)
60
- # channel_data = NP.frombuffer(file_handle.read(FILE_TIME_SAMPLE_AMOUNT))
61
- # data[data_index] = channel_data
62
- # data_index += 1
63
- # file_handle.close()
64
- # data = data[:, rel_t_start:rel_t_end:t_step]
65
-
66
- data = NP.fromfile(
67
- file_path,
68
- dtype = NP.int32,
69
- offset = channel_start * FILE_TIME_SAMPLE_AMOUNT * DTYPE_SIZE,
70
- count = (channel_end-channel_start) * FILE_TIME_SAMPLE_AMOUNT
71
- )
72
- data.shape = (channel_end-channel_start, FILE_TIME_SAMPLE_AMOUNT)
73
- data = data[::channel_step, rel_t_start:rel_t_end:t_step]
74
-
75
- print("Args (channel):", channel_start, channel_end, channel_step)
76
- print("Args (time):", rel_t_start, rel_t_end, rel_t_end)
77
- print("Fresh after loading: ", data.shape)
78
- data = data.transpose() # Extremely efficient :)
79
- return data
80
-
81
- def _load_from_h5_X(file_path, rel_t_start, rel_t_end, t_step, channel_start, channel_end, channel_step) -> NP.ndarray:
82
- DTYPE_SIZE = 4
83
- t1 = time()
84
- mm = NP.memmap(file_path, dtype=NP.int32, mode='readonly')
85
- mm.shape = (CHANNEL_AMOUNT, FILE_TIME_SAMPLE_AMOUNT)
86
- t2 = time()
87
- data = mm[channel_start:channel_end:channel_step, rel_t_start:rel_t_end:t_step]
88
- t3 = time()
89
- data = NP.array(data)
90
- t4 = time()
91
- data = data.transpose() # Extremely efficient :)
92
- t5 = time()
93
- print("DELTAS", t2-t1, t3-t2, t4-t3, t5-t4)
94
- #print("Args (channel):", channel_start, channel_end, channel_step)
95
- #print("Args (time):", rel_t_start, rel_t_end, rel_t_end)
96
- #print("Fresh after loading: ", data.shape)
97
- return data
98
-
99
- FILE_FINDER = FileFinder(DATA_ROOT, ".h5.bin", _filename_to_posix_timestamp)
100
-
101
-
102
- def create_chunk():
103
- return Chunk(
104
- FILE_FINDER,
105
- CHANNEL_AMOUNT,
106
- FILE_TIME_SAMPLE_AMOUNT,
107
- True,
108
- 8,
109
- False,
110
- _load_from_h5
111
- )
File without changes
File without changes
File without changes