osekit 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. osekit/__init__.py +40 -0
  2. osekit/config.py +23 -0
  3. osekit/config.toml +42 -0
  4. osekit/core_api/__init__.py +3 -0
  5. osekit/core_api/audio_data.py +389 -0
  6. osekit/core_api/audio_dataset.py +307 -0
  7. osekit/core_api/audio_file.py +128 -0
  8. osekit/core_api/audio_file_manager.py +107 -0
  9. osekit/core_api/audio_item.py +76 -0
  10. osekit/core_api/base_data.py +304 -0
  11. osekit/core_api/base_dataset.py +387 -0
  12. osekit/core_api/base_file.py +172 -0
  13. osekit/core_api/base_item.py +83 -0
  14. osekit/core_api/event.py +190 -0
  15. osekit/core_api/frequency_scale.py +215 -0
  16. osekit/core_api/instrument.py +141 -0
  17. osekit/core_api/json_serializer.py +38 -0
  18. osekit/core_api/ltas_data.py +217 -0
  19. osekit/core_api/spectro_data.py +743 -0
  20. osekit/core_api/spectro_dataset.py +502 -0
  21. osekit/core_api/spectro_file.py +165 -0
  22. osekit/core_api/spectro_item.py +91 -0
  23. osekit/job.py +643 -0
  24. osekit/logging_config.yaml +36 -0
  25. osekit/logging_context.py +56 -0
  26. osekit/public_api/__init__.py +0 -0
  27. osekit/public_api/analysis.py +151 -0
  28. osekit/public_api/dataset.py +540 -0
  29. osekit/public_api/export_analysis.py +244 -0
  30. osekit/utils/__init__.py +0 -0
  31. osekit/utils/audio_utils.py +114 -0
  32. osekit/utils/core_utils.py +310 -0
  33. osekit/utils/formatting_utils.py +87 -0
  34. osekit/utils/path_utils.py +44 -0
  35. osekit/utils/timestamp_utils.py +242 -0
  36. osekit-0.2.5.dist-info/METADATA +68 -0
  37. osekit-0.2.5.dist-info/RECORD +40 -0
  38. osekit-0.2.5.dist-info/WHEEL +4 -0
  39. osekit-0.2.5.dist-info/entry_points.txt +2 -0
  40. osekit-0.2.5.dist-info/licenses/LICENSE +0 -0
osekit/__init__.py ADDED
@@ -0,0 +1,40 @@
1
+ import logging.config
2
+ import os.path
3
+ from pathlib import Path
4
+
5
+ import yaml
6
+
7
+ from osekit import utils
8
+ from osekit.job import Job_builder
9
+
10
+ __all__ = [
11
+ "Job_builder",
12
+ "utils",
13
+ ]
14
+
15
+
16
def _setup_logging(
    config_file="logging_config.yaml",
    default_level: int = logging.INFO,
) -> None:
    """Configure the logging of the package.

    The configuration file is searched first in the folder designated by the
    ``OSMOSE_USER_CONFIG`` environment variable (current working directory if
    the variable is not set), then in the folder of this module.
    The first candidate that is an existing file is loaded as YAML and passed
    to ``logging.config.dictConfig``.
    If no candidate exists, or if the selected file is empty,
    ``logging.basicConfig`` is called with ``default_level`` instead.

    Parameters
    ----------
    config_file:
        Name of the YAML logging configuration file.
    default_level: int
        Level passed to ``logging.basicConfig`` when no configuration
        could be loaded from a file.

    """
    user_config_file_path = Path(os.getenv("OSMOSE_USER_CONFIG", ".")) / config_file
    default_config_file_path = Path(__file__).parent / config_file

    # The user configuration takes precedence over the packaged one.
    # is_file() (rather than exists()) skips a directory bearing the same name,
    # which could not be opened anyway.
    config_file_path = next(
        (
            file
            for file in (user_config_file_path, default_config_file_path)
            if file.is_file()
        ),
        None,
    )

    logging_config = None
    if config_file_path is not None:
        with config_file_path.open(encoding="utf-8") as configuration:
            logging_config = yaml.safe_load(configuration)

    # safe_load returns None for an empty document: dictConfig would raise on it,
    # so an empty file is treated like a missing one.
    if logging_config:
        logging.config.dictConfig(logging_config)
    else:
        logging.basicConfig(level=default_level)
39
+
40
+ _setup_logging()
osekit/config.py ADDED
@@ -0,0 +1,23 @@
1
+ import logging
2
+ import stat
3
+
4
+ from osekit.logging_context import LoggingContext
5
+
6
+ TIMESTAMP_FORMAT_AUDIO_FILE = "%Y-%m-%dT%H:%M:%S.%f%z"
7
+ TIMESTAMP_FORMAT_EXPORTED_FILES_UNLOCALIZED = "%Y_%m_%d_%H_%M_%S_%f"
8
+ TIMESTAMP_FORMAT_EXPORTED_FILES_LOCALIZED = "%Y_%m_%d_%H_%M_%S_%f%z"
9
+ TIMESTAMP_FORMATS_EXPORTED_FILES = [
10
+ TIMESTAMP_FORMAT_EXPORTED_FILES_LOCALIZED,
11
+ TIMESTAMP_FORMAT_EXPORTED_FILES_UNLOCALIZED,
12
+ ]
13
+
14
+ FPDEFAULT = 0o664 # Default file permissions
15
+ DPDEFAULT = stat.S_ISGID | 0o775 # Default directory permissions
16
+
17
+ global_logging_context = LoggingContext()
18
+ print_logger = logging.getLogger("printer")
19
+
20
+ resample_quality_settings = {
21
+ "downsample": "QQ",
22
+ "upsample": "MQ",
23
+ }
osekit/config.toml ADDED
@@ -0,0 +1,42 @@
1
+ [Job]
2
+ job_scheduler = "Torque"
3
+ # env_script should contain everything but the environment name, with ${env_name} being where it will go
4
+ env_script = ". /appli/anaconda/latest/etc/profile.d/conda.sh; conda activate ${env_name}/"
5
+ env_name = "osmose"
6
+ outfile = "Job_{}_%j.out"
7
+ errfile = "Job_{}_%j.err"
8
+
9
+ # Default parameters
10
+ queue = "omp"
11
+ walltime = "12:00:00"
12
+ ncpus = 6
13
+ mem = "40g"
14
+ nodes = 1
15
+
16
+ [Job.Presets.low]
17
+ queue = "sequentiel"
18
+ walltime = "04:00:00"
19
+ ncpus = 1
20
+ mem = "1g"
21
+ nodes = 1
22
+
23
+ [Job.Presets.medium]
24
+ queue = "omp"
25
+ walltime = "12:00:00"
26
+ ncpus = 6
27
+ mem = "40g"
28
+ nodes = 1
29
+
30
+ [Job.Presets.high]
31
+ queue = "omp"
32
+ walltime = "12:00:00"
33
+ ncpus = 28
34
+ mem = "120g"
35
+ nodes = 1
36
+
37
+ [Auxiliary]
38
+ bathymetry = "/home6/grosmaan/Documents/codes/osmose_codes/datawork-osmose/dataset/auxiliary/GEBCO_2022_sub_ice_topo.nc"
39
+ shore_dist = "/home6/grosmaan/Documents/codes/osmose_codes/datawork-osmose/dataset/auxiliary/dist2coast.txt"
40
+
41
+
42
+
osekit/core_api/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from osekit.core_api.audio_file_manager import AudioFileManager
2
+
3
+ audio_file_manager = AudioFileManager()
osekit/core_api/audio_data.py ADDED
@@ -0,0 +1,389 @@
1
+ """AudioData represent audio data scattered through different AudioFiles.
2
+
3
+ The AudioData has a collection of AudioItem.
4
+ The data is accessed via an AudioItem object per AudioFile.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from math import ceil
10
+ from typing import TYPE_CHECKING
11
+
12
+ import numpy as np
13
+ import soundfile as sf
14
+ from pandas import Timedelta, Timestamp
15
+
16
+ from osekit.config import (
17
+ TIMESTAMP_FORMATS_EXPORTED_FILES,
18
+ )
19
+ from osekit.core_api.audio_file import AudioFile
20
+ from osekit.core_api.audio_item import AudioItem
21
+ from osekit.core_api.base_data import BaseData
22
+ from osekit.core_api.instrument import Instrument
23
+ from osekit.utils.audio_utils import resample
24
+
25
+ if TYPE_CHECKING:
26
+ from pathlib import Path
27
+
28
+
29
class AudioData(BaseData[AudioItem, AudioFile]):
    """AudioData represent audio data scattered through different AudioFiles.

    The AudioData has a collection of AudioItem.
    The data is accessed via an AudioItem object per AudioFile.
    """

    def __init__(
        self,
        items: list[AudioItem] | None = None,
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
        sample_rate: int | None = None,
        instrument: Instrument | None = None,
    ) -> None:
        """Initialize an AudioData from a list of AudioItems.

        Parameters
        ----------
        items: list[AudioItem]
            List of the AudioItem constituting the AudioData.
        sample_rate: int
            The sample rate of the audio data.
            If None, the sample rate of the first item that has one is used.
        begin: Timestamp | None
            Only effective if items is None.
            Set the begin of the empty data.
        end: Timestamp | None
            Only effective if items is None.
            Set the end of the empty data.
        instrument: Instrument | None
            Instrument that might be used to obtain acoustic pressure from
            the wav audio data.

        """
        super().__init__(items=items, begin=begin, end=end)
        self._set_sample_rate(sample_rate=sample_rate)
        self.instrument = instrument

    @property
    def nb_channels(self) -> int:
        """Number of channels of the audio data.

        This is the largest channel count among the AudioItem of the data,
        and at least 1.
        """
        return max(
            [1] + [item.nb_channels for item in self.items if type(item) is AudioItem],
        )

    @property
    def shape(self) -> tuple[int, ...] | int:
        """Shape of the audio data.

        An int (number of frames) if the data has at most one channel,
        else a (number of frames, number of channels) tuple.
        """
        data_length = round(self.sample_rate * self.duration.total_seconds())
        return data_length if self.nb_channels <= 1 else (data_length, self.nb_channels)

    def __eq__(self, other: AudioData) -> bool:
        """Override __eq__.

        Two AudioData are equal if they share the same sample rate and
        are equal according to the parent class.
        """
        return self.sample_rate == other.sample_rate and super().__eq__(other)

    def _set_sample_rate(self, sample_rate: int | None = None) -> None:
        """Set the AudioFile sample rate.

        If the sample_rate is specified, it is set.
        If it is not specified, it is set to the sampling rate of the
        first item that has one.
        Else, it is set to None.
        """
        if sample_rate is not None:
            self.sample_rate = sample_rate
            return
        if sr := next(
            (item.sample_rate for item in self.items if item.sample_rate is not None),
            None,
        ):
            self.sample_rate = sr
            return
        self.sample_rate = None

    def get_value(self, reject_dc: bool = False) -> np.ndarray:
        """Return the value of the audio data.

        The data from the audio file will be resampled if necessary.

        Parameters
        ----------
        reject_dc: bool
            If True, the values will be centered on 0.

        Returns
        -------
        np.ndarray:
            The value of the audio data.

        """
        # Zero-initialized (rather than np.empty) so that frames that would not
        # be covered by any item are silent instead of uninitialized memory.
        data = np.zeros(shape=self.shape)
        idx = 0
        for item in self.items:
            item_data = self._get_item_value(item)
            # Truncate the item data so that it never overflows the output array.
            item_data = item_data[: min(item_data.shape[0], data.shape[0] - idx)]
            data[idx : idx + len(item_data)] = item_data
            idx += len(item_data)
        if reject_dc:
            data -= data.mean()
        return data

    def get_value_calibrated(self, reject_dc: bool = False) -> np.ndarray:
        """Return the value of the audio data accounting for the calibration factor.

        If the instrument parameter of the audio data is not None, the returned value is
        calibrated in units of Pa.
        If it is None, the raw value is returned unchanged (factor of 1.0).

        Parameters
        ----------
        reject_dc: bool
            If True, the values will be centered on 0.

        Returns
        -------
        np.ndarray:
            The calibrated value of the audio data.

        """
        raw_data = self.get_value(reject_dc=reject_dc)
        calibration_factor = (
            1.0 if self.instrument is None else self.instrument.end_to_end
        )
        return raw_data * calibration_factor

    def write(
        self,
        folder: Path,
        subtype: str | None = None,
        link: bool = False,
    ) -> None:
        """Write the audio data to file.

        The file is named "str(self).wav" and written in the given folder.

        Parameters
        ----------
        folder: pathlib.Path
            Folder in which to write the audio file.
        subtype: str | None
            Subtype as provided by the soundfile module.
            Defaulted as the default 16-bit PCM for WAV audio files.
        link: bool
            If True, the AudioData will be bound to the written file.
            Its items will be replaced with a single item, which will match the whole
            new AudioFile.

        """
        super().create_directories(path=folder)
        sf.write(
            folder / f"{self}.wav",
            self.get_value(),
            self.sample_rate,
            subtype=subtype,
        )
        if link:
            self.link(folder=folder)

    def link(self, folder: Path) -> None:
        """Link the AudioData to an AudioFile in the folder.

        The given folder should contain a file named "str(self).wav".
        Linking is intended for AudioData objects that have already been written.
        After linking, the AudioData will have a single item with the same
        properties of the target AudioFile.

        Parameters
        ----------
        folder: Path
            Folder in which is located the AudioFile to which the AudioData instance
            should be linked.

        """
        file = AudioFile(
            path=folder / f"{self}.wav",
            strptime_format=TIMESTAMP_FORMATS_EXPORTED_FILES,
        )
        self.items = AudioData.from_files([file]).items

    def _get_item_value(self, item: AudioItem) -> np.ndarray:
        """Return the resampled (if needed) data from the audio item.

        The value of an empty item is repeated to span the item duration at the
        sample rate of the AudioData.
        The value of a non-empty item is resampled only if the sample rate of
        the item differs from the one of the AudioData.
        """
        item_data = item.get_value()
        if item.is_empty:
            return item_data.repeat(
                round(item.duration.total_seconds() * self.sample_rate),
            )
        if item.sample_rate != self.sample_rate:
            return resample(item_data, item.sample_rate, self.sample_rate)
        return item_data

    def split(self, nb_subdata: int = 2) -> list[AudioData]:
        """Split the audio data object in the specified number of audio subdata.

        The subdata share the sample rate and the instrument of this AudioData.

        Parameters
        ----------
        nb_subdata: int
            Number of subdata in which to split the data.

        Returns
        -------
        list[AudioData]
            The list of AudioData subdata objects.

        """
        return [
            AudioData.from_base_data(
                data=base_data,
                sample_rate=self.sample_rate,
                # Propagate the instrument so that subdata stay calibrated.
                instrument=self.instrument,
            )
            for base_data in super().split(nb_subdata)
        ]

    def split_frames(self, start_frame: int = 0, stop_frame: int = -1) -> AudioData:
        """Return a new AudioData from a subpart of this AudioData's data.

        The new AudioData shares the sample rate and the instrument of this
        AudioData.

        Parameters
        ----------
        start_frame: int
            First frame included in the new AudioData.
        stop_frame: int
            First frame after the last frame included in the new AudioData.
            -1 (default) stands for the end of the data.

        Returns
        -------
        AudioData
            A new AudioData which data is included between start_frame and stop_frame.

        Raises
        ------
        ValueError
            If start_frame is negative, or if stop_frame is lower than -1 or
            greater than the number of frames of the data.

        """
        if start_frame < 0:
            raise ValueError("Start_frame must be greater than or equal to 0.")

        # shape is a (frames, channels) tuple for multichannel data:
        # only the number of frames is relevant here.
        shape = self.shape
        nb_frames = shape if isinstance(shape, int) else shape[0]
        if stop_frame < -1 or stop_frame > nb_frames:
            raise ValueError("Stop_frame must be lower than the length of the data.")

        # The start offset is rounded up to the next nanosecond.
        start_timestamp = self.begin + Timedelta(
            seconds=ceil(start_frame / self.sample_rate * 1e9) / 1e9,
        )
        stop_timestamp = (
            self.end
            if stop_frame == -1
            else self.begin + Timedelta(seconds=stop_frame / self.sample_rate)
        )
        return AudioData.from_files(
            list(self.files),
            start_timestamp,
            stop_timestamp,
            sample_rate=self.sample_rate,
            instrument=self.instrument,
        )

    def to_dict(self) -> dict:
        """Serialize an AudioData to a dictionary.

        The dictionary of the parent class is completed with the
        "instrument" and "sample_rate" keys.

        Returns
        -------
        dict:
            The serialized dictionary representing the AudioData.

        """
        base_dict = super().to_dict()
        instrument_dict = {
            "instrument": (
                None if self.instrument is None else self.instrument.to_dict()
            ),
        }
        return (
            base_dict
            | instrument_dict
            | {
                "sample_rate": self.sample_rate,
            }
        )

    @classmethod
    def from_dict(cls, dictionary: dict) -> AudioData:
        """Deserialize an AudioData from a dictionary.

        Parameters
        ----------
        dictionary: dict
            The serialized dictionary representing the AudioData.
            A missing "instrument" key is treated as a None instrument.

        Returns
        -------
        AudioData
            The deserialized AudioData.

        """
        base_data = BaseData.from_dict(dictionary)
        # .get() tolerates dictionaries that do not have the "instrument" key.
        serialized_instrument = dictionary.get("instrument")
        instrument = (
            None
            if serialized_instrument is None
            else Instrument.from_dict(serialized_instrument)
        )
        return cls.from_base_data(
            data=base_data,
            sample_rate=dictionary["sample_rate"],
            instrument=instrument,
        )

    @classmethod
    def from_files(
        cls,
        files: list[AudioFile],
        begin: Timestamp | None = None,
        end: Timestamp | None = None,
        sample_rate: float | None = None,
        instrument: Instrument | None = None,
    ) -> AudioData:
        """Return an AudioData object from a list of AudioFiles.

        Parameters
        ----------
        files: list[AudioFile]
            List of AudioFiles containing the data.
        begin: Timestamp | None
            Begin of the data object.
            Defaulted to the begin of the first file.
        end: Timestamp | None
            End of the data object.
            Defaulted to the end of the last file.
        sample_rate: float | None
            Sample rate of the AudioData.
        instrument: Instrument | None
            Instrument that might be used to obtain acoustic pressure from
            the wav audio data.

        Returns
        -------
        AudioData:
            The AudioData object.

        """
        return cls.from_base_data(
            data=BaseData.from_files(files, begin, end),
            sample_rate=sample_rate,
            instrument=instrument,
        )

    @classmethod
    def from_base_data(
        cls,
        data: BaseData,
        sample_rate: float | None = None,
        instrument: Instrument | None = None,
    ) -> AudioData:
        """Return an AudioData object from a BaseData object.

        Each item of the BaseData is converted to an AudioItem.

        Parameters
        ----------
        data: BaseData
            BaseData object to convert to AudioData.
        sample_rate: float | None
            Sample rate of the AudioData.
        instrument: Instrument | None
            Instrument that might be used to obtain acoustic pressure from
            the wav audio data.

        Returns
        -------
        AudioData:
            The AudioData object.

        """
        return cls(
            items=[AudioItem.from_base_item(item) for item in data.items],
            sample_rate=sample_rate,
            instrument=instrument,
        )