PyPI - osekit - Versions diffs - 0.2.5__py3-none-any.whl - Mend

osekit 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

osekit/__init__.py +40 -0
osekit/config.py +23 -0
osekit/config.toml +42 -0
osekit/core_api/__init__.py +3 -0
osekit/core_api/audio_data.py +389 -0
osekit/core_api/audio_dataset.py +307 -0
osekit/core_api/audio_file.py +128 -0
osekit/core_api/audio_file_manager.py +107 -0
osekit/core_api/audio_item.py +76 -0
osekit/core_api/base_data.py +304 -0
osekit/core_api/base_dataset.py +387 -0
osekit/core_api/base_file.py +172 -0
osekit/core_api/base_item.py +83 -0
osekit/core_api/event.py +190 -0
osekit/core_api/frequency_scale.py +215 -0
osekit/core_api/instrument.py +141 -0
osekit/core_api/json_serializer.py +38 -0
osekit/core_api/ltas_data.py +217 -0
osekit/core_api/spectro_data.py +743 -0
osekit/core_api/spectro_dataset.py +502 -0
osekit/core_api/spectro_file.py +165 -0
osekit/core_api/spectro_item.py +91 -0
osekit/job.py +643 -0
osekit/logging_config.yaml +36 -0
osekit/logging_context.py +56 -0
osekit/public_api/__init__.py +0 -0
osekit/public_api/analysis.py +151 -0
osekit/public_api/dataset.py +540 -0
osekit/public_api/export_analysis.py +244 -0
osekit/utils/__init__.py +0 -0
osekit/utils/audio_utils.py +114 -0
osekit/utils/core_utils.py +310 -0
osekit/utils/formatting_utils.py +87 -0
osekit/utils/path_utils.py +44 -0
osekit/utils/timestamp_utils.py +242 -0
osekit-0.2.5.dist-info/METADATA +68 -0
osekit-0.2.5.dist-info/RECORD +40 -0
osekit-0.2.5.dist-info/WHEEL +4 -0
osekit-0.2.5.dist-info/entry_points.txt +2 -0
osekit-0.2.5.dist-info/licenses/LICENSE +0 -0

osekit/__init__.py ADDED Viewed

@@ -0,0 +1,40 @@
+import logging.config
+import os.path
+from pathlib import Path
+import yaml
+from osekit import utils
+from osekit.job import Job_builder
+__all__ = [
+    "Job_builder",
+    "utils",
+]
+def _setup_logging(
+    config_file="logging_config.yaml",
+    default_level: int = logging.INFO,
+) -> None:
+    user_config_file_path = Path(os.getenv("OSMOSE_USER_CONFIG", ".")) / config_file
+    default_config_file_path = Path(__file__).parent / config_file
+    config_file_path = next(
+        (
+            file
+            for file in (user_config_file_path, default_config_file_path)
+            if file.exists()
+        ),
+        None,
+    )
+    if config_file_path:
+        with Path.open(config_file_path) as configuration:
+            logging_config = yaml.safe_load(configuration)
+        logging.config.dictConfig(logging_config)
+    else:
+        logging.basicConfig(level=default_level)
+_setup_logging()

osekit/config.py ADDED Viewed

@@ -0,0 +1,23 @@
+import logging
+import stat
+from osekit.logging_context import LoggingContext
+# strftime/strptime pattern with microseconds (%f) and UTC offset (%z).
+TIMESTAMP_FORMAT_AUDIO_FILE = "%Y-%m-%dT%H:%M:%S.%f%z"
+# Underscore-separated patterns; the localized variant appends the UTC offset (%z).
+TIMESTAMP_FORMAT_EXPORTED_FILES_UNLOCALIZED = "%Y_%m_%d_%H_%M_%S_%f"
+TIMESTAMP_FORMAT_EXPORTED_FILES_LOCALIZED = "%Y_%m_%d_%H_%M_%S_%f%z"
+# Both exported-file patterns, the localized one listed first.
+TIMESTAMP_FORMATS_EXPORTED_FILES = [
+    TIMESTAMP_FORMAT_EXPORTED_FILES_LOCALIZED,
+    TIMESTAMP_FORMAT_EXPORTED_FILES_UNLOCALIZED,
+]
+FPDEFAULT = 0o664  # Default file permissions (rw-rw-r--)
+DPDEFAULT = stat.S_ISGID | 0o775  # Default directory permissions (setgid + rwxrwxr-x)
+# Module-level LoggingContext instance, created at import time.
+global_logging_context = LoggingContext()
+# Logger registered under the name "printer".
+print_logger = logging.getLogger("printer")
+# NOTE(review): "QQ"/"MQ" look like resampler quality presets - confirm against the resampling backend.
+resample_quality_settings = {
+    "downsample": "QQ",
+    "upsample": "MQ",
+}

osekit/config.toml ADDED Viewed

@@ -0,0 +1,42 @@
+[Job]
+    job_scheduler = "Torque"
+    # env_script is the full environment activation command; ${env_name} marks where the environment name is inserted
+    env_script = ". /appli/anaconda/latest/etc/profile.d/conda.sh; conda activate ${env_name}/"
+    env_name = "osmose"
+    outfile = "Job_{}_%j.out"
+    errfile = "Job_{}_%j.err"
+    # Default parameters
+    queue = "omp"
+    walltime = "12:00:00"
+    ncpus = 6
+    mem = "40g"
+    nodes = 1
+    [Job.Presets.low]
+        queue = "sequentiel"
+        walltime = "04:00:00"
+        ncpus = 1
+        mem = "1g"
+        nodes = 1
+    [Job.Presets.medium]
+        queue = "omp"
+        walltime = "12:00:00"
+        ncpus = 6
+        mem = "40g"
+        nodes = 1
+    [Job.Presets.high]
+        queue = "omp"
+        walltime = "12:00:00"
+        ncpus = 28
+        mem = "120g"
+        nodes = 1
+[Auxiliary]
+    bathymetry = "/home6/grosmaan/Documents/codes/osmose_codes/datawork-osmose/dataset/auxiliary/GEBCO_2022_sub_ice_topo.nc"
+    shore_dist = "/home6/grosmaan/Documents/codes/osmose_codes/datawork-osmose/dataset/auxiliary/dist2coast.txt"

osekit/core_api/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from osekit.core_api.audio_file_manager import AudioFileManager
+# Module-level AudioFileManager instance, created when osekit.core_api is imported.
+audio_file_manager = AudioFileManager()

osekit/core_api/audio_data.py ADDED Viewed

@@ -0,0 +1,389 @@
+"""AudioData represents audio data scattered across different AudioFiles.
+The AudioData has a collection of AudioItem.
+The data is accessed via an AudioItem object per AudioFile.
+"""
+from __future__ import annotations
+from math import ceil
+from typing import TYPE_CHECKING
+import numpy as np
+import soundfile as sf
+from pandas import Timedelta, Timestamp
+from osekit.config import (
+    TIMESTAMP_FORMATS_EXPORTED_FILES,
+)
+from osekit.core_api.audio_file import AudioFile
+from osekit.core_api.audio_item import AudioItem
+from osekit.core_api.base_data import BaseData
+from osekit.core_api.instrument import Instrument
+from osekit.utils.audio_utils import resample
+if TYPE_CHECKING:
+    from pathlib import Path
+class AudioData(BaseData[AudioItem, AudioFile]):
+    """AudioData represents audio data scattered across different AudioFiles.
+    The AudioData has a collection of AudioItem.
+    The data is accessed via an AudioItem object per AudioFile.
+    """
+    def __init__(
+        self,
+        items: list[AudioItem] | None = None,
+        begin: Timestamp | None = None,
+        end: Timestamp | None = None,
+        sample_rate: int | None = None,
+        instrument: Instrument | None = None,
+    ) -> None:
+        """Initialize an AudioData from a list of AudioItems.
+        Parameters
+        ----------
+        items: list[AudioItem] | None
+            List of the AudioItem constituting the AudioData.
+        begin: Timestamp | None
+            Only effective if items is None.
+            Set the begin of the empty data.
+        end: Timestamp | None
+            Only effective if items is None.
+            Set the end of the empty data.
+        sample_rate: int | None
+            The sample rate of the audio data.
+            If None, it is taken from the first item that has a sample rate.
+        instrument: Instrument | None
+            Instrument that might be used to obtain acoustic pressure from
+            the wav audio data.
+        """
+        super().__init__(items=items, begin=begin, end=end)
+        # _set_sample_rate reads self.items, so it runs after the base initializer.
+        self._set_sample_rate(sample_rate=sample_rate)
+        self.instrument = instrument
+    @property
+    def nb_channels(self) -> int:
+        """Number of channels of the audio data."""
+        return max(
+            [1] + [item.nb_channels for item in self.items if type(item) is AudioItem],
+        )
+    @property
+    def shape(self) -> tuple[int, ...] | int:
+        """Shape of the audio data."""
+        data_length = round(self.sample_rate * self.duration.total_seconds())
+        return data_length if self.nb_channels <= 1 else (data_length, self.nb_channels)
+    def __eq__(self, other: AudioData) -> bool:
+        """Override __eq__."""
+        return self.sample_rate == other.sample_rate and super().__eq__(other)
+    def _set_sample_rate(self, sample_rate: int | None = None) -> None:
+        """Set the AudioFile sample rate.
+        If the sample_rate is specified, it is set.
+        If it is not specified, it is set to the sampling rate of the
+        first item that has one.
+        Else, it is set to None.
+        """
+        if sample_rate is not None:
+            self.sample_rate = sample_rate
+            return
+        if sr := next(
+            (item.sample_rate for item in self.items if item.sample_rate is not None),
+            None,
+        ):
+            self.sample_rate = sr
+            return
+        self.sample_rate = None
+    def get_value(self, reject_dc: bool = False) -> np.ndarray:
+        """Return the value of the audio data.
+        The data from the audio file will be resampled if necessary.
+        Parameters
+        ----------
+        reject_dc: bool
+            If True, the values will be centered on 0.
+        Returns
+        -------
+        np.ndarray:
+            The value of the audio data.
+        """
+        data = np.empty(shape=self.shape)
+        idx = 0
+        for item in self.items:
+            item_data = self._get_item_value(item)
+            item_data = item_data[: min(item_data.shape[0], data.shape[0] - idx)]
+            data[idx : idx + len(item_data)] = item_data
+            idx += len(item_data)
+        if reject_dc:
+            data -= data.mean()
+        return data
+    def get_value_calibrated(self, reject_dc: bool = False) -> np.ndarray:
+        """Return the value of the audio data accounting for the calibration factor.
+        If the instrument parameter of the audio data is not None, the returned value is
+        calibrated in units of Pa.
+        Parameters
+        ----------
+        reject_dc: bool
+            If True, the values will be centered on 0.
+        Returns
+        -------
+        np.ndarray:
+            The calibrated value of the audio data.
+        """
+        raw_data = self.get_value(reject_dc=reject_dc)
+        calibration_factor = (
+            1.0 if self.instrument is None else self.instrument.end_to_end
+        )
+        return raw_data * calibration_factor
+    def write(
+        self,
+        folder: Path,
+        subtype: str | None = None,
+        link: bool = False,
+    ) -> None:
+        """Write the audio data to file.
+        Parameters
+        ----------
+        folder: pathlib.Path
+            Folder in which to write the audio file.
+        subtype: str | None
+            Subtype as provided by the soundfile module.
+            Defaulted as the default 16-bit PCM for WAV audio files.
+        link: bool
+            If True, the AudioData will be bound to the written file.
+            Its items will be replaced with a single item, which will match the whole
+            new AudioFile.
+        """
+        super().create_directories(path=folder)
+        sf.write(
+            folder / f"{self}.wav",
+            self.get_value(),
+            self.sample_rate,
+            subtype=subtype,
+        )
+        if link:
+            self.link(folder=folder)
+    def link(self, folder: Path) -> None:
+        """Link the AudioData to an AudioFile in the folder.
+        The given folder should contain a file named "str(self).wav".
+        Linking is intended for AudioData objects that have already been written.
+        After linking, the AudioData will have a single item with the same
+        properties of the target AudioFile.
+        Parameters
+        ----------
+        folder: Path
+            Folder in which is located the AudioFile to which the AudioData instance
+            should be linked.
+        """
+        file = AudioFile(
+            path=folder / f"{self}.wav",
+            strptime_format=TIMESTAMP_FORMATS_EXPORTED_FILES,
+        )
+        self.items = AudioData.from_files([file]).items
+    def _get_item_value(self, item: AudioItem) -> np.ndarray:
+        """Return the resampled (if needed) data from the audio item."""
+        item_data = item.get_value()
+        if item.is_empty:
+            return item_data.repeat(
+                round(item.duration.total_seconds() * self.sample_rate),
+            )
+        if item.sample_rate != self.sample_rate:
+            return resample(item_data, item.sample_rate, self.sample_rate)
+        return item_data
+    def split(self, nb_subdata: int = 2) -> list[AudioData]:
+        """Split the audio data object in the specified number of audio subdata.
+        Parameters
+        ----------
+        nb_subdata: int
+            Number of subdata in which to split the data.
+        Returns
+        -------
+        list[AudioData]
+            The list of AudioData subdata objects.
+        """
+        return [
+            AudioData.from_base_data(base_data, self.sample_rate)
+            for base_data in super().split(nb_subdata)
+        ]
+    def split_frames(self, start_frame: int = 0, stop_frame: int = -1) -> AudioData:
+        """Return a new AudioData from a subpart of this AudioData's data.
+        Parameters
+        ----------
+        start_frame: int
+            First frame included in the new AudioData.
+        stop_frame: int
+            First frame after the last frame included in the new AudioData.
+        Returns
+        -------
+        AudioData
+            A new AudioData which data is included between start_frame and stop_frame.
+        """
+        if start_frame < 0:
+            raise ValueError("Start_frame must be greater than or equal to 0.")
+        if stop_frame < -1 or stop_frame > self.shape:
+            raise ValueError("Stop_frame must be lower than the length of the data.")
+        start_timestamp = self.begin + Timedelta(
+            seconds=ceil(start_frame / self.sample_rate * 1e9) / 1e9,
+        )
+        stop_timestamp = (
+            self.end
+            if stop_frame == -1
+            else self.begin + Timedelta(seconds=stop_frame / self.sample_rate)
+        )
+        return AudioData.from_files(
+            list(self.files),
+            start_timestamp,
+            stop_timestamp,
+            sample_rate=self.sample_rate,
+        )
+    def to_dict(self) -> dict:
+        """Serialize an AudioData to a dictionary.
+        Returns
+        -------
+        dict:
+            The serialized dictionary representing the AudioData.
+        """
+        base_dict = super().to_dict()
+        instrument_dict = {
+            "instrument": (
+                None if self.instrument is None else self.instrument.to_dict()
+            ),
+        }
+        return (
+            base_dict
+            | instrument_dict
+            | {
+                "sample_rate": self.sample_rate,
+            }
+        )
+    @classmethod
+    def from_dict(cls, dictionary: dict) -> AudioData:
+        """Deserialize an AudioData from a dictionary.
+        Parameters
+        ----------
+        dictionary: dict
+            The serialized dictionary representing the AudioData.
+        Returns
+        -------
+        AudioData
+            The deserialized AudioData.
+        """
+        base_data = BaseData.from_dict(dictionary)
+        instrument = (
+            None
+            if dictionary["instrument"] is None
+            else Instrument.from_dict(dictionary["instrument"])
+        )
+        return cls.from_base_data(
+            data=base_data,
+            sample_rate=dictionary["sample_rate"],
+            instrument=instrument,
+        )
+    @classmethod
+    def from_files(
+        cls,
+        files: list[AudioFile],
+        begin: Timestamp | None = None,
+        end: Timestamp | None = None,
+        sample_rate: float | None = None,
+        instrument: Instrument | None = None,
+    ) -> AudioData:
+        """Return an AudioData object from a list of AudioFiles.
+        Parameters
+        ----------
+        files: list[AudioFile]
+            List of AudioFiles containing the data.
+        begin: Timestamp | None
+            Begin of the data object.
+            Defaulted to the begin of the first file.
+        end: Timestamp | None
+            End of the data object.
+            Defaulted to the end of the last file.
+        sample_rate: float | None
+            Sample rate of the AudioData.
+        instrument: Instrument | None
+            Instrument that might be used to obtain acoustic pressure from
+            the wav audio data.
+        Returns
+        -------
+        AudioData:
+            The AudioData object.
+        """
+        return cls.from_base_data(
+            data=BaseData.from_files(files, begin, end),
+            sample_rate=sample_rate,
+            instrument=instrument,
+        )
+    @classmethod
+    def from_base_data(
+        cls,
+        data: BaseData,
+        sample_rate: float | None = None,
+        instrument: Instrument | None = None,
+    ) -> AudioData:
+        """Return an AudioData object from a BaseData object.
+        Parameters
+        ----------
+        data: BaseData
+            BaseData object to convert to AudioData.
+        sample_rate: float | None
+            Sample rate of the AudioData.
+        instrument: Instrument | None
+            Instrument that might be used to obtain acoustic pressure from
+            the wav audio data.
+        Returns
+        -------
+        AudioData:
+            The AudioData object.
+        """
+        return cls(
+            items=[AudioItem.from_base_item(item) for item in data.items],
+            sample_rate=sample_rate,
+            instrument=instrument,
+        )