bacpipe 1.3.0.dev0__tar.gz → 1.3.0.dev2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/PKG-INFO +1 -1
- bacpipe-1.3.0.dev2/bacpipe/core/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/core/audio_processor.py +177 -0
- bacpipe-1.3.0.dev2/bacpipe/core/constants.py +62 -0
- bacpipe-1.3.0.dev2/bacpipe/core/experiment_manager.py +722 -0
- bacpipe-1.3.0.dev2/bacpipe/core/workflows.py +699 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/classification/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/classification/classify.py +213 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/classification/evaluate_classifier.py +141 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/classification/train_classifier.py +247 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/clustering/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/clustering/cluster.py +213 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/label_embeddings.py +1236 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/dashboard.py +756 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/dashboard_utils.py +321 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/visualize.py +362 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/visualize_embeddings.py +824 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/visualize_predictions.py +661 -0
- bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/visualize_spectrograms.py +144 -0
- bacpipe-1.3.0.dev2/bacpipe/imgs/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/dimensionality_reduction/pca.py +18 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/dimensionality_reduction/sparse_pca.py +18 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/dimensionality_reduction/t_sne.py +22 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/dimensionality_reduction/umap.py +25 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/audiomae.py +159 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/audioprotopnet.py +68 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/aves_especies.py +67 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/avesecho_passt.py +152 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/bat.py +73 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/beats.py +82 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/biolingual.py +39 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/birdaves_especies.py +6 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/birdmae.py +38 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/birdnet.py +133 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/convnext_birdset.py +51 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/google_whale.py +49 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/hbdet.py +19 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/insect459.py +50 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/insect66.py +94 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/mix2.py +33 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/naturebeats.py +45 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/perch_bird.py +15 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/perch_v2.py +81 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/protoclr.py +68 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/rcl_fs_bsed.py +50 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/surfperch.py +15 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/vggish.py +26 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/audiomae/dataset.py +247 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/audiomae/models_vit.py +251 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/bat/module.py +155 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/bat/prepare_data.py +67 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/convnext_birdset/preprocess.py +92 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/mix2/mobile_net_v3.py +659 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/naturebeats/BEATs.py +207 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/naturebeats/backbone.py +813 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/naturebeats/modules.py +221 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/taxonomy/namespace.py +228 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/hub.py +89 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/model_configs.py +278 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/models_tf.py +546 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/taxonomy_model_tf.py +322 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/zoo_interface.py +355 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/protoclr/config/__init__.py +4 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/protoclr/config/comm.py +132 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/protoclr/cvt.py +695 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/rcl_fs_bsed/resnet.py +106 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_utils.py +152 -0
- bacpipe-1.3.0.dev2/bacpipe/model_pipelines/runner.py +728 -0
- bacpipe-1.3.0.dev2/bacpipe/tests/__init__.py +0 -0
- bacpipe-1.3.0.dev2/bacpipe/tests/conftest.py +54 -0
- bacpipe-1.3.0.dev2/bacpipe/tests/test_embedding_creation.py +114 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe.egg-info/PKG-INFO +1 -1
- bacpipe-1.3.0.dev2/bacpipe.egg-info/SOURCES.txt +87 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/pyproject.toml +3 -5
- bacpipe-1.3.0.dev0/bacpipe.egg-info/SOURCES.txt +0 -12
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/LICENSE +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/README.md +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe/__init__.py +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe/config.yaml +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe/settings.yaml +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe/testing.py +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe.egg-info/dependency_links.txt +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe.egg-info/requires.txt +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe.egg-info/top_level.txt +0 -0
- {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bacpipe
|
|
3
|
-
Version: 1.3.0.
|
|
3
|
+
Version: 1.3.0.dev2
|
|
4
4
|
Summary: Use bacpipe to streamline the process of generating embeddings and analysing your PAM datasets.
|
|
5
5
|
Author-email: "Vincent S. Kather" <vkather@gmail.com>
|
|
6
6
|
Requires-Python: >=3.11
|
|
File without changes
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
|
|
2
|
+
import torch
|
|
3
|
+
import logging
|
|
4
|
+
import numpy as np
|
|
5
|
+
import librosa as lb
|
|
6
|
+
import torchaudio as ta
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger("bacpipe")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AudioHandler:
    """Load, window, and pad audio files for model-specific batch inference."""

    def __init__(self, model, padding, audio_dir,
                 bool_slowdown=False, slowdown_rate=None,
                 **kwargs):
        """
        Helper class for all methods related to loading and padding audio.

        Parameters
        ----------
        model : Model object
            has attributes for all the model characteristics like
            sample rate, segment length etc. as well as the methods
            to run the model
        padding : str
            padding function to use for where padding is necessary
        audio_dir : pathlib.Path object
            path to audio dir
        bool_slowdown : bool, optional
            if True the file is loaded at its native rate and resampled
            as though played back at ``slowdown_rate`` times the original
            sample rate
        slowdown_rate : float or None, optional
            playback-rate factor used when ``bool_slowdown`` is True
        **kwargs
            forwarded to the annotation-based segment loader
        """
        self.model = model
        self.padding = padding
        self.audio_dir = audio_dir
        self.bool_slowdown = bool_slowdown
        self.slowdown_rate = slowdown_rate
        self.kwargs = kwargs
        # Fix: previously never initialized although prepare_audio writes
        # into it; per-file durations in seconds, keyed by file stem.
        self.file_length = {}

    def prepare_audio(self, sample):
        """
        Use bacpipe pipeline to load audio file, window it according to
        model specific window length and preprocess the data, ready for
        batch inference computation. Also log file length and shape for
        metadata files.

        Parameters
        ----------
        sample : pathlib.Path or str
            path to audio file

        Returns
        -------
        torch.Tensor
            audio frames preprocessed with model specific preprocessing
        """
        # Fix: the docstring allows str, but .stem below requires a Path.
        sample = Path(sample)
        audio = self._load_and_resample(sample)
        if self.model.only_embed_annotations:
            frames = self._only_load_annotated_segments(sample, audio, **self.kwargs)
        else:
            frames = self._window_audio(audio)
        preprocessed_frames = self.model.preprocess(frames)
        self.file_length[sample.stem] = len(audio[0]) / self.model.sr
        self.preprocessed_shape = tuple(preprocessed_frames.shape)
        if self.model.device == 'cuda':
            # release references before clearing the CUDA cache so the
            # memory is actually reclaimable
            del audio, frames
            torch.cuda.empty_cache()
        return preprocessed_frames

    def _load_and_resample(self, path):
        """
        Load an audio file (mono) at the model's sample rate.

        Returns
        -------
        torch.Tensor
            shape (1, num_samples)

        Raises
        ------
        ValueError
            if the loaded audio contains no samples
        """
        try:
            if not self.bool_slowdown:
                audio, sr = lb.load(
                    str(path), sr=self.model.sr, mono=True
                )
            else:
                # TODO Need to ensure that input length gets prolonged accordingly
                audio, sr = lb.load(
                    str(path), sr=None, mono=True
                )
                audio = lb.resample(
                    audio,
                    orig_sr=int(sr * self.slowdown_rate),
                    target_sr=self.model.sr
                )
        except Exception as e:
            logger.exception(
                f"\nError loading audio. Skipping {str(path)}."
                f"Error: {e}"
            )
            raise
        if len(audio) == 0:
            error = f"Audio file {path} is empty. " f"Skipping {path}."
            logger.exception(error)
            raise ValueError(error)
        # Fix: reshape on BOTH branches — downstream code (audio[0],
        # .squeeze()) requires shape (1, n), but librosa returns a 1-D
        # array for mono audio.
        audio = audio.reshape(1, -1)
        return torch.tensor(audio)

    def _only_load_annotated_segments(
        self, file_path, audio, annotations_filename='annotations.csv', **_
    ):
        """
        Cut out only the annotated segments of a file.

        Reads start/end times (in seconds) from the annotation csv inside
        ``audio_dir`` and delegates the cut/pad work to
        ``_load_and_pad_audio_based_on_grid``.
        """
        import pandas as pd
        annots = pd.read_csv(Path(self.audio_dir) / annotations_filename)
        # filter current file; annotation files may reference the audio
        # file by relative path (Path or str) or by bare filename
        file_annots = annots[annots.audiofilename==file_path.relative_to(self.audio_dir)]
        if len(file_annots) == 0:
            file_annots = annots[annots.audiofilename==file_path.stem+file_path.suffix]
        if len(file_annots) == 0:
            file_annots = annots[annots.audiofilename==str(file_path.relative_to(self.audio_dir))]

        # annotation times are in seconds — convert to sample indices
        starts = np.array(file_annots.start, dtype=np.float32)*self.model.sr
        ends = np.array(file_annots.end, dtype=np.float32)*self.model.sr

        # Fix: the cut/pad loop was duplicated verbatim here; reuse the
        # grid-based loader instead.
        return self._load_and_pad_audio_based_on_grid(audio, starts, ends, file_path)

    def _load_audio_based_on_fixed_segment_length(self, audio, segment_length, **_):
        """
        Build a regular grid of segment boundaries covering ``audio``.

        NOTE(review): ``segment_length`` appears to be in seconds when
        multiplied by ``self.model.sr`` for the boundaries, but is divided
        directly into ``len(audio)`` for the segment count — confirm the
        intended unit against callers.
        """
        nr_segments = len(audio) // segment_length +1
        starts = np.arange(nr_segments) * segment_length * self.model.sr
        ends = np.arange(1, nr_segments+1) * segment_length * self.model.sr
        return starts, ends

    def _load_and_pad_audio_based_on_grid(self, audio, starts, ends, file_path):
        """
        Cut ``audio`` at the given sample boundaries, padding each cut to
        the model's segment length.

        Parameters
        ----------
        audio : torch.Tensor
            shape (1, n) audio signal
        starts, ends : array-like of float
            segment boundaries in samples
        file_path : pathlib.Path
            only used for log messages

        Returns
        -------
        torch.Tensor
            shape (num_segments, segment_length) on the model's device

        Raises
        ------
        ValueError
            if no boundary pair yields a valid segment
        """
        audio = audio.cpu().squeeze()
        segments = []
        for s, e in zip(starts, ends):
            s, e = int(s), int(e)
            if s > len(audio):
                logger.warning(
                    f"Annotation with start {s} and end {e} is outside of "
                    f"range of {file_path}. Skipping annotation."
                )
                continue
            segments.append(
                lb.util.fix_length(
                    audio[s:e+1],
                    size=self.model.segment_length,
                    mode=self.padding
                )
            )
        if not segments:
            # Fix: previously raised UnboundLocalError on
            # cumulative_segments when every annotation was skipped
            # or none matched.
            error = f"No valid annotated segments found for {file_path}."
            logger.error(error)
            raise ValueError(error)
        # Fix: single vstack instead of quadratic repeated vstack; also
        # always returns a 2-D (num_segments, segment_length) batch,
        # where a single segment previously stayed 1-D.
        cumulative_segments = torch.Tensor(np.vstack(segments))
        # Fix: was self.device, which this class never defines — the
        # device lives on the model (cf. prepare_audio).
        return cumulative_segments.to(self.model.device)

    def _window_audio(self, audio):
        """
        Split a (1, n) signal into consecutive model-length frames,
        padding the final partial frame with ``self.padding``.

        Returns
        -------
        torch.Tensor
            shape (num_frames, segment_length)
        """
        num_frames = int(np.ceil(len(audio[0]) / self.model.segment_length))
        if isinstance(audio, torch.Tensor):
            audio = audio.cpu()
        padded_audio = lb.util.fix_length(
            audio,
            size=int(num_frames * self.model.segment_length),
            mode=self.padding,
        )
        logger.debug(f"{self.padding} was used on an audio segment.")
        frames = padded_audio.reshape([num_frames, self.model.segment_length])
        if not isinstance(frames, torch.Tensor):
            frames = torch.tensor(frames)
        return frames
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
# Models served by a TensorFlow backend rather than PyTorch — presumably
# used to pick the inference path; confirm against the pipeline runner.
TF_MODELS = [
    'birdnet',
    'perch_v2',
    'perch_bird',
    'google_whale',
    'surfperch',
    'vggish',
    'hbdet',
]

# Size of the embedding vector each feature extractor produces.
# Key order defines the order of `supported_models` below.
EMBEDDING_DIMENSIONS = {
    "audiomae": 768,
    "audioprotopnet": 1024,
    "avesecho_passt": 768,
    "aves_especies": 768,
    "bat": 64,
    "beats": 768,
    "birdaves_especies": 1024,
    "biolingual": 512,
    "birdnet": 1024,
    "birdmae": 1280,
    "convnext_birdset": 1024,
    "hbdet": 2048,
    "insect66": 1280,
    "insect459": 1280,
    "mix2": 960,
    "naturebeats": 768,
    "perch_bird": 1280,
    "perch_v2": 1536,
    "protoclr": 384,
    "rcl_fs_bsed": 2048,
    "surfperch": 1280,
    "google_whale": 1280,
    "vggish": 128,
}

# Models whose pretrained weights must be fetched as a separate
# checkpoint before first use.
NEEDS_CHECKPOINT = [
    "audiomae",
    "avesecho_passt",
    "aves_especies",
    "bat",
    "beats",
    "birdaves_especies",
    "birdnet",
    "hbdet",
    "insect66",
    "insect459",
    "mix2",
    "naturebeats",
    "protoclr",
    "rcl_fs_bsed"
]


supported_models = list(EMBEDDING_DIMENSIONS.keys())
"""list[str]: Supported embedding models available in bacpipe."""

models_needing_checkpoint = NEEDS_CHECKPOINT
"""list[str]: Models that require a checkpoint to be downloaded before use."""