PyPI - birdnet-analyzer - Versions diffs - 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl - Mend

birdnet-analyzer 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (123) hide show

birdnet_analyzer/__init__.py +9 -8
birdnet_analyzer/analyze/__init__.py +19 -5
birdnet_analyzer/analyze/__main__.py +3 -4
birdnet_analyzer/analyze/cli.py +30 -25
birdnet_analyzer/analyze/core.py +246 -245
birdnet_analyzer/analyze/utils.py +694 -701
birdnet_analyzer/audio.py +368 -372
birdnet_analyzer/cli.py +732 -707
birdnet_analyzer/config.py +243 -242
birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
birdnet_analyzer/embeddings/__init__.py +3 -4
birdnet_analyzer/embeddings/__main__.py +3 -3
birdnet_analyzer/embeddings/cli.py +12 -13
birdnet_analyzer/embeddings/core.py +70 -70
birdnet_analyzer/embeddings/utils.py +220 -193
birdnet_analyzer/evaluation/__init__.py +189 -195
birdnet_analyzer/evaluation/__main__.py +3 -3
birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -0
birdnet_analyzer/evaluation/assessment/plotting.py +378 -0
birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
birdnet_analyzer/gui/__init__.py +19 -23
birdnet_analyzer/gui/__main__.py +3 -3
birdnet_analyzer/gui/analysis.py +179 -174
birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
birdnet_analyzer/gui/assets/gui.css +36 -28
birdnet_analyzer/gui/assets/gui.js +93 -93
birdnet_analyzer/gui/embeddings.py +638 -620
birdnet_analyzer/gui/evaluation.py +801 -813
birdnet_analyzer/gui/localization.py +75 -68
birdnet_analyzer/gui/multi_file.py +265 -246
birdnet_analyzer/gui/review.py +472 -527
birdnet_analyzer/gui/segments.py +191 -191
birdnet_analyzer/gui/settings.py +149 -129
birdnet_analyzer/gui/single_file.py +264 -269
birdnet_analyzer/gui/species.py +95 -95
birdnet_analyzer/gui/train.py +687 -698
birdnet_analyzer/gui/utils.py +797 -808
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
birdnet_analyzer/lang/de.json +341 -334
birdnet_analyzer/lang/en.json +341 -334
birdnet_analyzer/lang/fi.json +341 -334
birdnet_analyzer/lang/fr.json +341 -334
birdnet_analyzer/lang/id.json +341 -334
birdnet_analyzer/lang/pt-br.json +341 -334
birdnet_analyzer/lang/ru.json +341 -334
birdnet_analyzer/lang/se.json +341 -334
birdnet_analyzer/lang/tlh.json +341 -334
birdnet_analyzer/lang/zh_TW.json +341 -334
birdnet_analyzer/model.py +1212 -1243
birdnet_analyzer/playground.py +5 -0
birdnet_analyzer/search/__init__.py +3 -3
birdnet_analyzer/search/__main__.py +3 -3
birdnet_analyzer/search/cli.py +11 -12
birdnet_analyzer/search/core.py +78 -78
birdnet_analyzer/search/utils.py +107 -111
birdnet_analyzer/segments/__init__.py +3 -3
birdnet_analyzer/segments/__main__.py +3 -3
birdnet_analyzer/segments/cli.py +13 -14
birdnet_analyzer/segments/core.py +81 -78
birdnet_analyzer/segments/utils.py +383 -394
birdnet_analyzer/species/__init__.py +3 -3
birdnet_analyzer/species/__main__.py +3 -3
birdnet_analyzer/species/cli.py +13 -14
birdnet_analyzer/species/core.py +35 -35
birdnet_analyzer/species/utils.py +74 -75
birdnet_analyzer/train/__init__.py +3 -3
birdnet_analyzer/train/__main__.py +3 -3
birdnet_analyzer/train/cli.py +13 -14
birdnet_analyzer/train/core.py +113 -113
birdnet_analyzer/train/utils.py +877 -847
birdnet_analyzer/translate.py +133 -104
birdnet_analyzer/utils.py +425 -419
{birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/METADATA +146 -129
birdnet_analyzer-2.1.0.dist-info/RECORD +125 -0
{birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/WHEEL +1 -1
{birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/licenses/LICENSE +18 -18
birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
{birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/entry_points.txt +0 -0
{birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/top_level.txt +0 -0

birdnet_analyzer/audio.py CHANGED Viewed

@@ -1,372 +1,368 @@
-"""Module containing audio helper functions."""
-import librosa
-import numpy as np
-import soundfile as sf
-from scipy.signal import firwin, kaiserord, lfilter, find_peaks
-import birdnet_analyzer.config as cfg
-RANDOM = np.random.RandomState(cfg.RANDOM_SEED)
-def open_audio_file(path: str, sample_rate=48000, offset=0.0, duration=None, fmin=None, fmax=None, speed=1.0):
-    """Open an audio file.
-    Opens an audio file with librosa and the given settings.
-    Args:
-        path: Path to the audio file.
-        sample_rate: The sample rate at which the file should be processed.
-        offset: The starting offset.
-        duration: Maximum duration of the loaded content.
-        fmin: Minimum frequency for bandpass filter.
-        fmax: Maximum frequency for bandpass filter.
-        speed: Speed factor for audio playback.
-    Returns:
-        Returns the audio time series and the sampling rate.
-    """
-    # Open file with librosa (uses ffmpeg or libav)
-    if speed == 1.0:
-        sig, rate = librosa.load(
-            path, sr=sample_rate, offset=offset, duration=duration, mono=True, res_type="kaiser_fast"
-        )
-    else:
-        # Load audio with original sample rate
-        sig, rate = librosa.load(path, sr=None, offset=offset, duration=duration, mono=True)
-        # Resample with "fake" sample rate
-        sig = librosa.resample(sig, orig_sr=int(rate * speed), target_sr=sample_rate, res_type="kaiser_fast")
-        rate = sample_rate
-    # Bandpass filter
-    if fmin is not None and fmax is not None:
-        sig = bandpass(sig, rate, fmin, fmax)
-        # sig = bandpassKaiserFIR(sig, rate, fmin, fmax)
-    return sig, rate
-def get_audio_file_length(path):
-    """
-    Get the length of an audio file in seconds.
-    Args:
-        path (str): The file path to the audio file.
-    Returns:
-        float: The duration of the audio file in seconds.
-    """
-    # Open file with librosa (uses ffmpeg or libav)
-    return librosa.get_duration(filename=path, sr=None)
-def get_sample_rate(path: str):
-    """
-    Get the sample rate of an audio file.
-    Args:
-        path (str): The file path to the audio file.
-    Returns:
-        int: The sample rate of the audio file.
-    """
-    return librosa.get_samplerate(path)
-def save_signal(sig, fname: str, rate=48000):
-    """Saves a signal to file.
-    Args:
-        sig: The signal to be saved.
-        fname: The file path.
-    Returns:
-        None
-    """
-    sf.write(fname, sig, rate, "PCM_16")
-def pad(sig, seconds, srate, amount=None):
-    """Creates a noise vector with the given shape.
-    Args:
-        sig: The original audio signal.
-        shape: Shape of the noise.
-        amount: The noise intensity.
-    Returns:
-        An numpy array of noise with the given shape.
-    """
-    target_len = int(srate * seconds)
-    if len(sig) < target_len:
-        noise_shape = target_len - len(sig)
-        if not cfg.USE_NOISE:
-            noise = np.zeros(noise_shape, dtype=sig.dtype)
-        else:
-            # Random noise intensity
-            if amount is None:
-                amount = RANDOM.uniform(0.1, 0.5)
-            # Create Gaussian noise
-            try:
-                noise = RANDOM.normal(min(sig) * amount, max(sig) * amount, noise_shape).astype(sig.dtype)
-            except:
-                noise = np.zeros(noise_shape, dtype=sig.dtype)
-        return np.concatenate((sig, noise))
-    return sig
-def split_signal(sig, rate, seconds, overlap, minlen, amount=None):
-    """Split signal with overlap.
-    Args:
-        sig: The original signal to be split.
-        rate: The sampling rate.
-        seconds: The duration of a segment.
-        overlap: The overlapping seconds of segments.
-        minlen: Minimum length of a split.
-    Returns:
-        A list of splits.
-    """
-    # Split signal to chunks of duration with overlap, whereas each chunk still has minimum duration of signal
-    if rate is None or rate <= 0:
-        rate = cfg.SAMPLE_RATE
-    if seconds is None or seconds <= 0:
-        seconds = cfg.SIG_LENGTH
-    if overlap is None or overlap < 0:
-        overlap = cfg.SIG_OVERLAP
-    if minlen is None or minlen <= 0 or minlen > seconds:
-        minlen = cfg.SIG_MINLEN
-    # Make sure overlap is smaller then signal duration
-    if overlap >= seconds:
-        overlap = seconds - 0.01
-    # Number of frames per chunk, per step and per minimum signal
-    chunksize = int(rate * seconds)
-    stepsize = int(rate * (seconds - overlap))
-    minsize = int(rate * minlen)
-    # Start of last chunk
-    lastchunkpos = int((sig.size - chunksize + stepsize - 1) / stepsize) * stepsize
-    # Make sure at least one chunk is returned
-    if lastchunkpos < 0:
-        lastchunkpos = 0
-    # Omit last chunk if minimum signal duration is underrun
-    elif sig.size - lastchunkpos < minsize:
-        lastchunkpos = lastchunkpos - stepsize
-    # Append noise or empty signal of chunk duration, so all splits have desired length
-    if not cfg.USE_NOISE:
-        noise = np.zeros(shape=chunksize, dtype=sig.dtype)
-    else:
-        # Random noise intensity
-        if amount is None:
-            amount = RANDOM.uniform(0.1, 0.5)
-        # Create Gaussian noise
-        try:
-            noise = RANDOM.normal(loc=min(sig) * amount, scale=max(sig) * amount, size=chunksize).astype(sig.dtype)
-        except:
-            noise = np.zeros(shape=chunksize, dtype=sig.dtype)
-    data = np.concatenate((sig, noise))
-    # Split signal with overlap
-    sig_splits = []
-    for i in range(0, 1 + lastchunkpos, stepsize):
-        sig_splits.append(data[i : i + chunksize])
-    return sig_splits
-def crop_center(sig, rate, seconds):
-    """Crop signal to center.
-    Args:
-        sig: The original signal.
-        rate: The sampling rate.
-        seconds: The length of the signal.
-    Returns:
-        The cropped signal.
-    """
-    if len(sig) > int(seconds * rate):
-        start = int((len(sig) - int(seconds * rate)) / 2)
-        end = start + int(seconds * rate)
-        sig = sig[start:end]
-    # Pad with noise
-    else:
-        sig = pad(sig, seconds, rate, 0.5)
-    return sig
-def smart_crop_signal(sig, rate, sig_length, sig_overlap, sig_minlen):
-    """Smart crop audio signal based on peak detection.
-    This function analyzes the audio signal to find peaks in energy/amplitude,
-    which are more likely to contain relevant target signals (e.g., bird calls).
-    Only the audio segments with the highest energy peaks are returned.
-    Args:
-        sig: The audio signal.
-        rate: The sample rate of the audio signal.
-        sig_length: The desired length of each snippet in seconds.
-        sig_overlap: The overlap between snippets in seconds.
-        sig_minlen: The minimum length of a snippet in seconds.
-    Returns:
-        A list of audio snippets with the highest energy/peaks.
-    """
-    # If signal is too short, just return it
-    if len(sig) / rate <= sig_length:
-        return [sig]
-    # Calculate the window size in samples
-    window_size = int(sig_length * rate)
-    hop_size = int((sig_length - sig_overlap) * rate)
-    # Split the signal into overlapping windows
-    splits = split_signal(sig, rate, sig_length, sig_overlap, sig_minlen)
-    if len(splits) <= 1:
-        return splits
-    # Calculate energy for each window
-    energies = []
-    for split in splits:
-        # Calculate RMS energy
-        energy = np.sqrt(np.mean(split**2))
-        # Also consider peak values
-        peak = np.max(np.abs(split))
-        # Combine both metrics
-        energies.append(energy * 0.7 + peak * 0.3)  # Weighted combination
-    # Find peaks in the energy curve
-    # Smooth energies first to avoid small fluctuations
-    smoothed_energies = np.convolve(energies, np.ones(3)/3, mode='same')
-    peaks, _ = find_peaks(smoothed_energies, height=np.mean(smoothed_energies), distance=2)
-    # If no clear peaks found, fall back to selecting top energy segments
-    if len(peaks) < 2:
-        # Sort segments by energy and take top segments (up to 3 or 1/3 of total, whichever is more)
-        num_segments = max(3, len(splits) // 3)
-        indices = np.argsort(energies)[-num_segments:]
-        return [splits[i] for i in sorted(indices)]
-    # Return the audio segments corresponding to the peaks
-    peak_splits = [splits[i] for i in peaks]
-    # If we have too many peaks, select the strongest ones
-    if len(peak_splits) > 5:
-        peak_energies = [energies[i] for i in peaks]
-        sorted_indices = np.argsort(peak_energies)[::-1]  # Sort in descending order
-        peak_splits = [peak_splits[i] for i in sorted_indices[:5]]  # Take top 5
-    return peak_splits
-def bandpass(sig, rate, fmin, fmax, order=5):
-    """
-    Apply a bandpass filter to the input signal.
-    Args:
-        sig (numpy.ndarray): The input signal to be filtered.
-        rate (int): The sampling rate of the input signal.
-        fmin (float): The minimum frequency for the bandpass filter.
-        fmax (float): The maximum frequency for the bandpass filter.
-        order (int, optional): The order of the filter. Default is 5.
-    Returns:
-        numpy.ndarray: The filtered signal as a float32 array.
-    """
-    # Check if we have to bandpass at all
-    if fmin == cfg.SIG_FMIN and fmax == cfg.SIG_FMAX or fmin > fmax:
-        return sig
-    from scipy.signal import butter, lfilter
-    nyquist = 0.5 * rate
-    # Highpass?
-    if fmin > cfg.SIG_FMIN and fmax == cfg.SIG_FMAX:
-        low = fmin / nyquist
-        b, a = butter(order, low, btype="high")
-        sig = lfilter(b, a, sig)
-    # Lowpass?
-    elif fmin == cfg.SIG_FMIN and fmax < cfg.SIG_FMAX:
-        high = fmax / nyquist
-        b, a = butter(order, high, btype="low")
-        sig = lfilter(b, a, sig)
-    # Bandpass?
-    elif fmin > cfg.SIG_FMIN and fmax < cfg.SIG_FMAX:
-        low = fmin / nyquist
-        high = fmax / nyquist
-        b, a = butter(order, [low, high], btype="band")
-        sig = lfilter(b, a, sig)
-    return sig.astype("float32")
-# Raven is using Kaiser window FIR filter, so we try to emulate it.
-# Raven uses the Window method for FIR filter design.
-# A Kaiser window is used with a default transition bandwidth of 0.02 times
-# the Nyquist frequency and a default stop band attenuation of 100 dB.
-# For a complete description of this method, see Discrete-Time Signal Processing
-# (Second Edition), by Alan Oppenheim, Ronald Schafer, and John Buck, Prentice Hall 1998, pp. 474-476.
-def bandpass_kaiser_fir(sig, rate, fmin, fmax, width=0.02, stopband_attenuation_db=100):
-    """
-    Applies a bandpass filter to the given signal using a Kaiser window FIR filter.
-    Args:
-        sig (numpy.ndarray): The input signal to be filtered.
-        rate (int): The sample rate of the input signal.
-        fmin (float): The minimum frequency of the bandpass filter.
-        fmax (float): The maximum frequency of the bandpass filter.
-        width (float, optional): The transition width of the filter. Default is 0.02.
-        stopband_attenuation_db (float, optional): The desired attenuation in the stopband, in decibels. Default is 100.
-    Returns:
-        numpy.ndarray: The filtered signal as a float32 numpy array.
-    """
-    # Check if we have to bandpass at all
-    if fmin == cfg.SIG_FMIN and fmax == cfg.SIG_FMAX or fmin > fmax:
-        return sig
-    nyquist = 0.5 * rate
-    # Calculate the order and Kaiser parameter for the desired specifications.
-    N, beta = kaiserord(stopband_attenuation_db, width)
-    # Highpass?
-    if fmin > cfg.SIG_FMIN and fmax == cfg.SIG_FMAX:
-        low = fmin / nyquist
-        taps = firwin(N, low, window=("kaiser", beta), pass_zero=False)
-    # Lowpass?
-    elif fmin == cfg.SIG_FMIN and fmax < cfg.SIG_FMAX:
-        high = fmax / nyquist
-        taps = firwin(N, high, window=("kaiser", beta), pass_zero=True)
-    # Bandpass?
-    elif fmin > cfg.SIG_FMIN and fmax < cfg.SIG_FMAX:
-        low = fmin / nyquist
-        high = fmax / nyquist
-        taps = firwin(N, [low, high], window=("kaiser", beta), pass_zero=False)
-    # Apply the filter to the signal.
-    sig = lfilter(taps, 1.0, sig)
-    return sig.astype("float32")
+"""Module containing audio helper functions."""
+import librosa
+import numpy as np
+import soundfile as sf
+from scipy.signal import find_peaks, firwin, kaiserord, lfilter
+import birdnet_analyzer.config as cfg
+RANDOM = np.random.RandomState(cfg.RANDOM_SEED)
+def open_audio_file(path: str, sample_rate=48000, offset=0.0, duration=None, fmin=None, fmax=None, speed=1.0):
+    """Open an audio file.
+    Opens an audio file with librosa and the given settings.
+    Args:
+        path: Path to the audio file.
+        sample_rate: The sample rate at which the file should be processed.
+        offset: The starting offset.
+        duration: Maximum duration of the loaded content.
+        fmin: Minimum frequency for bandpass filter.
+        fmax: Maximum frequency for bandpass filter.
+        speed: Speed factor for audio playback.
+    Returns:
+        Returns the audio time series and the sampling rate.
+    """
+    # Open file with librosa (uses ffmpeg or libav)
+    if speed == 1.0:
+        sig, rate = librosa.load(
+            path, sr=sample_rate, offset=offset, duration=duration, mono=True, res_type="kaiser_fast"
+        )
+    else:
+        # Load audio with original sample rate
+        sig, rate = librosa.load(path, sr=None, offset=offset, duration=duration, mono=True)
+        # Resample with "fake" sample rate
+        sig = librosa.resample(sig, orig_sr=int(rate * speed), target_sr=sample_rate, res_type="kaiser_fast")
+        rate = sample_rate
+    # Bandpass filter
+    if fmin is not None and fmax is not None:
+        sig = bandpass(sig, rate, fmin, fmax)
+        # sig = bandpassKaiserFIR(sig, rate, fmin, fmax)
+    return sig, rate
+def get_audio_file_length(path):
+    """
+    Get the length of an audio file in seconds.
+    Args:
+        path (str): The file path to the audio file.
+    Returns:
+        float: The duration of the audio file in seconds.
+    """
+    # Open file with librosa (uses ffmpeg or libav)
+    return librosa.get_duration(path=path, sr=None)
+def get_sample_rate(path: str):
+    """
+    Get the sample rate of an audio file.
+    Args:
+        path (str): The file path to the audio file.
+    Returns:
+        int: The sample rate of the audio file.
+    """
+    return librosa.get_samplerate(path)
+def save_signal(sig, fname: str, rate=48000):
+    """Saves a signal to file.
+    Args:
+        sig: The signal to be saved.
+        fname: The file path.
+    Returns:
+        None
+    """
+    sf.write(fname, sig, rate, "PCM_16")
+def pad(sig, seconds, srate, amount=None):
+    """Creates a noise vector with the given shape.
+    Args:
+        sig: The original audio signal.
+        shape: Shape of the noise.
+        amount: The noise intensity.
+    Returns:
+        An numpy array of noise with the given shape.
+    """
+    target_len = int(srate * seconds)
+    if len(sig) < target_len:
+        noise_shape = target_len - len(sig)
+        if not cfg.USE_NOISE:
+            noise = np.zeros(noise_shape, dtype=sig.dtype)
+        else:
+            # Random noise intensity
+            if amount is None:
+                amount = RANDOM.uniform(0.1, 0.5)
+            # Create Gaussian noise
+            try:
+                noise = RANDOM.normal(min(sig) * amount, max(sig) * amount, noise_shape).astype(sig.dtype)
+            except:
+                noise = np.zeros(noise_shape, dtype=sig.dtype)
+        return np.concatenate((sig, noise))
+    return sig
+def split_signal(sig, rate, seconds, overlap, minlen, amount=None):
+    """Split signal with overlap.
+    Args:
+        sig: The original signal to be split.
+        rate: The sampling rate.
+        seconds: The duration of a segment.
+        overlap: The overlapping seconds of segments.
+        minlen: Minimum length of a split.
+    Returns:
+        A list of splits.
+    """
+    # Split signal to chunks of duration with overlap, whereas each chunk still has minimum duration of signal
+    if rate is None or rate <= 0:
+        rate = cfg.SAMPLE_RATE
+    if seconds is None or seconds <= 0:
+        seconds = cfg.SIG_LENGTH
+    if overlap is None or overlap < 0:
+        overlap = cfg.SIG_OVERLAP
+    if minlen is None or minlen <= 0 or minlen > seconds:
+        minlen = cfg.SIG_MINLEN
+    # Make sure overlap is smaller then signal duration
+    if overlap >= seconds:
+        overlap = seconds - 0.01
+    # Number of frames per chunk, per step and per minimum signal
+    chunksize = int(rate * seconds)
+    stepsize = int(rate * (seconds - overlap))
+    minsize = int(rate * minlen)
+    # Start of last chunk
+    lastchunkpos = int((sig.size - chunksize + stepsize - 1) / stepsize) * stepsize
+    # Make sure at least one chunk is returned
+    if lastchunkpos < 0:
+        lastchunkpos = 0
+    # Omit last chunk if minimum signal duration is underrun
+    elif sig.size - lastchunkpos < minsize:
+        lastchunkpos = lastchunkpos - stepsize
+    # Append noise or empty signal of chunk duration, so all splits have desired length
+    if not cfg.USE_NOISE:
+        noise = np.zeros(shape=chunksize, dtype=sig.dtype)
+    else:
+        # Random noise intensity
+        if amount is None:
+            amount = RANDOM.uniform(0.1, 0.5)
+        # Create Gaussian noise
+        try:
+            noise = RANDOM.normal(loc=min(sig) * amount, scale=max(sig) * amount, size=chunksize).astype(sig.dtype)
+        except:
+            noise = np.zeros(shape=chunksize, dtype=sig.dtype)
+    data = np.concatenate((sig, noise))
+    # Split signal with overlap
+    sig_splits = []
+    sig_splits.extend(data[i : i + chunksize] for i in range(0, lastchunkpos + 1, stepsize))
+    return sig_splits
+def crop_center(sig, rate, seconds):
+    """Crop signal to center.
+    Args:
+        sig: The original signal.
+        rate: The sampling rate.
+        seconds: The length of the signal.
+    Returns:
+        The cropped signal.
+    """
+    if len(sig) > int(seconds * rate):
+        start = int((len(sig) - int(seconds * rate)) / 2)
+        end = start + int(seconds * rate)
+        sig = sig[start:end]
+    # Pad with noise
+    else:
+        sig = pad(sig, seconds, rate, 0.5)
+    return sig
+def smart_crop_signal(sig, rate, sig_length, sig_overlap, sig_minlen):
+    """Smart crop audio signal based on peak detection.
+    This function analyzes the audio signal to find peaks in energy/amplitude,
+    which are more likely to contain relevant target signals (e.g., bird calls).
+    Only the audio segments with the highest energy peaks are returned.
+    Args:
+        sig: The audio signal.
+        rate: The sample rate of the audio signal.
+        sig_length: The desired length of each snippet in seconds.
+        sig_overlap: The overlap between snippets in seconds.
+        sig_minlen: The minimum length of a snippet in seconds.
+    Returns:
+        A list of audio snippets with the highest energy/peaks.
+    """
+    # If signal is too short, just return it
+    if len(sig) / rate <= sig_length:
+        return [sig]
+    # Split the signal into overlapping windows
+    splits = split_signal(sig, rate, sig_length, sig_overlap, sig_minlen)
+    if len(splits) <= 1:
+        return splits
+    # Calculate energy for each window
+    energies = []
+    for split in splits:
+        # Calculate RMS energy
+        energy = np.sqrt(np.mean(split**2))
+        # Also consider peak values
+        peak = np.max(np.abs(split))
+        # Combine both metrics
+        energies.append(energy * 0.7 + peak * 0.3)  # Weighted combination
+    # Find peaks in the energy curve
+    # Smooth energies first to avoid small fluctuations
+    smoothed_energies = np.convolve(energies, np.ones(3) / 3, mode="same")
+    peaks, _ = find_peaks(smoothed_energies, height=np.mean(smoothed_energies), distance=2)
+    # If no clear peaks found, fall back to selecting top energy segments
+    if len(peaks) < 2:
+        # Sort segments by energy and take top segments (up to 3 or 1/3 of total, whichever is more)
+        num_segments = max(3, len(splits) // 3)
+        indices = np.argsort(energies)[-num_segments:]
+        return [splits[i] for i in sorted(indices)]
+    # Return the audio segments corresponding to the peaks
+    peak_splits = [splits[i] for i in peaks]
+    # If we have too many peaks, select the strongest ones
+    if len(peak_splits) > 5:
+        peak_energies = [energies[i] for i in peaks]
+        sorted_indices = np.argsort(peak_energies)[::-1]  # Sort in descending order
+        peak_splits = [peak_splits[i] for i in sorted_indices[:5]]  # Take top 5
+    return peak_splits
+def bandpass(sig, rate, fmin, fmax, order=5):
+    """
+    Apply a bandpass filter to the input signal.
+    Args:
+        sig (numpy.ndarray): The input signal to be filtered.
+        rate (int): The sampling rate of the input signal.
+        fmin (float): The minimum frequency for the bandpass filter.
+        fmax (float): The maximum frequency for the bandpass filter.
+        order (int, optional): The order of the filter. Default is 5.
+    Returns:
+        numpy.ndarray: The filtered signal as a float32 array.
+    """
+    # Check if we have to bandpass at all
+    if (fmin == cfg.SIG_FMIN and fmax == cfg.SIG_FMAX) or fmin > fmax:
+        return sig
+    from scipy.signal import butter, lfilter
+    nyquist = 0.5 * rate
+    # Highpass?
+    if fmin > cfg.SIG_FMIN and fmax == cfg.SIG_FMAX:
+        low = fmin / nyquist
+        b, a = butter(order, low, btype="high")
+        sig = lfilter(b, a, sig)
+    # Lowpass?
+    elif fmin == cfg.SIG_FMIN and fmax < cfg.SIG_FMAX:
+        high = fmax / nyquist
+        b, a = butter(order, high, btype="low")
+        sig = lfilter(b, a, sig)
+    # Bandpass?
+    elif fmin > cfg.SIG_FMIN and fmax < cfg.SIG_FMAX:
+        low = fmin / nyquist
+        high = fmax / nyquist
+        b, a = butter(order, [low, high], btype="band")
+        sig = lfilter(b, a, sig)
+    return sig.astype("float32")
+# Raven is using Kaiser window FIR filter, so we try to emulate it.
+# Raven uses the Window method for FIR filter design.
+# A Kaiser window is used with a default transition bandwidth of 0.02 times
+# the Nyquist frequency and a default stop band attenuation of 100 dB.
+# For a complete description of this method, see Discrete-Time Signal Processing
+# (Second Edition), by Alan Oppenheim, Ronald Schafer, and John Buck, Prentice Hall 1998, pp. 474-476.
+def bandpass_kaiser_fir(sig, rate, fmin, fmax, width=0.02, stopband_attenuation_db=100):
+    """
+    Applies a bandpass filter to the given signal using a Kaiser window FIR filter.
+    Args:
+        sig (numpy.ndarray): The input signal to be filtered.
+        rate (int): The sample rate of the input signal.
+        fmin (float): The minimum frequency of the bandpass filter.
+        fmax (float): The maximum frequency of the bandpass filter.
+        width (float, optional): The transition width of the filter. Default is 0.02.
+        stopband_attenuation_db (float, optional): The desired attenuation in the stopband, in decibels. Default is 100.
+    Returns:
+        numpy.ndarray: The filtered signal as a float32 numpy array.
+    """
+    # Check if we have to bandpass at all
+    if (fmin == cfg.SIG_FMIN and fmax == cfg.SIG_FMAX) or fmin > fmax:
+        return sig
+    nyquist = 0.5 * rate
+    # Calculate the order and Kaiser parameter for the desired specifications.
+    N, beta = kaiserord(stopband_attenuation_db, width)
+    # Highpass?
+    if fmin > cfg.SIG_FMIN and fmax == cfg.SIG_FMAX:
+        low = fmin / nyquist
+        taps = firwin(N, low, window=("kaiser", beta), pass_zero=False)
+    # Lowpass?
+    elif fmin == cfg.SIG_FMIN and fmax < cfg.SIG_FMAX:
+        high = fmax / nyquist
+        taps = firwin(N, high, window=("kaiser", beta), pass_zero=True)
+    # Bandpass?
+    elif fmin > cfg.SIG_FMIN and fmax < cfg.SIG_FMAX:
+        low = fmin / nyquist
+        high = fmax / nyquist
+        taps = firwin(N, [low, high], window=("kaiser", beta), pass_zero=False)
+    # Apply the filter to the signal.
+    sig = lfilter(taps, 1.0, sig)
+    return sig.astype("float32")

birdnet-analyzer 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

birdnet-analyzer 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl