birdnet-analyzer 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. birdnet_analyzer/__init__.py +9 -8
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -4
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +246 -245
  6. birdnet_analyzer/analyze/utils.py +694 -701
  7. birdnet_analyzer/audio.py +368 -372
  8. birdnet_analyzer/cli.py +732 -707
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -4
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -13
  14. birdnet_analyzer/embeddings/core.py +70 -70
  15. birdnet_analyzer/embeddings/utils.py +220 -193
  16. birdnet_analyzer/evaluation/__init__.py +189 -195
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
  19. birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
  20. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -0
  21. birdnet_analyzer/evaluation/assessment/plotting.py +378 -0
  22. birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
  23. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
  24. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
  25. birdnet_analyzer/gui/__init__.py +19 -23
  26. birdnet_analyzer/gui/__main__.py +3 -3
  27. birdnet_analyzer/gui/analysis.py +179 -174
  28. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  30. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  31. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  32. birdnet_analyzer/gui/assets/gui.css +36 -28
  33. birdnet_analyzer/gui/assets/gui.js +93 -93
  34. birdnet_analyzer/gui/embeddings.py +638 -620
  35. birdnet_analyzer/gui/evaluation.py +801 -813
  36. birdnet_analyzer/gui/localization.py +75 -68
  37. birdnet_analyzer/gui/multi_file.py +265 -246
  38. birdnet_analyzer/gui/review.py +472 -527
  39. birdnet_analyzer/gui/segments.py +191 -191
  40. birdnet_analyzer/gui/settings.py +149 -129
  41. birdnet_analyzer/gui/single_file.py +264 -269
  42. birdnet_analyzer/gui/species.py +95 -95
  43. birdnet_analyzer/gui/train.py +687 -698
  44. birdnet_analyzer/gui/utils.py +797 -808
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  80. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  81. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  82. birdnet_analyzer/lang/de.json +341 -334
  83. birdnet_analyzer/lang/en.json +341 -334
  84. birdnet_analyzer/lang/fi.json +341 -334
  85. birdnet_analyzer/lang/fr.json +341 -334
  86. birdnet_analyzer/lang/id.json +341 -334
  87. birdnet_analyzer/lang/pt-br.json +341 -334
  88. birdnet_analyzer/lang/ru.json +341 -334
  89. birdnet_analyzer/lang/se.json +341 -334
  90. birdnet_analyzer/lang/tlh.json +341 -334
  91. birdnet_analyzer/lang/zh_TW.json +341 -334
  92. birdnet_analyzer/model.py +1212 -1243
  93. birdnet_analyzer/playground.py +5 -0
  94. birdnet_analyzer/search/__init__.py +3 -3
  95. birdnet_analyzer/search/__main__.py +3 -3
  96. birdnet_analyzer/search/cli.py +11 -12
  97. birdnet_analyzer/search/core.py +78 -78
  98. birdnet_analyzer/search/utils.py +107 -111
  99. birdnet_analyzer/segments/__init__.py +3 -3
  100. birdnet_analyzer/segments/__main__.py +3 -3
  101. birdnet_analyzer/segments/cli.py +13 -14
  102. birdnet_analyzer/segments/core.py +81 -78
  103. birdnet_analyzer/segments/utils.py +383 -394
  104. birdnet_analyzer/species/__init__.py +3 -3
  105. birdnet_analyzer/species/__main__.py +3 -3
  106. birdnet_analyzer/species/cli.py +13 -14
  107. birdnet_analyzer/species/core.py +35 -35
  108. birdnet_analyzer/species/utils.py +74 -75
  109. birdnet_analyzer/train/__init__.py +3 -3
  110. birdnet_analyzer/train/__main__.py +3 -3
  111. birdnet_analyzer/train/cli.py +13 -14
  112. birdnet_analyzer/train/core.py +113 -113
  113. birdnet_analyzer/train/utils.py +877 -847
  114. birdnet_analyzer/translate.py +133 -104
  115. birdnet_analyzer/utils.py +425 -419
  116. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/METADATA +146 -129
  117. birdnet_analyzer-2.1.0.dist-info/RECORD +125 -0
  118. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/WHEEL +1 -1
  119. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/licenses/LICENSE +18 -18
  120. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  121. birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
  122. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/entry_points.txt +0 -0
  123. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/top_level.txt +0 -0

birdnet_analyzer/embeddings/__init__.py
@@ -1,4 +1,3 @@
- from birdnet_analyzer.embeddings.core import embeddings
-
-
- __all__ = ["embeddings"]
+ from birdnet_analyzer.embeddings.core import embeddings
+
+ __all__ = ["embeddings"]

birdnet_analyzer/embeddings/__main__.py
@@ -1,3 +1,3 @@
- from birdnet_analyzer.embeddings.cli import main
-
- main()
+ from birdnet_analyzer.embeddings.cli import main
+
+ main()

birdnet_analyzer/embeddings/cli.py
@@ -1,13 +1,12 @@
- from birdnet_analyzer.utils import runtime_error_handler
-
- from birdnet_analyzer import embeddings
-
-
- @runtime_error_handler
- def main():
-     import birdnet_analyzer.cli as cli
-
-     parser = cli.embeddings_parser()
-     args = parser.parse_args()
-
-     embeddings(**vars(args))
+ from birdnet_analyzer import embeddings
+ from birdnet_analyzer.utils import runtime_error_handler
+
+
+ @runtime_error_handler
+ def main():
+     from birdnet_analyzer import cli
+
+     parser = cli.embeddings_parser()
+     args = parser.parse_args()
+
+     embeddings(**vars(args))

birdnet_analyzer/embeddings/core.py
@@ -1,70 +1,70 @@
- def embeddings(
-     input: str,
-     database: str,
-     *,
-     overlap: float = 0.0,
-     audio_speed: float = 1.0,
-     fmin: int = 0,
-     fmax: int = 15000,
-     threads: int = 8,
-     batch_size: int = 1,
- ):
-     """
-     Generates embeddings for audio files using the BirdNET-Analyzer.
-     This function processes audio files to extract embeddings, which are
-     representations of audio features. The embeddings can be used for
-     further analysis or comparison.
-     Args:
-         input (str): Path to the input audio file or directory containing audio files.
-         database (str): Path to the database where embeddings will be stored.
-         overlap (float, optional): Overlap between consecutive audio segments in seconds. Defaults to 0.0.
-         audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
-         fmin (int, optional): Minimum frequency (in Hz) for audio analysis. Defaults to 0.
-         fmax (int, optional): Maximum frequency (in Hz) for audio analysis. Defaults to 15000.
-         threads (int, optional): Number of threads to use for processing. Defaults to 8.
-         batch_size (int, optional): Number of audio segments to process in a single batch. Defaults to 1.
-     Raises:
-         FileNotFoundError: If the input path or database path does not exist.
-         ValueError: If any of the parameters are invalid.
-     Note:
-         Ensure that the required model files are downloaded and available before
-         calling this function. The `ensure_model_exists` function is used to
-         verify this.
-     Example:
-         embeddings(
-             input="path/to/audio",
-             database="path/to/database",
-             overlap=0.5,
-             audio_speed=1.0,
-             fmin=500,
-             fmax=10000,
-             threads=4,
-             batch_size=2
-         )
-     """
-     from birdnet_analyzer.embeddings.utils import run
-     from birdnet_analyzer.utils import ensure_model_exists
-
-     ensure_model_exists()
-     run(input, database, overlap, audio_speed, fmin, fmax, threads, batch_size)
-
-
- def get_database(db_path: str):
-     """Get the database object. Creates or opens the databse.
-     Args:
-         db: The path to the database.
-     Returns:
-         The database object.
-     """
-     import os
-
-     from perch_hoplite.db import sqlite_usearch_impl
-
-     if not os.path.exists(db_path):
-         os.makedirs(os.path.dirname(db_path), exist_ok=True)
-         db = sqlite_usearch_impl.SQLiteUsearchDB.create(
-             db_path=db_path,
-             usearch_cfg=sqlite_usearch_impl.get_default_usearch_config(embedding_dim=1024), # TODO dont hardcode this
-         )
-         return db
-     return sqlite_usearch_impl.SQLiteUsearchDB.create(db_path=db_path)
+ def embeddings(
+     audio_input: str,
+     database: str,
+     *,
+     overlap: float = 0.0,
+     audio_speed: float = 1.0,
+     fmin: int = 0,
+     fmax: int = 15000,
+     threads: int = 8,
+     batch_size: int = 1,
+     file_output: str | None = None,
+ ):
+     """
+     Generates embeddings for audio files using the BirdNET-Analyzer.
+     This function processes audio files to extract embeddings, which are
+     representations of audio features. The embeddings can be used for
+     further analysis or comparison.
+     Args:
+         audio_input (str): Path to the input audio file or directory containing audio files.
+         database (str): Path to the database where embeddings will be stored.
+         overlap (float, optional): Overlap between consecutive audio segments in seconds. Defaults to 0.0.
+         audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
+         fmin (int, optional): Minimum frequency (in Hz) for audio analysis. Defaults to 0.
+         fmax (int, optional): Maximum frequency (in Hz) for audio analysis. Defaults to 15000.
+         threads (int, optional): Number of threads to use for processing. Defaults to 8.
+         batch_size (int, optional): Number of audio segments to process in a single batch. Defaults to 1.
+     Raises:
+         FileNotFoundError: If the input path or database path does not exist.
+         ValueError: If any of the parameters are invalid.
+     Note:
+         Ensure that the required model files are downloaded and available before
+         calling this function. The `ensure_model_exists` function is used to
+         verify this.
+     Example:
+         embeddings(
+             "path/to/audio",
+             "path/to/database",
+             overlap=0.5,
+             audio_speed=1.0,
+             fmin=500,
+             fmax=10000,
+             threads=4,
+             batch_size=2
+         )
+     """
+     from birdnet_analyzer.embeddings.utils import run
+     from birdnet_analyzer.utils import ensure_model_exists
+
+     ensure_model_exists()
+     run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batch_size, file_output)
+
+
+ def get_database(db_path: str):
+     """Get the database object. Creates or opens the databse.
+     Args:
+         db: The path to the database.
+     Returns:
+         The database object.
+     """
+     import os
+
+     from perch_hoplite.db import sqlite_usearch_impl
+
+     if not os.path.exists(db_path):
+         os.makedirs(os.path.dirname(db_path), exist_ok=True)
+         return sqlite_usearch_impl.SQLiteUsearchDB.create(
+             db_path=db_path,
+             usearch_cfg=sqlite_usearch_impl.get_default_usearch_config(embedding_dim=1024), # TODO: dont hardcode this
+         )
+     return sqlite_usearch_impl.SQLiteUsearchDB.create(db_path=db_path)
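
The hunk above renames the first parameter of embeddings() from input to audio_input (it no longer shadows the Python builtin) and adds an optional keyword-only file_output argument that is forwarded to run(). A minimal usage sketch against the new 2.1.0 signature; the paths below are placeholders, not files shipped with the package:

    from birdnet_analyzer.embeddings.core import embeddings

    # Placeholders: point these at your own recordings, database location and export folder.
    embeddings(
        "path/to/audio",                # audio_input (was `input` in 2.0.0)
        "path/to/database",             # usearch/SQLite database, created on first use via get_database()
        overlap=0.5,
        fmin=500,
        fmax=10000,
        threads=4,
        batch_size=2,
        file_output="path/to/exports",  # new in 2.1.0: also write per-segment embedding text files
    )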

birdnet_analyzer/embeddings/utils.py
@@ -1,193 +1,220 @@
- """Module used to extract embeddings for samples."""
-
- import datetime
- import os
-
- import numpy as np
-
- import birdnet_analyzer.audio as audio
- import birdnet_analyzer.config as cfg
- import birdnet_analyzer.model as model
- import birdnet_analyzer.utils as utils
- from birdnet_analyzer.analyze.utils import get_raw_audio_from_file
- from birdnet_analyzer.embeddings.core import get_database
-
-
- from perch_hoplite.db import sqlite_usearch_impl
- from perch_hoplite.db import interface as hoplite
- from ml_collections import ConfigDict
- from functools import partial
- from tqdm import tqdm
- from multiprocessing import Pool
-
-
- DATASET_NAME: str = "birdnet_analyzer_dataset"
-
-
- def analyze_file(item, db: sqlite_usearch_impl.SQLiteUsearchDB):
-     """Extracts the embeddings for a file.
-
-     Args:
-         item: (filepath, config)
-     """
-     # Get file path and restore cfg
-     fpath: str = item[0]
-     cfg.set_config(item[1])
-
-     offset = 0
-     duration = cfg.FILE_SPLITTING_DURATION
-
-     try:
-         fileLengthSeconds = int(audio.get_audio_file_length(fpath))
-     except Exception as ex:
-         # Write error log
-         print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
-         utils.write_error_log(ex)
-
-         return None
-
-     # Start time
-     start_time = datetime.datetime.now()
-
-     # Status
-     print(f"Analyzing {fpath}", flush=True)
-
-     source_id = fpath
-
-     # Process each chunk
-     try:
-         while offset < fileLengthSeconds:
-             chunks = get_raw_audio_from_file(fpath, offset, duration)
-             start, end = offset, cfg.SIG_LENGTH + offset
-             samples = []
-             timestamps = []
-
-             for c in range(len(chunks)):
-                 # Add to batch
-                 samples.append(chunks[c])
-                 timestamps.append([start, end])
-
-                 # Advance start and end
-                 start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
-                 end = start + cfg.SIG_LENGTH
-
-                 # Check if batch is full or last chunk
-                 if len(samples) < cfg.BATCH_SIZE and c < len(chunks) - 1:
-                     continue
-
-                 # Prepare sample and pass through model
-                 data = np.array(samples, dtype="float32")
-                 e = model.embeddings(data)
-
-                 # Add to results
-                 for i in range(len(samples)):
-                     # Get timestamp
-                     s_start, s_end = timestamps[i]
-
-                     # Check if embedding already exists
-                     existing_embedding = db.get_embeddings_by_source(
-                         DATASET_NAME, source_id, np.array([s_start, s_end])
-                     )
-
-                     if existing_embedding.size == 0:
-                         # Get prediction
-                         embeddings = e[i]
-
-                         # Store embeddings
-                         embeddings_source = hoplite.EmbeddingSource(DATASET_NAME, source_id, np.array([s_start, s_end]))
-
-                         # Insert into database
-                         db.insert_embedding(embeddings, embeddings_source)
-                         db.commit()
-
-                 # Reset batch
-                 samples = []
-                 timestamps = []
-
-             offset = offset + duration
-
-     except Exception as ex:
-         # Write error log
-         print(f"Error: Cannot analyze audio file {fpath}.", flush=True)
-         utils.write_error_log(ex)
-
-         return
-
-     delta_time = (datetime.datetime.now() - start_time).total_seconds()
-     print("Finished {} in {:.2f} seconds".format(fpath, delta_time), flush=True)
-
-
- def check_database_settings(db: sqlite_usearch_impl.SQLiteUsearchDB):
-     try:
-         settings = db.get_metadata("birdnet_analyzer_settings")
-         if (
-             settings["BANDPASS_FMIN"] != cfg.BANDPASS_FMIN
-             or settings["BANDPASS_FMAX"] != cfg.BANDPASS_FMAX
-             or settings["AUDIO_SPEED"] != cfg.AUDIO_SPEED
-         ):
-             raise ValueError(
-                 "Database settings do not match current configuration. DB Settings are: fmin: {}, fmax: {}, audio_speed: {}".format(
-                     settings["BANDPASS_FMIN"], settings["BANDPASS_FMAX"], settings["AUDIO_SPEED"]
-                 )
-             )
-     except KeyError:
-         settings = ConfigDict(
-             {"BANDPASS_FMIN": cfg.BANDPASS_FMIN, "BANDPASS_FMAX": cfg.BANDPASS_FMAX, "AUDIO_SPEED": cfg.AUDIO_SPEED}
-         )
-         db.insert_metadata("birdnet_analyzer_settings", settings)
-         db.commit()
-
-
- def run(input, database, overlap, audio_speed, fmin, fmax, threads, batchsize):
-     ### Make sure to comment out appropriately if you are not using args. ###
-
-     # Set input and output path
-     cfg.INPUT_PATH = input
-
-     # Parse input files
-     if os.path.isdir(cfg.INPUT_PATH):
-         cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
-     else:
-         cfg.FILE_LIST = [cfg.INPUT_PATH]
-
-     # Set overlap
-     cfg.SIG_OVERLAP = max(0.0, min(2.9, float(overlap)))
-
-     # Set audio speed
-     cfg.AUDIO_SPEED = max(0.01, audio_speed)
-
-     # Set bandpass frequency range
-     cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
-     cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
-
-     # Set number of threads
-     if os.path.isdir(cfg.INPUT_PATH):
-         cfg.CPU_THREADS = max(1, int(threads))
-         cfg.TFLITE_THREADS = 1
-     else:
-         cfg.CPU_THREADS = 1
-         cfg.TFLITE_THREADS = max(1, int(threads))
-
-     cfg.CPU_THREADS = 1 # TODO: with the current implementation, we can't use more than 1 thread
-
-     # Set batch size
-     cfg.BATCH_SIZE = max(1, int(batchsize))
-
-     # Add config items to each file list entry.
-     # We have to do this for Windows which does not
-     # support fork() and thus each process has to
-     # have its own config. USE LINUX!
-     flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
-
-     db = get_database(database)
-     check_database_settings(db)
-
-     # Analyze files
-     if cfg.CPU_THREADS < 2:
-         for entry in tqdm(flist):
-             analyze_file(entry, db)
-     else:
-         with Pool(cfg.CPU_THREADS) as p:
-             tqdm(p.imap(partial(analyze_file, db=db), flist))
-
-     db.db.close()
+ """Module used to extract embeddings for samples."""
+
+ import datetime
+ import os
+ from functools import partial
+ from multiprocessing import Pool
+
+ import numpy as np
+ from ml_collections import ConfigDict
+ from perch_hoplite.db import interface as hoplite
+ from perch_hoplite.db import sqlite_usearch_impl
+ from tqdm import tqdm
+
+ import birdnet_analyzer.config as cfg
+ from birdnet_analyzer import audio, model, utils
+ from birdnet_analyzer.analyze.utils import get_raw_audio_from_file
+ from birdnet_analyzer.embeddings.core import get_database
+
+ DATASET_NAME: str = "birdnet_analyzer_dataset"
+
+
+ def analyze_file(item, db: sqlite_usearch_impl.SQLiteUsearchDB):
+     """Extracts the embeddings for a file.
+
+     Args:
+         item: (filepath, config)
+     """
+
+     # Get file path and restore cfg
+     fpath: str = item[0]
+     cfg.set_config(item[1])
+
+     offset = 0
+     duration = cfg.FILE_SPLITTING_DURATION
+
+     try:
+         fileLengthSeconds = int(audio.get_audio_file_length(fpath))
+     except Exception as ex:
+         # Write error log
+         print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
+         utils.write_error_log(ex)
+
+         return
+
+     # Start time
+     start_time = datetime.datetime.now()
+
+     # Status
+     print(f"Analyzing {fpath}", flush=True)
+
+     source_id = fpath
+
+     # Process each chunk
+     try:
+         while offset < fileLengthSeconds:
+             chunks = get_raw_audio_from_file(fpath, offset, duration)
+             start, end = offset, cfg.SIG_LENGTH + offset
+             samples = []
+             timestamps = []
+
+             for c in range(len(chunks)):
+                 # Add to batch
+                 samples.append(chunks[c])
+                 timestamps.append([start, end])
+
+                 # Advance start and end
+                 start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
+                 end = start + cfg.SIG_LENGTH
+
+                 # Check if batch is full or last chunk
+                 if len(samples) < cfg.BATCH_SIZE and c < len(chunks) - 1:
+                     continue
+
+                 # Prepare sample and pass through model
+                 data = np.array(samples, dtype="float32")
+                 e = model.embeddings(data)
+
+                 # Add to results
+                 for i in range(len(samples)):
+                     # Get timestamp
+                     s_start, s_end = timestamps[i]
+
+                     # Check if embedding already exists
+                     existing_embedding = db.get_embeddings_by_source(DATASET_NAME, source_id, np.array([s_start, s_end]))
+
+                     if existing_embedding.size == 0:
+                         # Get prediction
+                         embeddings = e[i]
+
+                         # Store embeddings
+                         embeddings_source = hoplite.EmbeddingSource(DATASET_NAME, source_id, np.array([s_start, s_end]))
+
+                         # Insert into database
+                         db.insert_embedding(embeddings, embeddings_source)
+                         db.commit()
+
+                 # Reset batch
+                 samples = []
+                 timestamps = []
+
+             offset = offset + duration
+
+     except Exception as ex:
+         # Write error log
+         print(f"Error: Cannot analyze audio file {fpath}.", flush=True)
+         utils.write_error_log(ex)
+
+         return
+
+     delta_time = (datetime.datetime.now() - start_time).total_seconds()
+     print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
+
+
+ def check_database_settings(db: sqlite_usearch_impl.SQLiteUsearchDB):
+     try:
+         settings = db.get_metadata("birdnet_analyzer_settings")
+         if settings["BANDPASS_FMIN"] != cfg.BANDPASS_FMIN or settings["BANDPASS_FMAX"] != cfg.BANDPASS_FMAX or settings["AUDIO_SPEED"] != cfg.AUDIO_SPEED:
+             raise ValueError(
+                 "Database settings do not match current configuration. DB Settings are: fmin:"
+                 + f"{settings['BANDPASS_FMIN']}, fmax: {settings['BANDPASS_FMAX']}, audio_speed: {settings['AUDIO_SPEED']}"
+             )
+     except KeyError:
+         settings = ConfigDict({"BANDPASS_FMIN": cfg.BANDPASS_FMIN, "BANDPASS_FMAX": cfg.BANDPASS_FMAX, "AUDIO_SPEED": cfg.AUDIO_SPEED})
+         db.insert_metadata("birdnet_analyzer_settings", settings)
+         db.commit()
+
+
+ def create_file_output(output_path: str, db: sqlite_usearch_impl.SQLiteUsearchDB):
+     """Creates a file output for the database.
+
+     Args:
+         output_path: Path to the output file.
+         db: Database object.
+     """
+     # Check if output path exists
+     if not os.path.exists(output_path):
+         os.makedirs(output_path)
+     # Get all embeddings
+     embedding_ids = db.get_embedding_ids()
+
+     # Write embeddings to file
+     for embedding_id in embedding_ids:
+         embedding = db.get_embedding(embedding_id)
+         source = db.get_embedding_source(embedding_id)
+
+         # Get start and end time
+         start, end = source.offsets
+
+         source_id = source.source_id.rsplit(".", 1)[0]
+
+         filename = f"{source_id}_{start}_{end}.birdnet.embeddings.txt"
+
+         # Get the common prefix between the output path and the filename
+         common_prefix = os.path.commonpath([output_path, os.path.dirname(filename)])
+         relative_filename = os.path.relpath(filename, common_prefix)
+         target_path = os.path.join(output_path, relative_filename)
+
+         # Ensure the target directory exists
+         os.makedirs(os.path.dirname(target_path), exist_ok=True)
+
+         # Write embedding values to a text file
+         with open(target_path, "w") as f:
+             f.write(",".join(map(str, embedding.tolist())))
+
+ def run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batchsize, file_output):
+     ### Make sure to comment out appropriately if you are not using args. ###
+
+     # Set input and output path
+     cfg.INPUT_PATH = audio_input
+
+     # Parse input files
+     if os.path.isdir(cfg.INPUT_PATH):
+         cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
+     else:
+         cfg.FILE_LIST = [cfg.INPUT_PATH]
+
+     # Set overlap
+     cfg.SIG_OVERLAP = max(0.0, min(2.9, float(overlap)))
+
+     # Set audio speed
+     cfg.AUDIO_SPEED = max(0.01, audio_speed)
+
+     # Set bandpass frequency range
+     cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
+     cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
+
+     # Set number of threads
+     if os.path.isdir(cfg.INPUT_PATH):
+         cfg.CPU_THREADS = max(1, int(threads))
+         cfg.TFLITE_THREADS = 1
+     else:
+         cfg.CPU_THREADS = 1
+         cfg.TFLITE_THREADS = max(1, int(threads))
+
+     cfg.CPU_THREADS = 1 # TODO: with the current implementation, we can't use more than 1 thread
+
+     # Set batch size
+     cfg.BATCH_SIZE = max(1, int(batchsize))
+
+     # Add config items to each file list entry.
+     # We have to do this for Windows which does not
+     # support fork() and thus each process has to
+     # have its own config. USE LINUX!
+     flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
+
+     db = get_database(database)
+     check_database_settings(db)
+
+     # Analyze files
+     if cfg.CPU_THREADS < 2:
+         for entry in tqdm(flist):
+             analyze_file(entry, db)
+     else:
+         with Pool(cfg.CPU_THREADS) as p:
+             tqdm(p.imap(partial(analyze_file, db=db), flist))
+
+     if file_output:
+         create_file_output(file_output, db)
+
+     db.db.close()
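
When file_output is set, run() now finishes by calling create_file_output(), which writes each stored embedding as a single line of comma-separated values to a <source>_<start>_<end>.birdnet.embeddings.txt file under the export folder. A small sketch for loading such an export back into a NumPy vector; the path below is a placeholder and assumes an export has already been produced:

    import numpy as np

    # Placeholder path following the naming scheme used by create_file_output().
    export_path = "path/to/exports/recording_0.0_3.0.birdnet.embeddings.txt"

    with open(export_path) as f:
        # One line of comma-separated floats, as written by ",".join(map(str, embedding.tolist()))
        vector = np.array([float(v) for v in f.read().split(",")], dtype="float32")

    print(vector.shape)  # expected (1024,), matching embedding_dim=1024 in get_database()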