PyPI - birdnet-analyzer - Versions diffs - 2.1.0__py3-none-any.whl → 2.1.1__py3-none-any.whl - Mend

birdnet-analyzer 2.1.0__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (120) hide show

birdnet_analyzer/__init__.py +9 -9
birdnet_analyzer/analyze/__init__.py +19 -19
birdnet_analyzer/analyze/__main__.py +3 -3
birdnet_analyzer/analyze/cli.py +30 -30
birdnet_analyzer/analyze/core.py +268 -246
birdnet_analyzer/analyze/utils.py +700 -694
birdnet_analyzer/audio.py +368 -368
birdnet_analyzer/cli.py +732 -732
birdnet_analyzer/config.py +243 -243
birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13045 -13045
birdnet_analyzer/embeddings/__init__.py +3 -3
birdnet_analyzer/embeddings/__main__.py +3 -3
birdnet_analyzer/embeddings/cli.py +12 -12
birdnet_analyzer/embeddings/core.py +70 -70
birdnet_analyzer/embeddings/utils.py +173 -220
birdnet_analyzer/evaluation/__init__.py +189 -189
birdnet_analyzer/evaluation/__main__.py +3 -3
birdnet_analyzer/evaluation/assessment/metrics.py +388 -388
birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -364
birdnet_analyzer/evaluation/assessment/plotting.py +378 -378
birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -631
birdnet_analyzer/evaluation/preprocessing/utils.py +98 -98
birdnet_analyzer/gui/__init__.py +19 -19
birdnet_analyzer/gui/__main__.py +3 -3
birdnet_analyzer/gui/analysis.py +179 -179
birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
birdnet_analyzer/gui/assets/gui.css +36 -36
birdnet_analyzer/gui/assets/gui.js +89 -93
birdnet_analyzer/gui/embeddings.py +638 -638
birdnet_analyzer/gui/evaluation.py +801 -801
birdnet_analyzer/gui/localization.py +75 -75
birdnet_analyzer/gui/multi_file.py +265 -265
birdnet_analyzer/gui/review.py +472 -472
birdnet_analyzer/gui/segments.py +191 -191
birdnet_analyzer/gui/settings.py +149 -149
birdnet_analyzer/gui/single_file.py +264 -264
birdnet_analyzer/gui/species.py +95 -95
birdnet_analyzer/gui/train.py +687 -687
birdnet_analyzer/gui/utils.py +803 -797
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
birdnet_analyzer/lang/de.json +342 -341
birdnet_analyzer/lang/en.json +342 -341
birdnet_analyzer/lang/fi.json +342 -341
birdnet_analyzer/lang/fr.json +342 -341
birdnet_analyzer/lang/id.json +342 -341
birdnet_analyzer/lang/pt-br.json +342 -341
birdnet_analyzer/lang/ru.json +342 -341
birdnet_analyzer/lang/se.json +342 -341
birdnet_analyzer/lang/tlh.json +342 -341
birdnet_analyzer/lang/zh_TW.json +342 -341
birdnet_analyzer/model.py +1213 -1212
birdnet_analyzer/search/__init__.py +3 -3
birdnet_analyzer/search/__main__.py +3 -3
birdnet_analyzer/search/cli.py +11 -11
birdnet_analyzer/search/core.py +78 -78
birdnet_analyzer/search/utils.py +104 -107
birdnet_analyzer/segments/__init__.py +3 -3
birdnet_analyzer/segments/__main__.py +3 -3
birdnet_analyzer/segments/cli.py +13 -13
birdnet_analyzer/segments/core.py +81 -81
birdnet_analyzer/segments/utils.py +383 -383
birdnet_analyzer/species/__init__.py +3 -3
birdnet_analyzer/species/__main__.py +3 -3
birdnet_analyzer/species/cli.py +13 -13
birdnet_analyzer/species/core.py +35 -35
birdnet_analyzer/species/utils.py +73 -74
birdnet_analyzer/train/__init__.py +3 -3
birdnet_analyzer/train/__main__.py +3 -3
birdnet_analyzer/train/cli.py +13 -13
birdnet_analyzer/train/core.py +113 -113
birdnet_analyzer/train/utils.py +878 -877
birdnet_analyzer/translate.py +132 -133
birdnet_analyzer/utils.py +425 -425
{birdnet_analyzer-2.1.0.dist-info → birdnet_analyzer-2.1.1.dist-info}/METADATA +147 -146
birdnet_analyzer-2.1.1.dist-info/RECORD +124 -0
{birdnet_analyzer-2.1.0.dist-info → birdnet_analyzer-2.1.1.dist-info}/licenses/LICENSE +18 -18
birdnet_analyzer/playground.py +0 -5
birdnet_analyzer-2.1.0.dist-info/RECORD +0 -125
{birdnet_analyzer-2.1.0.dist-info → birdnet_analyzer-2.1.1.dist-info}/WHEEL +0 -0
{birdnet_analyzer-2.1.0.dist-info → birdnet_analyzer-2.1.1.dist-info}/entry_points.txt +0 -0
{birdnet_analyzer-2.1.0.dist-info → birdnet_analyzer-2.1.1.dist-info}/top_level.txt +0 -0

birdnet_analyzer/embeddings/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from birdnet_analyzer.embeddings.core import embeddings
-__all__ = ["embeddings"]
+from birdnet_analyzer.embeddings.core import embeddings
+__all__ = ["embeddings"]

birdnet_analyzer/embeddings/__main__.py CHANGED Viewed

@@ -1,3 +1,3 @@
-from birdnet_analyzer.embeddings.cli import main
-main()
+from birdnet_analyzer.embeddings.cli import main
+main()

birdnet_analyzer/embeddings/cli.py CHANGED Viewed

@@ -1,12 +1,12 @@
-from birdnet_analyzer import embeddings
-from birdnet_analyzer.utils import runtime_error_handler
-@runtime_error_handler
-def main():
-    from birdnet_analyzer import cli
-    parser = cli.embeddings_parser()
-    args = parser.parse_args()
-    embeddings(**vars(args))
+from birdnet_analyzer import embeddings
+from birdnet_analyzer.utils import runtime_error_handler
+@runtime_error_handler
+def main():
+    from birdnet_analyzer import cli
+    parser = cli.embeddings_parser()
+    args = parser.parse_args()
+    embeddings(**vars(args))

birdnet_analyzer/embeddings/core.py CHANGED Viewed

@@ -1,70 +1,70 @@
-def embeddings(
-    audio_input: str,
-    database: str,
-    *,
-    overlap: float = 0.0,
-    audio_speed: float = 1.0,
-    fmin: int = 0,
-    fmax: int = 15000,
-    threads: int = 8,
-    batch_size: int = 1,
-    file_output: str | None = None,
-):
-    """
-    Generates embeddings for audio files using the BirdNET-Analyzer.
-    This function processes audio files to extract embeddings, which are
-    representations of audio features. The embeddings can be used for
-    further analysis or comparison.
-    Args:
-        audio_input (str): Path to the input audio file or directory containing audio files.
-        database (str): Path to the database where embeddings will be stored.
-        overlap (float, optional): Overlap between consecutive audio segments in seconds. Defaults to 0.0.
-        audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
-        fmin (int, optional): Minimum frequency (in Hz) for audio analysis. Defaults to 0.
-        fmax (int, optional): Maximum frequency (in Hz) for audio analysis. Defaults to 15000.
-        threads (int, optional): Number of threads to use for processing. Defaults to 8.
-        batch_size (int, optional): Number of audio segments to process in a single batch. Defaults to 1.
-    Raises:
-        FileNotFoundError: If the input path or database path does not exist.
-        ValueError: If any of the parameters are invalid.
-    Note:
-        Ensure that the required model files are downloaded and available before
-        calling this function. The `ensure_model_exists` function is used to
-        verify this.
-    Example:
-        embeddings(
-            "path/to/audio",
-            "path/to/database",
-            overlap=0.5,
-            audio_speed=1.0,
-            fmin=500,
-            fmax=10000,
-            threads=4,
-            batch_size=2
-        )
-    """
-    from birdnet_analyzer.embeddings.utils import run
-    from birdnet_analyzer.utils import ensure_model_exists
-    ensure_model_exists()
-    run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batch_size, file_output)
-def get_database(db_path: str):
-    """Get the database object. Creates or opens the databse.
-    Args:
-        db: The path to the database.
-    Returns:
-        The database object.
-    """
-    import os
-    from perch_hoplite.db import sqlite_usearch_impl
-    if not os.path.exists(db_path):
-        os.makedirs(os.path.dirname(db_path), exist_ok=True)
-        return sqlite_usearch_impl.SQLiteUsearchDB.create(
-            db_path=db_path,
-            usearch_cfg=sqlite_usearch_impl.get_default_usearch_config(embedding_dim=1024),  # TODO: dont hardcode this
-        )
-    return sqlite_usearch_impl.SQLiteUsearchDB.create(db_path=db_path)
+def embeddings(
+    audio_input: str,
+    database: str,
+    *,
+    overlap: float = 0.0,
+    audio_speed: float = 1.0,
+    fmin: int = 0,
+    fmax: int = 15000,
+    threads: int = 8,
+    batch_size: int = 1,
+    file_output: str | None = None,
+):
+    """
+    Generates embeddings for audio files using the BirdNET-Analyzer.
+    This function processes audio files to extract embeddings, which are
+    representations of audio features. The embeddings can be used for
+    further analysis or comparison.
+    Args:
+        audio_input (str): Path to the input audio file or directory containing audio files.
+        database (str): Path to the database where embeddings will be stored.
+        overlap (float, optional): Overlap between consecutive audio segments in seconds. Defaults to 0.0.
+        audio_speed (float, optional): Speed factor for audio processing. Defaults to 1.0.
+        fmin (int, optional): Minimum frequency (in Hz) for audio analysis. Defaults to 0.
+        fmax (int, optional): Maximum frequency (in Hz) for audio analysis. Defaults to 15000.
+        threads (int, optional): Number of threads to use for processing. Defaults to 8.
+        batch_size (int, optional): Number of audio segments to process in a single batch. Defaults to 1.
+    Raises:
+        FileNotFoundError: If the input path or database path does not exist.
+        ValueError: If any of the parameters are invalid.
+    Note:
+        Ensure that the required model files are downloaded and available before
+        calling this function. The `ensure_model_exists` function is used to
+        verify this.
+    Example:
+        embeddings(
+            "path/to/audio",
+            "path/to/database",
+            overlap=0.5,
+            audio_speed=1.0,
+            fmin=500,
+            fmax=10000,
+            threads=4,
+            batch_size=2
+        )
+    """
+    from birdnet_analyzer.embeddings.utils import run
+    from birdnet_analyzer.utils import ensure_model_exists
+    ensure_model_exists()
+    run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batch_size, file_output)
+def get_database(db_path: str):
+    """Get the database object. Creates or opens the databse.
+    Args:
+        db: The path to the database.
+    Returns:
+        The database object.
+    """
+    import os
+    from perch_hoplite.db import sqlite_usearch_impl
+    if not os.path.exists(db_path):
+        os.makedirs(os.path.dirname(db_path), exist_ok=True)
+        return sqlite_usearch_impl.SQLiteUsearchDB.create(
+            db_path=db_path,
+            usearch_cfg=sqlite_usearch_impl.get_default_usearch_config(embedding_dim=1024),  # TODO: dont hardcode this
+        )
+    return sqlite_usearch_impl.SQLiteUsearchDB.create(db_path=db_path)

birdnet_analyzer/embeddings/utils.py CHANGED Viewed

@@ -1,220 +1,173 @@
-"""Module used to extract embeddings for samples."""
-import datetime
-import os
-from functools import partial
-from multiprocessing import Pool
-import numpy as np
-from ml_collections import ConfigDict
-from perch_hoplite.db import interface as hoplite
-from perch_hoplite.db import sqlite_usearch_impl
-from tqdm import tqdm
-import birdnet_analyzer.config as cfg
-from birdnet_analyzer import audio, model, utils
-from birdnet_analyzer.analyze.utils import get_raw_audio_from_file
-from birdnet_analyzer.embeddings.core import get_database
-DATASET_NAME: str = "birdnet_analyzer_dataset"
-def analyze_file(item, db: sqlite_usearch_impl.SQLiteUsearchDB):
-    """Extracts the embeddings for a file.
-    Args:
-        item: (filepath, config)
-    """
-    # Get file path and restore cfg
-    fpath: str = item[0]
-    cfg.set_config(item[1])
-    offset = 0
-    duration = cfg.FILE_SPLITTING_DURATION
-    try:
-        fileLengthSeconds = int(audio.get_audio_file_length(fpath))
-    except Exception as ex:
-        # Write error log
-        print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
-        utils.write_error_log(ex)
-        return
-    # Start time
-    start_time = datetime.datetime.now()
-    # Status
-    print(f"Analyzing {fpath}", flush=True)
-    source_id = fpath
-    # Process each chunk
-    try:
-        while offset < fileLengthSeconds:
-            chunks = get_raw_audio_from_file(fpath, offset, duration)
-            start, end = offset, cfg.SIG_LENGTH + offset
-            samples = []
-            timestamps = []
-            for c in range(len(chunks)):
-                # Add to batch
-                samples.append(chunks[c])
-                timestamps.append([start, end])
-                # Advance start and end
-                start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
-                end = start + cfg.SIG_LENGTH
-                # Check if batch is full or last chunk
-                if len(samples) < cfg.BATCH_SIZE and c < len(chunks) - 1:
-                    continue
-                # Prepare sample and pass through model
-                data = np.array(samples, dtype="float32")
-                e = model.embeddings(data)
-                # Add to results
-                for i in range(len(samples)):
-                    # Get timestamp
-                    s_start, s_end = timestamps[i]
-                    # Check if embedding already exists
-                    existing_embedding = db.get_embeddings_by_source(DATASET_NAME, source_id, np.array([s_start, s_end]))
-                    if existing_embedding.size == 0:
-                        # Get prediction
-                        embeddings = e[i]
-                        # Store embeddings
-                        embeddings_source = hoplite.EmbeddingSource(DATASET_NAME, source_id, np.array([s_start, s_end]))
-                        # Insert into database
-                        db.insert_embedding(embeddings, embeddings_source)
-                        db.commit()
-                # Reset batch
-                samples = []
-                timestamps = []
-            offset = offset + duration
-    except Exception as ex:
-        # Write error log
-        print(f"Error: Cannot analyze audio file {fpath}.", flush=True)
-        utils.write_error_log(ex)
-        return
-    delta_time = (datetime.datetime.now() - start_time).total_seconds()
-    print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
-def check_database_settings(db: sqlite_usearch_impl.SQLiteUsearchDB):
-    try:
-        settings = db.get_metadata("birdnet_analyzer_settings")
-        if settings["BANDPASS_FMIN"] != cfg.BANDPASS_FMIN or settings["BANDPASS_FMAX"] != cfg.BANDPASS_FMAX or settings["AUDIO_SPEED"] != cfg.AUDIO_SPEED:
-            raise ValueError(
-                "Database settings do not match current configuration. DB Settings are: fmin:"
-                + f"{settings['BANDPASS_FMIN']}, fmax: {settings['BANDPASS_FMAX']}, audio_speed: {settings['AUDIO_SPEED']}"
-            )
-    except KeyError:
-        settings = ConfigDict({"BANDPASS_FMIN": cfg.BANDPASS_FMIN, "BANDPASS_FMAX": cfg.BANDPASS_FMAX, "AUDIO_SPEED": cfg.AUDIO_SPEED})
-        db.insert_metadata("birdnet_analyzer_settings", settings)
-        db.commit()
-def create_file_output(output_path: str, db: sqlite_usearch_impl.SQLiteUsearchDB):
-    """Creates a file output for the database.
-    Args:
-        output_path: Path to the output file.
-        db: Database object.
-    """
-    # Check if output path exists
-    if not os.path.exists(output_path):
-        os.makedirs(output_path)
-    # Get all embeddings
-    embedding_ids = db.get_embedding_ids()
-    # Write embeddings to file
-    for embedding_id in embedding_ids:
-        embedding = db.get_embedding(embedding_id)
-        source = db.get_embedding_source(embedding_id)
-        # Get start and end time
-        start, end = source.offsets
-        source_id = source.source_id.rsplit(".", 1)[0]
-        filename = f"{source_id}_{start}_{end}.birdnet.embeddings.txt"
-        # Get the common prefix between the output path and the filename
-        common_prefix = os.path.commonpath([output_path, os.path.dirname(filename)])
-        relative_filename = os.path.relpath(filename, common_prefix)
-        target_path = os.path.join(output_path, relative_filename)
-        # Ensure the target directory exists
-        os.makedirs(os.path.dirname(target_path), exist_ok=True)
-        # Write embedding values to a text file
-        with open(target_path, "w") as f:
-            f.write(",".join(map(str, embedding.tolist())))
-def run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batchsize, file_output):
-    ### Make sure to comment out appropriately if you are not using args. ###
-    # Set input and output path
-    cfg.INPUT_PATH = audio_input
-    # Parse input files
-    if os.path.isdir(cfg.INPUT_PATH):
-        cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
-    else:
-        cfg.FILE_LIST = [cfg.INPUT_PATH]
-    # Set overlap
-    cfg.SIG_OVERLAP = max(0.0, min(2.9, float(overlap)))
-    # Set audio speed
-    cfg.AUDIO_SPEED = max(0.01, audio_speed)
-    # Set bandpass frequency range
-    cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
-    cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
-    # Set number of threads
-    if os.path.isdir(cfg.INPUT_PATH):
-        cfg.CPU_THREADS = max(1, int(threads))
-        cfg.TFLITE_THREADS = 1
-    else:
-        cfg.CPU_THREADS = 1
-        cfg.TFLITE_THREADS = max(1, int(threads))
-    cfg.CPU_THREADS = 1  # TODO: with the current implementation, we can't use more than 1 thread
-    # Set batch size
-    cfg.BATCH_SIZE = max(1, int(batchsize))
-    # Add config items to each file list entry.
-    # We have to do this for Windows which does not
-    # support fork() and thus each process has to
-    # have its own config. USE LINUX!
-    flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
-    db = get_database(database)
-    check_database_settings(db)
-    # Analyze files
-    if cfg.CPU_THREADS < 2:
-        for entry in tqdm(flist):
-            analyze_file(entry, db)
-    else:
-        with Pool(cfg.CPU_THREADS) as p:
-            tqdm(p.imap(partial(analyze_file, db=db), flist))
-    if file_output:
-        create_file_output(file_output, db)
-    db.db.close()
+"""Module used to extract embeddings for samples."""
+import datetime
+import os
+from functools import partial
+from multiprocessing import Pool
+import numpy as np
+from ml_collections import ConfigDict
+from perch_hoplite.db import interface as hoplite
+from perch_hoplite.db import sqlite_usearch_impl
+from tqdm import tqdm
+import birdnet_analyzer.config as cfg
+from birdnet_analyzer import utils
+from birdnet_analyzer.analyze.utils import iterate_audio_chunks
+from birdnet_analyzer.embeddings.core import get_database
+DATASET_NAME: str = "birdnet_analyzer_dataset"
+def analyze_file(item, db: sqlite_usearch_impl.SQLiteUsearchDB):
+    """Extracts the embeddings for a file.
+    Args:
+        item: (filepath, config)
+    """
+    # Get file path and restore cfg
+    fpath: str = item[0]
+    cfg.set_config(item[1])
+    # Start time
+    start_time = datetime.datetime.now()
+    # Status
+    print(f"Analyzing {fpath}", flush=True)
+    source_id = fpath
+    # Process each chunk
+    try:
+        for s_start, s_end, embeddings in iterate_audio_chunks(fpath, embeddings=True):
+            # Check if embedding already exists
+            existing_embedding = db.get_embeddings_by_source(DATASET_NAME, source_id, np.array([s_start, s_end]))
+            if existing_embedding.size == 0:
+                # Store embeddings
+                embeddings_source = hoplite.EmbeddingSource(DATASET_NAME, source_id, np.array([s_start, s_end]))
+                # Insert into database
+                db.insert_embedding(embeddings, embeddings_source)
+                db.commit()
+    except Exception as ex:
+        # Write error log
+        print(f"Error: Cannot analyze audio file {fpath}.", flush=True)
+        utils.write_error_log(ex)
+        return
+    delta_time = (datetime.datetime.now() - start_time).total_seconds()
+    print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
+def check_database_settings(db: sqlite_usearch_impl.SQLiteUsearchDB):
+    try:
+        settings = db.get_metadata("birdnet_analyzer_settings")
+        if settings["BANDPASS_FMIN"] != cfg.BANDPASS_FMIN or settings["BANDPASS_FMAX"] != cfg.BANDPASS_FMAX or settings["AUDIO_SPEED"] != cfg.AUDIO_SPEED:
+            raise ValueError(
+                "Database settings do not match current configuration. DB Settings are: fmin:"
+                + f"{settings['BANDPASS_FMIN']}, fmax: {settings['BANDPASS_FMAX']}, audio_speed: {settings['AUDIO_SPEED']}"
+            )
+    except KeyError:
+        settings = ConfigDict({"BANDPASS_FMIN": cfg.BANDPASS_FMIN, "BANDPASS_FMAX": cfg.BANDPASS_FMAX, "AUDIO_SPEED": cfg.AUDIO_SPEED})
+        db.insert_metadata("birdnet_analyzer_settings", settings)
+        db.commit()
+def create_file_output(output_path: str, db: sqlite_usearch_impl.SQLiteUsearchDB):
+    """Creates a file output for the database.
+    Args:
+        output_path: Path to the output file.
+        db: Database object.
+    """
+    # Check if output path exists
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+    # Get all embeddings
+    embedding_ids = db.get_embedding_ids()
+    # Write embeddings to file
+    for embedding_id in embedding_ids:
+        embedding = db.get_embedding(embedding_id)
+        source = db.get_embedding_source(embedding_id)
+        # Get start and end time
+        start, end = source.offsets
+        source_id = source.source_id.rsplit(".", 1)[0]
+        filename = f"{source_id}_{start}_{end}.birdnet.embeddings.txt"
+        # Get the common prefix between the output path and the filename
+        common_prefix = os.path.commonpath([output_path, os.path.dirname(filename)])
+        relative_filename = os.path.relpath(filename, common_prefix)
+        target_path = os.path.join(output_path, relative_filename)
+        # Ensure the target directory exists
+        os.makedirs(os.path.dirname(target_path), exist_ok=True)
+        # Write embedding values to a text file
+        with open(target_path, "w") as f:
+            f.write(",".join(map(str, embedding.tolist())))
+def run(audio_input, database, overlap, audio_speed, fmin, fmax, threads, batchsize, file_output):
+    ### Make sure to comment out appropriately if you are not using args. ###
+    # Set input and output path
+    cfg.INPUT_PATH = audio_input
+    # Parse input files
+    if os.path.isdir(cfg.INPUT_PATH):
+        cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
+    else:
+        cfg.FILE_LIST = [cfg.INPUT_PATH]
+    # Set overlap
+    cfg.SIG_OVERLAP = max(0.0, min(2.9, float(overlap)))
+    # Set audio speed
+    cfg.AUDIO_SPEED = max(0.01, audio_speed)
+    # Set bandpass frequency range
+    cfg.BANDPASS_FMIN = max(0, min(cfg.SIG_FMAX, int(fmin)))
+    cfg.BANDPASS_FMAX = max(cfg.SIG_FMIN, min(cfg.SIG_FMAX, int(fmax)))
+    # Set number of threads
+    if os.path.isdir(cfg.INPUT_PATH):
+        cfg.CPU_THREADS = max(1, int(threads))
+        cfg.TFLITE_THREADS = 1
+    else:
+        cfg.CPU_THREADS = 1
+        cfg.TFLITE_THREADS = max(1, int(threads))
+    cfg.CPU_THREADS = 1  # TODO: with the current implementation, we can't use more than 1 thread
+    # Set batch size
+    cfg.BATCH_SIZE = max(1, int(batchsize))
+    # Add config items to each file list entry.
+    # We have to do this for Windows which does not
+    # support fork() and thus each process has to
+    # have its own config. USE LINUX!
+    flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
+    db = get_database(database)
+    check_database_settings(db)
+    # Analyze files
+    if cfg.CPU_THREADS < 2:
+        for entry in tqdm(flist):
+            analyze_file(entry, db)
+    else:
+        with Pool(cfg.CPU_THREADS) as p:
+            tqdm(p.imap(partial(analyze_file, db=db), flist))
+    if file_output:
+        create_file_output(file_output, db)
+    db.db.close()

birdnet-analyzer 2.1.0__py3-none-any.whl → 2.1.1__py3-none-any.whl

birdnet-analyzer 2.1.0__py3-none-any.whl → 2.1.1__py3-none-any.whl