PyPI - britekit - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

britekit 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of britekit might be problematic. Click here for more details.

Files changed (46) hide show

britekit/__about__.py +1 -1
britekit/cli.py +6 -1
britekit/commands/__init__.py +2 -1
britekit/commands/_analyze.py +37 -11
britekit/commands/_audioset.py +8 -8
britekit/commands/_calibrate.py +8 -8
britekit/commands/_ckpt_ops.py +6 -6
britekit/commands/_db_add.py +12 -12
britekit/commands/_db_delete.py +15 -15
britekit/commands/_embed.py +4 -4
britekit/commands/_ensemble.py +7 -7
britekit/commands/_extract.py +158 -19
britekit/commands/_find_dup.py +5 -5
britekit/commands/_inat.py +4 -4
britekit/commands/_init.py +1 -1
britekit/commands/_pickle.py +13 -7
britekit/commands/_plot.py +26 -26
britekit/commands/_reextract.py +6 -6
britekit/commands/_reports.py +22 -22
britekit/commands/_search.py +12 -12
britekit/commands/_train.py +6 -6
britekit/commands/_tune.py +13 -13
britekit/commands/_wav2mp3.py +2 -2
britekit/commands/_xeno.py +7 -7
britekit/commands/_youtube.py +3 -3
britekit/core/analyzer.py +43 -13
britekit/core/audio.py +14 -14
britekit/core/augmentation.py +24 -0
britekit/core/data_module.py +2 -2
britekit/core/dataset.py +1 -4
britekit/core/plot.py +8 -8
britekit/core/predictor.py +51 -23
britekit/core/reextractor.py +6 -6
britekit/core/util.py +44 -8
britekit/models/base_model.py +0 -1
britekit/occurrence_db/occurrence_data_provider.py +13 -13
britekit/testing/per_recording_tester.py +2 -2
britekit/training_db/extractor.py +65 -30
britekit/training_db/training_data_provider.py +1 -1
britekit/training_db/training_db.py +97 -100
britekit-0.1.5.dist-info/METADATA +299 -0
{britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/RECORD +45 -45
britekit-0.1.3.dist-info/METADATA +0 -290
{britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/WHEEL +0 -0
{britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/entry_points.txt +0 -0
{britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/licenses/LICENSE.txt +0 -0

britekit/__about__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2025-present Jan Huus <jhuus1@gmail.com>
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.1.3"
+__version__ = "0.1.5"

britekit/cli.py CHANGED Viewed

@@ -31,7 +31,11 @@ from .commands._db_delete import (
 )
 from .commands._embed import _embed_cmd
 from .commands._ensemble import _ensemble_cmd
-from .commands._extract import _extract_all_cmd, _extract_by_image_cmd
+from .commands._extract import (
+    _extract_all_cmd,
+    _extract_by_csv_cmd,
+    _extract_by_image_cmd,
+)
 from .commands._find_dup import _find_dup_cmd
 from .commands._inat import _inat_cmd
 from .commands._init import _init_cmd
@@ -83,6 +87,7 @@ cli.add_command(_del_stype_cmd)
 cli.add_command(_embed_cmd)
 cli.add_command(_ensemble_cmd)
 cli.add_command(_extract_all_cmd)
+cli.add_command(_extract_by_csv_cmd)
 cli.add_command(_extract_by_image_cmd)
 cli.add_command(_find_dup_cmd)

britekit/commands/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@ from ._db_delete import (
 )
 from ._embed import embed
 from ._ensemble import ensemble
-from ._extract import extract_all, extract_by_image
+from ._extract import extract_all, extract_by_csv, extract_by_image
 from ._find_dup import find_dup
 from ._inat import inat
 from ._init import init
@@ -57,6 +57,7 @@ __all__ = [
     "embed",
     "ensemble",
     "extract_all",
+    "extract_by_csv",
     "extract_by_image",
     "find_dup",
     "find_lr",

britekit/commands/_analyze.py CHANGED Viewed

@@ -17,10 +17,12 @@ def analyze(
     input_path: str = "",
     output_path: str = "",
     rtype: str = "both",
+    start_seconds: float = 0,
     min_score: Optional[float] = None,
     num_threads: Optional[int] = None,
     overlap: Optional[float] = None,
     segment_len: Optional[float] = None,
+    debug_mode: bool = False,
 ):
     """
     Run inference on audio recordings to detect and classify sounds.
@@ -30,15 +32,18 @@ def analyze(
     CSV files, or both.
     Args:
-        cfg_path (str): Path to YAML configuration file defining model and inference settings.
-        input_path (str): Path to input audio file or directory containing audio files.
-        output_path (str): Path to output directory where results will be saved.
-        rtype (str): Output format type. Options are "audacity", "csv", or "both".
-        min_score (float, optional): Confidence threshold. Predictions below this value are excluded.
-        num_threads (int, optional): Number of threads to use for processing. Default is 3.
-        overlap (float, optional): Spectrogram overlap in seconds for sliding window analysis.
-        segment_len (float, optional): Fixed segment length in seconds. If specified, labels are
-                                     fixed-length; otherwise they are variable-length.
+    - cfg_path (str): Path to YAML configuration file defining model and inference settings.
+    - input_path (str): Path to input audio file or directory containing audio files.
+    - output_path (str): Path to output directory where results will be saved.
+    - rtype (str): Output format type. Options are "audacity", "csv", or "both".
+    - start_seconds (float): Where to start processing each recording, in seconds.
+      For example, '71' and '1:11' have the same meaning, and cause the first 71 seconds to be ignored. Default = 0.
+    - min_score (float, optional): Confidence threshold. Predictions below this value are excluded.
+    - num_threads (int, optional): Number of threads to use for processing. Default is 3.
+    - overlap (float, optional): Spectrogram overlap in seconds for sliding window analysis.
+    - segment_len (float, optional): Fixed segment length in seconds. If specified, labels are
+        fixed-length; otherwise they are variable-length.
+    - debug_mode (bool): If specified, log the top scores for the first spectrogram, then stop.
     """
     # defer slow imports to improve --help performance
@@ -78,7 +83,7 @@ def analyze(
         start_time = time.time()
         analyzer = Analyzer()
-        analyzer.run(input_path, output_path, rtype)
+        analyzer.run(input_path, output_path, rtype, start_seconds, debug_mode)
         elapsed_time = util.format_elapsed_time(start_time, time.time())
         logging.info(f"Elapsed time = {elapsed_time}")
     except InferenceError as e:
@@ -119,6 +124,12 @@ def analyze(
     default="both",
     help='Output format type. Options are "audacity", "csv", or "both". Default="both".',
 )
+@click.option(
+    "--start",
+    "start_seconds_str",
+    type=str,
+    help="Where to start processing each recording, in seconds. For example, '71' and '1:11' have the same meaning, and cause the first 71 seconds to be ignored. Default = 0.",
+)
 @click.option(
     "-m",
     "--min_score",
@@ -136,7 +147,7 @@ def analyze(
     "--overlap",
     "overlap",
     type=float,
-    help="Number of threads (optional, default = 3)",
+    help="Amount of segment overlap in seconds.",
 )
 @click.option(
     "--seg",
@@ -144,26 +155,41 @@ def analyze(
     type=float,
     help="Optional segment length in seconds. If specified, labels are fixed-length. Otherwise they are variable-length.",
 )
+@click.option(
+    "--debug",
+    "debug_mode",
+    is_flag=True,
+    help="If specified, log the top scores for the first spectrogram, then stop.",
+)
 def _analyze_cmd(
     cfg_path: str,
     input_path: str,
     output_path: str,
     rtype: str,
+    start_seconds_str: Optional[str] = None,
     min_score: Optional[float] = None,
     num_threads: Optional[int] = None,
     overlap: Optional[float] = None,
     segment_len: Optional[float] = None,
+    debug_mode: bool = False,
 ):
     from britekit.core import util
     util.set_logging()
+    if start_seconds_str:
+        start_seconds = util._get_seconds_from_time_string(start_seconds_str)
+    else:
+        start_seconds = 0
     analyze(
         cfg_path,
         input_path,
         output_path,
         rtype,
+        start_seconds,
         min_score,
         num_threads,
         overlap,
         segment_len,
+        debug_mode,
     )

britekit/commands/_audioset.py CHANGED Viewed

@@ -201,14 +201,14 @@ def audioset(
     shows which other classes commonly co-occur with the specified class.
     Args:
-        class_name (str): Name of the audio class to download (e.g., "train", "speech", "music").
-        curated_csv_path (str): Path to CSV file containing a curated list of clips to download.
-        output_dir (str): Directory where downloaded recordings will be saved.
-        max_downloads (int): Maximum number of recordings to download. Default is 500.
-        sampling_rate (float): Output sampling rate in Hz. Default is 32000.
-        num_to_skip (int): Number of initial recordings to skip. Default is 0.
-        do_report (bool): If True, generate a report on associated secondary classes instead of downloading.
-        root_dir (str): Directory that contains the data directory. Default is working directory.
+    - class_name (str): Name of the audio class to download (e.g., "train", "speech", "music").
+    - curated_csv_path (str): Path to CSV file containing a curated list of clips to download.
+    - output_dir (str): Directory where downloaded recordings will be saved.
+    - max_downloads (int): Maximum number of recordings to download. Default is 500.
+    - sampling_rate (float): Output sampling rate in Hz. Default is 32000.
+    - num_to_skip (int): Number of initial recordings to skip. Default is 0.
+    - do_report (bool): If True, generate a report on associated secondary classes instead of downloading.
+    - root_dir (str): Directory that contains the data directory. Default is working directory.
     """
     if class_name is None and curated_csv_path is None:

britekit/commands/_calibrate.py CHANGED Viewed

@@ -34,14 +34,14 @@ def calibrate(
     prediction scores to better reflect true probabilities.
     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        annotations_path (str): Path to CSV file containing ground truth annotations.
-        label_dir (str): Directory containing model prediction labels (Audacity format).
-        output_path (str): Directory where calibration reports will be saved.
-        recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
-        cutoff (float): Ignore predictions below this threshold during calibration. Default is 0.4.
-        coef (float, optional): Use this coefficient for the calibration plot.
-        inter (float, optional): Use this intercept for the calibration plot.
+    - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+    - annotations_path (str): Path to CSV file containing ground truth annotations.
+    - label_dir (str): Directory containing model prediction labels (Audacity format).
+    - output_path (str): Directory where calibration reports will be saved.
+    - recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
+    - cutoff (float): Ignore predictions below this threshold during calibration. Default is 0.4.
+    - coef (float, optional): Use this coefficient for the calibration plot.
+    - inter (float, optional): Use this intercept for the calibration plot.
     """
     from britekit.testing.per_segment_tester import PerSegmentTester

britekit/commands/_ckpt_ops.py CHANGED Viewed

@@ -19,9 +19,9 @@ def ckpt_avg(input_path: str="", output_path: Optional[str]=None):
     with averaged weights.
     Args:
-        input_path (str): Directory containing checkpoint files (*.ckpt) to average.
-        output_path (str, optional): Path for the output averaged checkpoint.
-                                   Defaults to "average.ckpt" in the input directory.
+    - input_path (str): Directory containing checkpoint files (*.ckpt) to average.
+    - output_path (str, optional): Path for the output averaged checkpoint.
+        Defaults to "average.ckpt" in the input directory.
     """
     import torch
@@ -88,7 +88,7 @@ def ckpt_freeze(input_path: str=""):
     and inference rather than continued training.
     Args:
-        input_path (str): Path to the checkpoint file to freeze.
+    - input_path (str): Path to the checkpoint file to freeze.
     """
     import pytorch_lightning as pl
     from britekit.models.model_loader import load_from_checkpoint
@@ -136,8 +136,8 @@ def ckpt_onnx(
     checkpoint.
     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        input_path (str): Path to the PyTorch checkpoint file to convert.
+    - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+    - input_path (str): Path to the PyTorch checkpoint file to convert.
     """
     import torch
     from britekit.models.model_loader import load_from_checkpoint

britekit/commands/_db_add.py CHANGED Viewed

@@ -18,8 +18,8 @@ def add_cat(db_path: Optional[str]=None, name: str="") -> None:
     that contain multiple related species classes.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        name (str): Name of the category to add (e.g., "Birds", "Mammals").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - name (str): Name of the category to add (e.g., "Birds", "Mammals").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -58,8 +58,8 @@ def add_stype(db_path: Optional[str]=None, name: str="") -> None:
     or sounds produced by the same species.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        name (str): Name of the sound type to add (e.g., "Song", "Call", "Alarm").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - name (str): Name of the sound type to add (e.g., "Song", "Call", "Alarm").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -98,8 +98,8 @@ def add_src(db_path: Optional[str]=None, name: str="") -> None:
     maintain provenance and can be useful for data quality analysis.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        name (str): Name of the source to add (e.g., "Xeno-Canto", "Macaulay Library").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - name (str): Name of the source to add (e.g., "Xeno-Canto", "Macaulay Library").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -145,12 +145,12 @@ def add_class(
     This is typically used to add new species or sound types to the training database.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        category (str): Name of the category this class belongs to. Defaults to "default".
-        name (str): Primary name of the class (e.g., "Common Yellowthroat").
-        code (str): Primary code for the class (e.g., "COYE").
-        alt_name (str, optional): Alternate name for the class (e.g., scientific name).
-        alt_code (str, optional): Alternate code for the class (e.g., scientific code).
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - category (str): Name of the category this class belongs to. Defaults to "default".
+    - name (str): Primary name of the class (e.g., "Common Yellowthroat").
+    - code (str): Primary code for the class (e.g., "COYE").
+    - alt_name (str, optional): Alternate name for the class (e.g., scientific name).
+    - alt_code (str, optional): Alternate code for the class (e.g., scientific code).
     """
     from britekit.training_db.training_db import TrainingDatabase

britekit/commands/_db_delete.py CHANGED Viewed

@@ -20,8 +20,8 @@ def del_cat(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
     This is a destructive operation that cannot be undone.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        name (str): Name of the category to delete (e.g., "Birds", "Mammals").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - name (str): Name of the category to delete (e.g., "Birds", "Mammals").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -73,8 +73,8 @@ def del_class(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
     be undone and will affect any training data associated with this class.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        name (str): Name of the class to delete (e.g., "Common Yellowthroat").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - name (str): Name of the class to delete (e.g., "Common Yellowthroat").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -123,8 +123,8 @@ def del_rec(db_path: Optional[str]=None, file_name: Optional[str]=None) -> None:
     extracted from it.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        file_name (str): Name of the recording file to delete (e.g., "XC123456.mp3").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - file_name (str): Name of the recording file to delete (e.g., "XC123456.mp3").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -167,8 +167,8 @@ def del_sgroup(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
     This command removes the entire group and all spectrograms within it.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        name (str): Name of the spectrogram group to delete (e.g., "default", "augmented").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - name (str): Name of the spectrogram group to delete (e.g., "default", "augmented").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -212,8 +212,8 @@ def del_stype(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
     to null, effectively removing the sound type classification while keeping the audio data.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        name (str): Name of the sound type to delete (e.g., "Song", "Call", "Alarm").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - name (str): Name of the sound type to delete (e.g., "Song", "Call", "Alarm").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -257,8 +257,8 @@ def del_src(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
     removing entire datasets from a specific source (e.g., removing all Xeno-Canto data).
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        name (str): Name of the source to delete (e.g., "Xeno-Canto", "Macaulay Library").
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - name (str): Name of the source to delete (e.g., "Xeno-Canto", "Macaulay Library").
     """
     from britekit.training_db.training_db import TrainingDatabase
@@ -305,9 +305,9 @@ def del_seg(db_path: Optional[str]=None, class_name: Optional[str]=None, dir_pat
     allowing you to remove low-quality or incorrectly labeled segments.
     Args:
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        class_name (str): Name of the class whose segments should be considered for deletion.
-        dir_path (str): Path to directory containing spectrogram image files.
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - class_name (str): Name of the class whose segments should be considered for deletion.
+    - dir_path (str): Path to directory containing spectrogram image files.
     """
     from britekit.training_db.training_db import TrainingDatabase

britekit/commands/_embed.py CHANGED Viewed

@@ -23,10 +23,10 @@ def embed(
     downstream tasks. The embeddings are compressed and stored in the database.
     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        class_name (str, optional): Name of a specific class to process. If omitted, processes all classes.
-        spec_group (str): Spectrogram group name to process. Defaults to 'default'.
+    - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - class_name (str, optional): Name of a specific class to process. If omitted, processes all classes.
+    - spec_group (str): Spectrogram group name to process. Defaults to 'default'.
     """
     def embed_block(

britekit/commands/_ensemble.py CHANGED Viewed

@@ -65,13 +65,13 @@ def ensemble(
     ensembles of the given size and test each one to identify the best ensemble.
     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        ckpt_path (str): Path to directory containing checkpoints.
-        ensemble_size (int): Number of checkpoints in ensemble (default=3).
-        num_tries (int): Maximum number of ensembles to try (default=100).
-        metric (str): Metric to use to compare ensembles (default=micro_roc).
-        annotations_path (str): Path to CSV file containing ground truth annotations.
-        recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
+    - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+    - ckpt_path (str): Path to directory containing checkpoints.
+    - ensemble_size (int): Number of checkpoints in ensemble (default=3).
+    - num_tries (int): Maximum number of ensembles to try (default=100).
+    - metric (str): Metric to use to compare ensembles (default=micro_roc).
+    - annotations_path (str): Path to CSV file containing ground truth annotations.
+    - recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
     """
     import glob
     import itertools

britekit/commands/_extract.py CHANGED Viewed

@@ -29,15 +29,15 @@ def extract_all(
     it will be automatically created.
     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
-        class_code (str, optional): Class code for new class creation (e.g., "COYE").
-        class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
-        dir_path (str): Path to directory containing audio recordings to process.
-        overlap (float, optional): Spectrogram overlap in seconds. Defaults to config value.
-        src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
-        spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
+    - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
+    - class_code (str, optional): Class code for new class creation (e.g., "COYE").
+    - class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
+    - dir_path (str): Path to directory containing audio recordings to process.
+    - overlap (float, optional): Spectrogram overlap in seconds. Defaults to config value.
+    - src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
+    - spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
     """
     from britekit.training_db.extractor import Extractor
     from britekit.training_db.training_db import TrainingDatabase
@@ -134,6 +134,145 @@ def _extract_all_cmd(
     )
+def extract_by_csv(
+    cfg_path: Optional[str]=None,
+    db_path: Optional[str]=None,
+    cat_name: Optional[str]=None,
+    class_code: Optional[str]=None,
+    class_name: str="",
+    rec_dir: str="",
+    csv_path: str="",
+    dest_dir: Optional[str]=None,
+    src_name: Optional[str]=None,
+    spec_group: Optional[str]=None,
+) -> None:
+    """
+    Extract spectrograms that correspond to rows in a CSV file.
+    This command parses a CSV file to identify the corresponding audio
+    segments and extracts those spectrograms from the original recordings.
+    This is useful when you have pre-selected spectrograms (e.g., from manual review
+    or search results) and want to extract only those specific segments. The CSV file
+    needs two columns: recording and start_time, where recording is the stem of the
+    recording file name (e.g. XC12345) and start_time is the offset in seconds from the
+    start of the recording.
+    Args:
+    - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
+    - class_code (str, optional): Class code for new class creation (e.g., "COYE").
+    - class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
+    - rec_dir (str): Path to directory containing the original audio recordings.
+    - csv_path (str): Path to CSV file containing two columns (recording and offset) to identify segments to extract.
+    - dest_dir (str, optional): If specified, copy used recordings to this directory.
+    - src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
+    - spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
+    """
+    from britekit.training_db.extractor import Extractor
+    from britekit.training_db.training_db import TrainingDatabase
+    cfg = get_config(cfg_path)
+    if db_path is not None:
+        cfg.train.train_db = db_path
+    with TrainingDatabase(cfg.train.train_db) as db:
+        extractor = Extractor(
+            db, class_name, class_code, cat_name, src_name, spec_group=spec_group
+        )
+        count = extractor.extract_by_csv(rec_dir, csv_path, dest_dir)
+        logging.info(f"Inserted {count} spectrograms")
+@click.command(
+    name="extract-by-csv",
+    short_help="Insert spectrograms that correspond to rows in a CSV file.",
+    help=util.cli_help_from_doc(extract_by_csv.__doc__),
+)
+@click.option(
+    "-c",
+    "--cfg",
+    "cfg_path",
+    type=click.Path(exists=True),
+    required=False,
+    help="Path to YAML file defining config overrides.",
+)
+@click.option(
+    "-d", "--db", "db_path", required=False, help="Path to the training database."
+)
+@click.option(
+    "--cat",
+    "cat_name",
+    required=False,
+    help="Category name, e.g. 'bird' for when new class is added. Defaults to 'default'.",
+)
+@click.option(
+    "--code",
+    "class_code",
+    required=False,
+    help="Class code for when new class is added.",
+)
+@click.option("--name", "class_name", required=True, help="Class name.")
+@click.option(
+    "--rec-dir",
+    "rec_dir",
+    type=click.Path(exists=True, file_okay=False, dir_okay=True),
+    required=True,
+    help="Path to directory containing recordings.",
+)
+@click.option(
+    "--csv-path",
+    "csv_path",
+    type=click.Path(exists=True, file_okay=True, dir_okay=False),
+    required=True,
+    help="Path to CSV file containing two columns (recording and offset) to identify segments to extract.",
+)
+@click.option(
+    "--dest-dir",
+    "dest_dir",
+    type=click.Path(exists=True, file_okay=False, dir_okay=True),
+    required=False,
+    help="Copy used recordings to this directory if specified.",
+)
+@click.option(
+    "--src",
+    "src_name",
+    required=False,
+    help="Source name for inserted recordings. Defaults to 'default'.",
+)
+@click.option(
+    "--sgroup",
+    "spec_group",
+    required=False,
+    help="Spectrogram group name. Defaults to 'default'.",
+)
+def _extract_by_csv_cmd(
+    cfg_path: Optional[str],
+    db_path: Optional[str],
+    cat_name: Optional[str],
+    class_code: Optional[str],
+    class_name: str,
+    rec_dir: str,
+    csv_path: str,
+    dest_dir: Optional[str],
+    src_name: Optional[str],
+    spec_group: Optional[str],
+) -> None:
+    util.set_logging()
+    extract_by_csv(
+        cfg_path,
+        db_path,
+        cat_name,
+        class_code,
+        class_name,
+        rec_dir,
+        csv_path,
+        dest_dir,
+        src_name,
+        spec_group,
+    )
 def extract_by_image(
     cfg_path: Optional[str]=None,
     db_path: Optional[str]=None,
@@ -158,16 +297,16 @@ def extract_by_image(
     that allows the command to locate and extract the corresponding audio segments.
     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
-        class_code (str, optional): Class code for new class creation (e.g., "COYE").
-        class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
-        rec_dir (str): Path to directory containing the original audio recordings.
-        spec_dir (str): Path to directory containing spectrogram image files.
-        dest_dir (str, optional): If specified, copy used recordings to this directory.
-        src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
-        spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
+    - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
+    - class_code (str, optional): Class code for new class creation (e.g., "COYE").
+    - class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
+    - rec_dir (str): Path to directory containing the original audio recordings.
+    - spec_dir (str): Path to directory containing spectrogram image files.
+    - dest_dir (str, optional): If specified, copy used recordings to this directory.
+    - src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
+    - spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
     """
     from britekit.training_db.extractor import Extractor
     from britekit.training_db.training_db import TrainingDatabase

britekit/commands/_find_dup.py CHANGED Viewed

@@ -31,11 +31,11 @@ def find_dup(
     using cosine distance.
     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        class_name (str): Name of the class to scan for duplicates (e.g., "Common Yellowthroat").
-        delete (bool): If True, remove duplicate recordings from the database. If False, only report them.
-        spec_group (str): Spectrogram group name to use for embedding comparison. Defaults to "default".
+    - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+    - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+    - class_name (str): Name of the class to scan for duplicates (e.g., "Common Yellowthroat").
+    - delete (bool): If True, remove duplicate recordings from the database. If False, only report them.
+    - spec_group (str): Spectrogram group name to use for embedding comparison. Defaults to "default".
     """
     class Recording:

britekit/commands/_inat.py CHANGED Viewed

@@ -54,10 +54,10 @@ def inat(
     The command respects the maximum download limit and can optionally add filename prefixes.
     Args:
-        output_dir (str): Directory where downloaded recordings will be saved.
-        max_downloads (int): Maximum number of recordings to download. Default is 500.
-        name (str): Species name to search for (e.g., "Common Yellowthroat", "Geothlypis trichas").
-        no_prefix (bool): If True, skip adding "N" prefix to filenames. Default adds prefix.
+    - output_dir (str): Directory where downloaded recordings will be saved.
+    - max_downloads (int): Maximum number of recordings to download. Default is 500.
+    - name (str): Species name to search for (e.g., "Common Yellowthroat", "Geothlypis trichas").
+    - no_prefix (bool): If True, skip adding "N" prefix to filenames. Default adds prefix.
     """
     import pyinaturalist

britekit/commands/_init.py CHANGED Viewed

@@ -32,7 +32,7 @@ def init(dest: Optional[Path]=None) -> None:
     a default directory structure.
     Args:
-        dest (Path): Directory to copy files into. Subdirectories are created as needed.
+    - dest (Path): Directory to copy files into. Subdirectories are created as needed.
     Examples:
         britekit init --dest .

britekit 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl

Potentially problematic release.

britekit 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl