PyPI - britekit - Versions diffs - 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl - Mend

britekit 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of britekit might be problematic. Click here for more details.

Files changed (12)

britekit/__about__.py +1 -1
britekit/cli.py +2 -0
britekit/commands/__init__.py +2 -0
britekit/commands/_ensemble.py +237 -0
britekit/commands/_reports.py +2 -2
britekit/commands/_tune.py +2 -2
britekit/core/trainer.py +3 -2
{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/METADATA +1 -1
{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/RECORD +12 -11
{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/WHEEL +0 -0
{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/entry_points.txt +0 -0
{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/licenses/LICENSE.txt +0 -0

britekit/__about__.py CHANGED Viewed

@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2025-present Jan Huus <jhuus1@gmail.com>
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.0.8"
+__version__ = "0.0.9"

britekit/cli.py CHANGED Viewed

@@ -30,6 +30,7 @@ from .commands._db_delete import (
     _del_stype_cmd,
 )
 from .commands._embed import _embed_cmd
+from .commands._ensemble import _ensemble_cmd
 from .commands._extract import _extract_all_cmd, _extract_by_image_cmd
 from .commands._find_dup import _find_dup_cmd
 from .commands._inat import _inat_cmd
@@ -80,6 +81,7 @@ cli.add_command(_del_src_cmd)
 cli.add_command(_del_stype_cmd)
 cli.add_command(_embed_cmd)
+cli.add_command(_ensemble_cmd)
 cli.add_command(_extract_all_cmd)
 cli.add_command(_extract_by_image_cmd)

britekit/commands/__init__.py CHANGED Viewed

@@ -13,6 +13,7 @@ from ._db_delete import (
     del_stype,
 )
 from ._embed import embed
+from ._ensemble import ensemble
 from ._extract import extract_all, extract_by_image
 from ._find_dup import find_dup
 from ._inat import inat
@@ -54,6 +55,7 @@ __all__ = [
     "del_src",
     "del_stype",
     "embed",
+    "ensemble",
     "extract_all",
     "extract_by_image",
     "find_dup",

britekit/commands/_ensemble.py ADDED Viewed

@@ -0,0 +1,237 @@
+# File name starts with _ to keep it out of typeahead for API users.
+# Defer some imports to improve --help performance.
+import logging
+import os
+from pathlib import Path
+import tempfile
+from typing import Optional
+import click
+from britekit.core.config_loader import get_config
+from britekit.core import util
+def _eval_ensemble(ensemble, temp_dir, annotations_path, recording_dir):
+    import shutil
+    from britekit.core.analyzer import Analyzer
+    from britekit.testing.per_segment_tester import PerSegmentTester
+    # delete any checkpoints in the temp dir
+    for filename in os.listdir(temp_dir):
+        file_path = os.path.join(temp_dir, filename)
+        os.remove(file_path)
+    # copy checkpoints to the temp dir
+    for file_path in ensemble:
+        file_name = Path(file_path).name
+        dest_path = os.path.join(temp_dir, file_name)
+        shutil.copyfile(file_path, dest_path)
+    # run inference on the given test
+    util.set_logging(level=logging.ERROR) # suppress logging during inference and analysis
+    label_dir = "ensemble_evaluation_labels"
+    inference_output_dir = str(Path(recording_dir) / label_dir)
+    Analyzer().run(recording_dir, inference_output_dir)
+    min_score = 0.8 # irrelevant really
+    with tempfile.TemporaryDirectory() as output_dir:
+        tester = PerSegmentTester(
+            annotations_path,
+            recording_dir,
+            inference_output_dir,
+            output_dir,
+            min_score,
+        )
+        tester.initialize()
+        pr_stats = tester.get_pr_auc_stats()
+        roc_stats = tester.get_roc_auc_stats()
+        scores = {
+            "macro_pr": pr_stats["macro_pr_auc"],
+            "micro_pr": pr_stats["micro_pr_auc_trained"],
+            "macro_roc": roc_stats["macro_roc_auc"],
+            "micro_roc": roc_stats["micro_roc_auc_trained"]
+        }
+        shutil.rmtree(inference_output_dir)
+        util.set_logging() # restore logging
+    return scores
+def ensemble(
+    cfg_path: Optional[str]=None,
+    ckpt_path: str="",
+    ensemble_size: int=3,
+    num_tries: int=100,
+    metric: str = "micro_roc",
+    annotations_path: str = "",
+    recordings_path: Optional[str] = None,
+    output_path: str = "",
+) -> None:
+    """
+    Find the best ensemble of a given size from a group of checkpoints.
+    Given a directory containing checkpoints, and an ensemble size (default=3), select random
+    ensembles of the given size and test each one to identify the best ensemble.
+    Args:
+        cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        ckpt_path (str): Path to directory containing checkpoints.
+        ensemble_size (int): Number of checkpoints in ensemble (default=3).
+        num_tries (int): Maximum number of ensembles to try (default=100).
+        metric (str): Metric to use to compare ensembles (default=micro_roc).
+        annotations_path (str): Path to CSV file containing ground truth annotations.
+        recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
+        output_path (str): Directory where reports will be saved.
+    """
+    import glob
+    import itertools
+    import math
+    import random
+    if metric not in ["macro_pr", "micro_pr", "macro_roc", "micro_roc"]:
+        logging.error(f"Error: invalid metric ({metric})")
+        return
+    cfg, _ = get_config(cfg_path)
+    ckpt_paths = sorted(glob.glob(os.path.join(ckpt_path, "*.ckpt")))
+    num_ckpts = len(ckpt_paths)
+    if num_ckpts == 0:
+        logging.error(f"Error: no checkpoints found in {ckpt_path}")
+        return
+    elif num_ckpts < ensemble_size:
+        logging.error(f"Error: number of checkpoints ({num_ckpts}) is less than requested ensemble size ({ensemble_size})")
+        return
+    if not recordings_path:
+        recordings_path = str(Path(annotations_path).parent)
+    with tempfile.TemporaryDirectory() as temp_dir:
+        cfg.misc.ckpt_folder = temp_dir
+        cfg.infer.min_score = 0
+        best_score = 0
+        best_ensemble = None
+        count = 1
+        total_combinations = math.comb(len(ckpt_paths), ensemble_size)
+        if total_combinations <= num_tries:
+            # Exhaustive search
+            logging.info("Doing exhaustive search")
+            for ensemble in itertools.combinations(ckpt_paths, ensemble_size):
+                scores = _eval_ensemble(ensemble, temp_dir, annotations_path, recordings_path)
+                logging.info(f"For ensemble {count} of {total_combinations}, score = {scores[metric]:.4f}")
+                if scores[metric] > best_score:
+                    best_score = scores[metric]
+                    best_ensemble = ensemble
+                count += 1
+        else:
+            # Random sampling without replacement
+            logging.info("Doing random sampling")
+            seen: set = set()
+            while len(seen) < num_tries:
+                ensemble = tuple(sorted(random.sample(ckpt_paths, ensemble_size)))
+                if ensemble not in seen:
+                    seen.add(ensemble)
+                    scores = _eval_ensemble(ensemble, temp_dir, annotations_path, recordings_path)
+                    logging.info(f"For ensemble {count} of {num_tries}, score = {scores[metric]:.4f}")
+                    if scores[metric] > best_score:
+                        best_score = scores[metric]
+                        best_ensemble = ensemble
+                count += 1
+    logging.info(f"Best score = {best_score:.4f}")
+    best_names = [Path(ckpt_path).name for ckpt_path in best_ensemble]
+    logging.info(f"Best ensemble = {best_names}")
+@click.command(
+    name="ensemble",
+    short_help="Find the best ensemble of a given size from a group of checkpoints.",
+    help=util.cli_help_from_doc(ensemble.__doc__),
+)
+@click.option(
+    "-c",
+    "--cfg",
+    "cfg_path",
+    type=click.Path(exists=True),
+    required=False,
+    help="Path to YAML file defining config overrides.",
+)
+@click.option(
+    "--ckpt_path",
+    "ckpt_path",
+    type=click.Path(exists=True, file_okay=False, dir_okay=True),
+    required=True,
+    help="Directory containing checkpoints."
+)
+@click.option(
+    "-e",
+    "--ensemble_size",
+    "ensemble_size",
+    type=int,
+    default=3,
+    help="Number of checkpoints in ensemble (default=3)."
+)
+@click.option(
+    "-n",
+    "--num_tries",
+    "num_tries",
+    type=int,
+    default=100,
+    help="Maximum number of ensembles to try (default=100)."
+)
+@click.option(
+    "-m",
+    "--metric",
+    "metric",
+    type=click.Choice(
+        [
+            "macro_pr",
+            "micro_pr",
+            "macro_roc",
+            "micro_roc",
+        ]
+    ),
+    default="micro_roc",
+    help="Metric used to compare ensembles (default=micro_roc). Macro-averaging uses annotated classes only, but micro-averaging uses all classes.",
+)
+@click.option(
+    "-a",
+    "--annotations",
+    "annotations_path",
+    type=click.Path(exists=True, file_okay=True, dir_okay=False),
+    required=True,
+    help="Path to CSV file containing annotations or ground truth).",
+)
+@click.option(
+    "-r",
+    "--recordings",
+    "recordings_path",
+    type=click.Path(exists=True, file_okay=False, dir_okay=True),
+    required=False,
+    help="Recordings directory. Default is directory containing annotations file.",
+)
+@click.option(
+    "-o",
+    "--output",
+    "output_path",
+    type=click.Path(file_okay=False, dir_okay=True),
+    required=True,
+    help="Path to output directory.",
+)
+def _ensemble_cmd(
+    cfg_path: Optional[str],
+    ckpt_path: str,
+    ensemble_size: int,
+    num_tries: int,
+    metric: str,
+    annotations_path: str,
+    recordings_path: Optional[str],
+    output_path: str,
+) -> None:
+    util.set_logging()
+    ensemble(cfg_path, ckpt_path, ensemble_size, num_tries, metric, annotations_path, recordings_path, output_path)

britekit/commands/_reports.py CHANGED Viewed

@@ -276,14 +276,14 @@ def rpt_epochs(
             tester.initialize()
             pr_stats = tester.get_pr_auc_stats()
-            pr_score = pr_stats["micro_pr_auc"]
+            pr_score = pr_stats["micro_pr_auc_trained"]
             pr_scores.append(pr_score)
             if pr_score > max_pr_score:
                 max_pr_score = pr_score
                 max_pr_epoch = epoch_num
             roc_stats = tester.get_roc_auc_stats()
-            roc_score = roc_stats["micro_roc_auc"]
+            roc_score = roc_stats["micro_roc_auc_trained"]
             roc_scores.append(roc_score)
             if roc_score > max_roc_score:
                 max_roc_score = roc_score

britekit/commands/_tune.py CHANGED Viewed

@@ -18,7 +18,7 @@ def tune(
     param_path: Optional[str] = None,
     output_path: str = "",
     annotations_path: str = "",
-    metric: str = "macro_roc",
+    metric: str = "micro_roc",
     recordings_path: str = "",
     train_log_path: str = "",
     num_trials: int = 0,
@@ -159,7 +159,7 @@ def tune(
             "micro_roc",
         ]
     ),
-    default="macro_roc",
+    default="micro_roc",
     help="Metric used to compare runs. Macro-averaging uses annotated classes only, but micro-averaging uses all classes.",
 )
 @click.option(

britekit/core/trainer.py CHANGED Viewed

@@ -125,11 +125,12 @@ class Trainer:
         if val_rocs:
             import math
             import numpy as np
             mean = float(np.mean(val_rocs))
-            std  = float(np.std(val_rocs, ddof=1)) if len(val_rocs) > 1 else 0.0
+            std = float(np.std(val_rocs, ddof=1)) if len(val_rocs) > 1 else 0.0
             n = len(val_rocs)
             se = std / math.sqrt(n) if n > 1 else 0.0
-            ci95 = 1.96 * se # 95% CI using normal approximation
+            ci95 = 1.96 * se  # 95% CI using normal approximation
             logging.info("Using micro-averaged ROC AUC")
             scores_str = ", ".join(f"{v:.4f}" for v in val_rocs)

{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: britekit
-Version: 0.0.8
+Version: 0.0.9
 Summary: Core functions for bioacoustic recognizers.
 Project-URL: Documentation, https://github.com/jhuus/BriteKit#readme
 Project-URL: Issues, https://github.com/jhuus/BriteKit/issues

{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-britekit/cli.py,sha256=jhWE19ye4Yyeoog-KCBzazdsmfQAGk_LZdXKnTCTmF0,3003
+britekit/cli.py,sha256=nnrCMfw3-1GJ4rKFpqTLu8JcBGxTocMn7nwzU4OSaew,3080
 britekit/core/analyzer.py,sha256=4hctyNvM3mZ0FywEWKbPHamxzl1nZh1xdHkBxM4WPxo,5617
 britekit/core/audio.py,sha256=8QLbNDAiQyViEhrVC8jU0n32we4C22W_jPfc_KcOlmQ,15853
 britekit/core/augmentation.py,sha256=5_wyB-6gt7uM68Zl-rO_fPu1D6tlsd2m5oWhA6l0W9Q,5721
@@ -11,7 +11,7 @@ britekit/core/pickler.py,sha256=Vj-_DdFQUQj2bIVoyWe5puI8g8dTP9x7ZavbvM1iQZo,5788
 britekit/core/plot.py,sha256=hLuLB1VdtdFyaSHVDGl5tjjFCRgOJJ1ucTVJHM_3D_0,5332
 britekit/core/predictor.py,sha256=u4H8horTTvcg4Oqfpy5PG44eiiMeR5RU3aPZnMiXRCw,22914
 britekit/core/reextractor.py,sha256=gazhIZN8V1K4T_Q_kc-ihxUYbkNnc_hoAS6bpYQc95I,8396
-britekit/core/trainer.py,sha256=N5EsbCzxw3wXxs2PTJJ0OfYFkIi49HCRM0ylT5zSSZk,6439
+britekit/core/trainer.py,sha256=tKyXZf5vm1yHJ8tyVvwgDOprAVZPKdiVEbLHlDJ8hKo,6440
 britekit/core/tuner.py,sha256=FMmy4p3_j2Tojs4ONPzuUeRpCPWGlttr4rUJac7Hkyk,16435
 britekit/core/util.py,sha256=0JsEEN09hFPQzuttCKaejWofXAjCGSvWEewjkiLAh3E,19172
 britekit/models/base_model.py,sha256=9T7TwHx3K8fl10Vb-qUuypK3NDDZM-ktB8ZLHzqQhdc,16883
@@ -32,7 +32,7 @@ britekit/testing/per_segment_tester.py,sha256=FnaozQ8VmH99aYc1ibmDFfOk_ADgsXQGU_
 britekit/training_db/extractor.py,sha256=pT7lAUsNzYs3RXDzpMv7q0MKg6TktiFLKrRtKTWv6ho,8409
 britekit/training_db/training_data_provider.py,sha256=V5aBjsCvrWViZ0Jv05hgcKRizcAXmqoj4q3hAHedoD8,5651
 britekit/training_db/training_db.py,sha256=OOfD1pcbq5HVJbzhmuI-D-gkPHWSoz0cCO4zIUGFvoY,65011
-britekit/__about__.py,sha256=-uGInVbPaVLti1Rr4PYUteRetwYfxeLtIuqiLmEcRjA,122
+britekit/__about__.py,sha256=QXWLwMXjHd1KWRO6vKHNgPREhZNrZv3ac2FWBvQPN6E,122
 britekit/__init__.py,sha256=RpruzdjbvTcFNf21zJYY8HrAhJei91FtNNLjIBmw-kw,1857
 britekit/install/data/classes.csv,sha256=OdTZ8oQdx7N-HKyhftxZStGZYsjhCy4UbanwtQJ2wBM,54
 britekit/install/data/ignore.txt,sha256=RbKvEHtUCbgRYolwR1IucClwyD3q7l2s6QuRjph-Us4,68
@@ -64,7 +64,7 @@ britekit/install/yaml/samples/tune_dropout.yaml,sha256=f3QEfPOZecjwthqzAWodI8-PX
 britekit/install/yaml/samples/tune_learning_rate.yaml,sha256=UTtpsJwO33UWW0oecGR_LV3nQPtyC1dbpkkJpGOlI68,83
 britekit/install/yaml/samples/tune_optimizer.yaml,sha256=VtGlZmMJ22gaZWJ7CPLNHRZ-8EHeB5GmxywQm1Iy1MM,73
 britekit/install/yaml/samples/tune_smooth.yaml,sha256=IZq2lohiJWVdzPl-i3aCEwEsJLmG_bg7EvyBUSI-R0o,83
-britekit/commands/__init__.py,sha256=cgiHBDFQ7o1JL-wk9z0R_QEn7UVV_E0SPN7AANzxRdM,1538
+britekit/commands/__init__.py,sha256=mms49ChyrGj4zzeUge6bl7uiPhOMjFm37NTk23ZFmXw,1586
 britekit/commands/_analyze.py,sha256=Hss0ubLjGM2FSbQk52S9wvfj73-gkym4uW_o8Td-BOc,4954
 britekit/commands/_audioset.py,sha256=BqmAJq6yWpyqBYIUWt9d0khBTQRa3vgUMdCS4U0fxvA,9957
 britekit/commands/_calibrate.py,sha256=338dRyGRj-Bw_4wFxiANDCbo-lZgdl0OR2gD8PmLv8U,4912
@@ -72,6 +72,7 @@ britekit/commands/_ckpt_ops.py,sha256=gutU8wqzrJCIyyuo_kLtIaOm9tq6h7q1Xm9L2QNU56
 britekit/commands/_db_add.py,sha256=LQD3nR_d8oI19YNi06EzE62kS5DlbvL-q2HZSRmEGeE,7261
 britekit/commands/_db_delete.py,sha256=rCV2tL8x-sNgsYmHZc6Id7_4-iLynwkK2f2_KRFkAZo,14541
 britekit/commands/_embed.py,sha256=MlP1HMRBmOANWEdbW1qhpnFGaxMUyeGEYOqaXV6K_cg,4391
+britekit/commands/_ensemble.py,sha256=UElN1aajykpktekfA4bKPHh0VB1NYwJtaEjn91xRF2c,7849
 britekit/commands/_extract.py,sha256=7c_XnJY42IQ2AA70JmgFU9IkIUodkDoLy2vfYWU99AE,8865
 britekit/commands/_find_dup.py,sha256=yPn2EqG0icYHgUN8_87KuY9uOqEwDxqvhJc-DfBD40w,6353
 britekit/commands/_inat.py,sha256=ojTre5BCj_jmEh6x2kzNhcminLN6h5bzsYpxyrxGRdQ,4164
@@ -79,16 +80,16 @@ britekit/commands/_init.py,sha256=FmaQRY-7SYSHCLXL__47LEPecWir7X6zEB05KpradFw,28
 britekit/commands/_pickle.py,sha256=p990FsJGfSXcgjtBzH7nPGPh023b8cH0D7RZywQQ5Aw,3488
 britekit/commands/_plot.py,sha256=7vZXsYP9dv4PbHb8K3YbJFZc65YoPIBjEMBolyh6Has,13084
 britekit/commands/_reextract.py,sha256=kCmSjeghg6mhrJ46ibRTmBkGVytU7flFvTbqsnYhBvY,3770
-britekit/commands/_reports.py,sha256=KVYtpeFQpUC4jAIm2k2xV7aiNq826DL6sUrYEJD38X0,22023
+britekit/commands/_reports.py,sha256=qnUEWUgEB3BFzshBAQ9nz75Mvjpl2bEZCBy5ttNx7l4,22039
 britekit/commands/_search.py,sha256=HIUXwfPvh3rxpgaFSR3bAAI38OtGVPyMo5GMfLtLX-8,9991
 britekit/commands/_train.py,sha256=vGFKlfcv35cOelArQNbVbTRbDWogT_IMg0wZt5virHY,4158
-britekit/commands/_tune.py,sha256=8dEZZURE769C0JZwhNpzB6pQxVklzl2w2cyXyWyhWXs,7331
+britekit/commands/_tune.py,sha256=g9GnlOSJpa-ZfNAw2iCMzw0qPgLFTGdTUjzw8Ghjfvc,7331
 britekit/commands/_wav2mp3.py,sha256=2Q4cjT6OhJmBPTNzGRMrDd6dSdBBufuQdjhH1V8ghLo,2167
 britekit/commands/_xeno.py,sha256=_6YxQ7xFdaSy5DNUaigkbYp3E8EhtOhTC9b6OFS0MFA,6026
 britekit/commands/_youtube.py,sha256=_u1LrwY_2GxllKd505N_2ArFMbACQ_PtVxuqUCYxFe0,2214
 britekit/core/__init__.py,sha256=QcjcFyvO5KqJLF_HBeqiCk925uU5jTUjIV5lJix9XY4,556
-britekit-0.0.8.dist-info/METADATA,sha256=Qtzlff9X_WI1Cz8zpTyntAwFemS8hNbS0ClWJV9KVXk,18555
-britekit-0.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-britekit-0.0.8.dist-info/entry_points.txt,sha256=ycnPy5DLX14RTf7lKfkQAVyIf1B1zTL1gMsHm455wmg,46
-britekit-0.0.8.dist-info/licenses/LICENSE.txt,sha256=kPoHm6iop8-CUa_720Tt8gqyvLD6D_7218u1hCCpErk,1092
-britekit-0.0.8.dist-info/RECORD,,
+britekit-0.0.9.dist-info/METADATA,sha256=XFCWiF08LtF--mnG5gfLK0T7DeypGxF0oH4-s_T8u2g,18555
+britekit-0.0.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+britekit-0.0.9.dist-info/entry_points.txt,sha256=ycnPy5DLX14RTf7lKfkQAVyIf1B1zTL1gMsHm455wmg,46
+britekit-0.0.9.dist-info/licenses/LICENSE.txt,sha256=kPoHm6iop8-CUa_720Tt8gqyvLD6D_7218u1hCCpErk,1092
+britekit-0.0.9.dist-info/RECORD,,

{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/WHEEL RENAMED Viewed

File without changes

{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{britekit-0.0.8.dist-info → britekit-0.0.9.dist-info}/licenses/LICENSE.txt RENAMED Viewed

File without changes

britekit 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

Potentially problematic release.

britekit 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl