britekit 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of britekit might be problematic. Click here for more details.

britekit/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Jan Huus <jhuus1@gmail.com>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.8"
4
+ __version__ = "0.0.9"
britekit/cli.py CHANGED
@@ -30,6 +30,7 @@ from .commands._db_delete import (
30
30
  _del_stype_cmd,
31
31
  )
32
32
  from .commands._embed import _embed_cmd
33
+ from .commands._ensemble import _ensemble_cmd
33
34
  from .commands._extract import _extract_all_cmd, _extract_by_image_cmd
34
35
  from .commands._find_dup import _find_dup_cmd
35
36
  from .commands._inat import _inat_cmd
@@ -80,6 +81,7 @@ cli.add_command(_del_src_cmd)
80
81
  cli.add_command(_del_stype_cmd)
81
82
 
82
83
  cli.add_command(_embed_cmd)
84
+ cli.add_command(_ensemble_cmd)
83
85
  cli.add_command(_extract_all_cmd)
84
86
  cli.add_command(_extract_by_image_cmd)
85
87
 
@@ -13,6 +13,7 @@ from ._db_delete import (
13
13
  del_stype,
14
14
  )
15
15
  from ._embed import embed
16
+ from ._ensemble import ensemble
16
17
  from ._extract import extract_all, extract_by_image
17
18
  from ._find_dup import find_dup
18
19
  from ._inat import inat
@@ -54,6 +55,7 @@ __all__ = [
54
55
  "del_src",
55
56
  "del_stype",
56
57
  "embed",
58
+ "ensemble",
57
59
  "extract_all",
58
60
  "extract_by_image",
59
61
  "find_dup",
@@ -0,0 +1,237 @@
1
+ # File name starts with _ to keep it out of typeahead for API users.
2
+ # Defer some imports to improve --help performance.
3
+ import logging
4
+ import os
5
+ from pathlib import Path
6
+ import tempfile
7
+ from typing import Optional
8
+
9
+ import click
10
+
11
+ from britekit.core.config_loader import get_config
12
+ from britekit.core import util
13
+
14
def _eval_ensemble(ensemble, temp_dir, annotations_path, recording_dir):
    """
    Run inference with one candidate ensemble and return its test scores.

    Args:
        ensemble: Iterable of checkpoint file paths that make up the ensemble.
        temp_dir (str): Scratch directory that receives copies of the checkpoints.
            Every entry already in it is deleted first.
            NOTE(review): presumably the caller has pointed the configured
            checkpoint folder at this directory so that Analyzer loads exactly
            these checkpoints - confirm against the caller.
        annotations_path (str): Annotations path handed to PerSegmentTester.
        recording_dir (str): Directory of recordings to analyze. A temporary
            "ensemble_evaluation_labels" sub-directory is created in it for the
            inference output and removed again before returning.

    Returns:
        dict: Scores keyed by "macro_pr", "micro_pr", "macro_roc" and "micro_roc".
    """
    import shutil

    from britekit.core.analyzer import Analyzer
    from britekit.testing.per_segment_tester import PerSegmentTester

    # delete any checkpoints in the temp dir
    for filename in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, filename))

    # copy checkpoints to the temp dir
    for file_path in ensemble:
        shutil.copyfile(file_path, os.path.join(temp_dir, Path(file_path).name))

    inference_output_dir = str(Path(recording_dir) / "ensemble_evaluation_labels")
    min_score = 0.8  # irrelevant really

    # suppress logging during inference and analysis
    util.set_logging(level=logging.ERROR)
    try:
        # run inference on the given test
        Analyzer().run(recording_dir, inference_output_dir)

        # The tester needs an output directory, but nothing written there is kept.
        with tempfile.TemporaryDirectory() as output_dir:
            tester = PerSegmentTester(
                annotations_path,
                recording_dir,
                inference_output_dir,
                output_dir,
                min_score,
            )
            tester.initialize()

            pr_stats = tester.get_pr_auc_stats()
            roc_stats = tester.get_roc_auc_stats()

        return {
            "macro_pr": pr_stats["macro_pr_auc"],
            "micro_pr": pr_stats["micro_pr_auc_trained"],
            "macro_roc": roc_stats["macro_roc_auc"],
            "micro_roc": roc_stats["micro_roc_auc_trained"],
        }
    finally:
        # Always clean up, even when inference or scoring fails: otherwise the
        # label directory is left inside the user's recordings and logging stays
        # suppressed for the rest of the process.
        shutil.rmtree(inference_output_dir, ignore_errors=True)
        util.set_logging()  # restore logging
62
+
63
def ensemble(
    cfg_path: Optional[str] = None,
    ckpt_path: str = "",
    ensemble_size: int = 3,
    num_tries: int = 100,
    metric: str = "micro_roc",
    annotations_path: str = "",
    recordings_path: Optional[str] = None,
    output_path: str = "",
) -> None:
    """
    Find the best ensemble of a given size from a group of checkpoints.

    Given a directory containing checkpoints, and an ensemble size (default=3), select random
    ensembles of the given size and test each one to identify the best ensemble.

    Args:
        cfg_path (str, optional): Path to YAML file defining configuration overrides.
        ckpt_path (str): Path to directory containing checkpoints.
        ensemble_size (int): Number of checkpoints in ensemble (default=3).
        num_tries (int): Maximum number of ensembles to try (default=100).
        metric (str): Metric to use to compare ensembles (default=micro_roc).
        annotations_path (str): Path to CSV file containing ground truth annotations.
        recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
        output_path (str): Directory where reports will be saved.
    """
    import glob
    import itertools
    import math
    import random

    # NOTE(review): output_path is accepted but never read in this function;
    # results are only logged. Confirm whether report output is still planned.

    if metric not in ["macro_pr", "micro_pr", "macro_roc", "micro_roc"]:
        logging.error(f"Error: invalid metric ({metric})")
        return

    # Reject sizes and counts that would otherwise fail later: a negative size
    # makes math.comb raise, and a non-positive num_tries evaluates nothing.
    if ensemble_size < 1:
        logging.error(f"Error: ensemble size ({ensemble_size}) must be at least 1")
        return

    if num_tries < 1:
        logging.error(f"Error: number of tries ({num_tries}) must be at least 1")
        return

    cfg, _ = get_config(cfg_path)
    ckpt_paths = sorted(glob.glob(os.path.join(ckpt_path, "*.ckpt")))
    num_ckpts = len(ckpt_paths)
    if num_ckpts == 0:
        logging.error(f"Error: no checkpoints found in {ckpt_path}")
        return
    elif num_ckpts < ensemble_size:
        logging.error(f"Error: number of checkpoints ({num_ckpts}) is less than requested ensemble size ({ensemble_size})")
        return

    if not recordings_path:
        recordings_path = str(Path(annotations_path).parent)

    def _unique_random_samples():
        # Random sampling without replacement: yield num_tries distinct
        # combinations. Sorting makes the same set of checkpoints compare equal
        # regardless of the order random.sample returned them in.
        seen: set = set()
        while len(seen) < num_tries:
            sample = tuple(sorted(random.sample(ckpt_paths, ensemble_size)))
            if sample not in seen:
                seen.add(sample)
                yield sample

    with tempfile.TemporaryDirectory() as temp_dir:
        # Each candidate's checkpoints are copied into temp_dir for evaluation.
        # NOTE(review): these config values are not restored afterwards, so
        # ckpt_folder refers to a deleted directory once this block exits.
        cfg.misc.ckpt_folder = temp_dir
        cfg.infer.min_score = 0

        total_combinations = math.comb(num_ckpts, ensemble_size)
        if total_combinations <= num_tries:
            # Exhaustive search
            logging.info("Doing exhaustive search")
            candidates = itertools.combinations(ckpt_paths, ensemble_size)
            num_candidates = total_combinations
        else:
            logging.info("Doing random sampling")
            candidates = _unique_random_samples()
            num_candidates = num_tries

        # Start below any real score so a candidate scoring exactly 0 can still win.
        best_score = float("-inf")
        best_ensemble = None
        for count, candidate in enumerate(candidates, start=1):
            scores = _eval_ensemble(candidate, temp_dir, annotations_path, recordings_path)
            score = scores[metric]
            logging.info(f"For ensemble {count} of {num_candidates}, score = {score:.4f}")
            if score > best_score:
                best_score = score
                best_ensemble = candidate

        if best_ensemble is None:
            # Only reachable when no candidate produced a comparable score (e.g. all NaN).
            logging.error(f"Error: no ensemble produced a valid {metric} score")
            return

        logging.info(f"Best score = {best_score:.4f}")

        best_names = [Path(path).name for path in best_ensemble]
        logging.info(f"Best ensemble = {best_names}")
150
+
151
@click.command(
    name="ensemble",
    short_help="Find the best ensemble of a given size from a group of checkpoints.",
    help=util.cli_help_from_doc(ensemble.__doc__),
)
@click.option(
    "-c",
    "--cfg",
    "cfg_path",
    type=click.Path(exists=True),
    required=False,
    help="Path to YAML file defining config overrides.",
)
@click.option(
    "--ckpt_path",
    "ckpt_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
    required=True,
    help="Directory containing checkpoints.",
)
@click.option(
    "-e",
    "--ensemble_size",
    "ensemble_size",
    type=int,
    default=3,
    help="Number of checkpoints in ensemble (default=3).",
)
@click.option(
    "-n",
    "--num_tries",
    "num_tries",
    type=int,
    default=100,
    help="Maximum number of ensembles to try (default=100).",
)
@click.option(
    "-m",
    "--metric",
    "metric",
    type=click.Choice(
        [
            "macro_pr",
            "micro_pr",
            "macro_roc",
            "micro_roc",
        ]
    ),
    default="micro_roc",
    help="Metric used to compare ensembles (default=micro_roc). Macro-averaging uses annotated classes only, but micro-averaging uses all classes.",
)
@click.option(
    "-a",
    "--annotations",
    "annotations_path",
    type=click.Path(exists=True, file_okay=True, dir_okay=False),
    required=True,
    help="Path to CSV file containing annotations (ground truth).",
)
@click.option(
    "-r",
    "--recordings",
    "recordings_path",
    type=click.Path(exists=True, file_okay=False, dir_okay=True),
    required=False,
    help="Recordings directory. Default is directory containing annotations file.",
)
@click.option(
    "-o",
    "--output",
    "output_path",
    type=click.Path(file_okay=False, dir_okay=True),
    required=True,
    help="Path to output directory.",
)
def _ensemble_cmd(
    cfg_path: Optional[str],
    ckpt_path: str,
    ensemble_size: int,
    num_tries: int,
    metric: str,
    annotations_path: str,
    recordings_path: Optional[str],
    output_path: str,
) -> None:
    # CLI entry point for the "ensemble" command: configure logging, then hand
    # the parsed options to ensemble(). Keyword arguments keep the eight values
    # bound to the right parameters even if the API signature is reordered.
    util.set_logging()
    ensemble(
        cfg_path=cfg_path,
        ckpt_path=ckpt_path,
        ensemble_size=ensemble_size,
        num_tries=num_tries,
        metric=metric,
        annotations_path=annotations_path,
        recordings_path=recordings_path,
        output_path=output_path,
    )
@@ -276,14 +276,14 @@ def rpt_epochs(
276
276
  tester.initialize()
277
277
 
278
278
  pr_stats = tester.get_pr_auc_stats()
279
- pr_score = pr_stats["micro_pr_auc"]
279
+ pr_score = pr_stats["micro_pr_auc_trained"]
280
280
  pr_scores.append(pr_score)
281
281
  if pr_score > max_pr_score:
282
282
  max_pr_score = pr_score
283
283
  max_pr_epoch = epoch_num
284
284
 
285
285
  roc_stats = tester.get_roc_auc_stats()
286
- roc_score = roc_stats["micro_roc_auc"]
286
+ roc_score = roc_stats["micro_roc_auc_trained"]
287
287
  roc_scores.append(roc_score)
288
288
  if roc_score > max_roc_score:
289
289
  max_roc_score = roc_score
@@ -18,7 +18,7 @@ def tune(
18
18
  param_path: Optional[str] = None,
19
19
  output_path: str = "",
20
20
  annotations_path: str = "",
21
- metric: str = "macro_roc",
21
+ metric: str = "micro_roc",
22
22
  recordings_path: str = "",
23
23
  train_log_path: str = "",
24
24
  num_trials: int = 0,
@@ -159,7 +159,7 @@ def tune(
159
159
  "micro_roc",
160
160
  ]
161
161
  ),
162
- default="macro_roc",
162
+ default="micro_roc",
163
163
  help="Metric used to compare runs. Macro-averaging uses annotated classes only, but micro-averaging uses all classes.",
164
164
  )
165
165
  @click.option(
britekit/core/trainer.py CHANGED
@@ -125,11 +125,12 @@ class Trainer:
125
125
  if val_rocs:
126
126
  import math
127
127
  import numpy as np
128
+
128
129
  mean = float(np.mean(val_rocs))
129
- std = float(np.std(val_rocs, ddof=1)) if len(val_rocs) > 1 else 0.0
130
+ std = float(np.std(val_rocs, ddof=1)) if len(val_rocs) > 1 else 0.0
130
131
  n = len(val_rocs)
131
132
  se = std / math.sqrt(n) if n > 1 else 0.0
132
- ci95 = 1.96 * se # 95% CI using normal approximation
133
+ ci95 = 1.96 * se # 95% CI using normal approximation
133
134
 
134
135
  logging.info("Using micro-averaged ROC AUC")
135
136
  scores_str = ", ".join(f"{v:.4f}" for v in val_rocs)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: britekit
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Core functions for bioacoustic recognizers.
5
5
  Project-URL: Documentation, https://github.com/jhuus/BriteKit#readme
6
6
  Project-URL: Issues, https://github.com/jhuus/BriteKit/issues
@@ -1,4 +1,4 @@
1
- britekit/cli.py,sha256=jhWE19ye4Yyeoog-KCBzazdsmfQAGk_LZdXKnTCTmF0,3003
1
+ britekit/cli.py,sha256=nnrCMfw3-1GJ4rKFpqTLu8JcBGxTocMn7nwzU4OSaew,3080
2
2
  britekit/core/analyzer.py,sha256=4hctyNvM3mZ0FywEWKbPHamxzl1nZh1xdHkBxM4WPxo,5617
3
3
  britekit/core/audio.py,sha256=8QLbNDAiQyViEhrVC8jU0n32we4C22W_jPfc_KcOlmQ,15853
4
4
  britekit/core/augmentation.py,sha256=5_wyB-6gt7uM68Zl-rO_fPu1D6tlsd2m5oWhA6l0W9Q,5721
@@ -11,7 +11,7 @@ britekit/core/pickler.py,sha256=Vj-_DdFQUQj2bIVoyWe5puI8g8dTP9x7ZavbvM1iQZo,5788
11
11
  britekit/core/plot.py,sha256=hLuLB1VdtdFyaSHVDGl5tjjFCRgOJJ1ucTVJHM_3D_0,5332
12
12
  britekit/core/predictor.py,sha256=u4H8horTTvcg4Oqfpy5PG44eiiMeR5RU3aPZnMiXRCw,22914
13
13
  britekit/core/reextractor.py,sha256=gazhIZN8V1K4T_Q_kc-ihxUYbkNnc_hoAS6bpYQc95I,8396
14
- britekit/core/trainer.py,sha256=N5EsbCzxw3wXxs2PTJJ0OfYFkIi49HCRM0ylT5zSSZk,6439
14
+ britekit/core/trainer.py,sha256=tKyXZf5vm1yHJ8tyVvwgDOprAVZPKdiVEbLHlDJ8hKo,6440
15
15
  britekit/core/tuner.py,sha256=FMmy4p3_j2Tojs4ONPzuUeRpCPWGlttr4rUJac7Hkyk,16435
16
16
  britekit/core/util.py,sha256=0JsEEN09hFPQzuttCKaejWofXAjCGSvWEewjkiLAh3E,19172
17
17
  britekit/models/base_model.py,sha256=9T7TwHx3K8fl10Vb-qUuypK3NDDZM-ktB8ZLHzqQhdc,16883
@@ -32,7 +32,7 @@ britekit/testing/per_segment_tester.py,sha256=FnaozQ8VmH99aYc1ibmDFfOk_ADgsXQGU_
32
32
  britekit/training_db/extractor.py,sha256=pT7lAUsNzYs3RXDzpMv7q0MKg6TktiFLKrRtKTWv6ho,8409
33
33
  britekit/training_db/training_data_provider.py,sha256=V5aBjsCvrWViZ0Jv05hgcKRizcAXmqoj4q3hAHedoD8,5651
34
34
  britekit/training_db/training_db.py,sha256=OOfD1pcbq5HVJbzhmuI-D-gkPHWSoz0cCO4zIUGFvoY,65011
35
- britekit/__about__.py,sha256=-uGInVbPaVLti1Rr4PYUteRetwYfxeLtIuqiLmEcRjA,122
35
+ britekit/__about__.py,sha256=QXWLwMXjHd1KWRO6vKHNgPREhZNrZv3ac2FWBvQPN6E,122
36
36
  britekit/__init__.py,sha256=RpruzdjbvTcFNf21zJYY8HrAhJei91FtNNLjIBmw-kw,1857
37
37
  britekit/install/data/classes.csv,sha256=OdTZ8oQdx7N-HKyhftxZStGZYsjhCy4UbanwtQJ2wBM,54
38
38
  britekit/install/data/ignore.txt,sha256=RbKvEHtUCbgRYolwR1IucClwyD3q7l2s6QuRjph-Us4,68
@@ -64,7 +64,7 @@ britekit/install/yaml/samples/tune_dropout.yaml,sha256=f3QEfPOZecjwthqzAWodI8-PX
64
64
  britekit/install/yaml/samples/tune_learning_rate.yaml,sha256=UTtpsJwO33UWW0oecGR_LV3nQPtyC1dbpkkJpGOlI68,83
65
65
  britekit/install/yaml/samples/tune_optimizer.yaml,sha256=VtGlZmMJ22gaZWJ7CPLNHRZ-8EHeB5GmxywQm1Iy1MM,73
66
66
  britekit/install/yaml/samples/tune_smooth.yaml,sha256=IZq2lohiJWVdzPl-i3aCEwEsJLmG_bg7EvyBUSI-R0o,83
67
- britekit/commands/__init__.py,sha256=cgiHBDFQ7o1JL-wk9z0R_QEn7UVV_E0SPN7AANzxRdM,1538
67
+ britekit/commands/__init__.py,sha256=mms49ChyrGj4zzeUge6bl7uiPhOMjFm37NTk23ZFmXw,1586
68
68
  britekit/commands/_analyze.py,sha256=Hss0ubLjGM2FSbQk52S9wvfj73-gkym4uW_o8Td-BOc,4954
69
69
  britekit/commands/_audioset.py,sha256=BqmAJq6yWpyqBYIUWt9d0khBTQRa3vgUMdCS4U0fxvA,9957
70
70
  britekit/commands/_calibrate.py,sha256=338dRyGRj-Bw_4wFxiANDCbo-lZgdl0OR2gD8PmLv8U,4912
@@ -72,6 +72,7 @@ britekit/commands/_ckpt_ops.py,sha256=gutU8wqzrJCIyyuo_kLtIaOm9tq6h7q1Xm9L2QNU56
72
72
  britekit/commands/_db_add.py,sha256=LQD3nR_d8oI19YNi06EzE62kS5DlbvL-q2HZSRmEGeE,7261
73
73
  britekit/commands/_db_delete.py,sha256=rCV2tL8x-sNgsYmHZc6Id7_4-iLynwkK2f2_KRFkAZo,14541
74
74
  britekit/commands/_embed.py,sha256=MlP1HMRBmOANWEdbW1qhpnFGaxMUyeGEYOqaXV6K_cg,4391
75
+ britekit/commands/_ensemble.py,sha256=UElN1aajykpktekfA4bKPHh0VB1NYwJtaEjn91xRF2c,7849
75
76
  britekit/commands/_extract.py,sha256=7c_XnJY42IQ2AA70JmgFU9IkIUodkDoLy2vfYWU99AE,8865
76
77
  britekit/commands/_find_dup.py,sha256=yPn2EqG0icYHgUN8_87KuY9uOqEwDxqvhJc-DfBD40w,6353
77
78
  britekit/commands/_inat.py,sha256=ojTre5BCj_jmEh6x2kzNhcminLN6h5bzsYpxyrxGRdQ,4164
@@ -79,16 +80,16 @@ britekit/commands/_init.py,sha256=FmaQRY-7SYSHCLXL__47LEPecWir7X6zEB05KpradFw,28
79
80
  britekit/commands/_pickle.py,sha256=p990FsJGfSXcgjtBzH7nPGPh023b8cH0D7RZywQQ5Aw,3488
80
81
  britekit/commands/_plot.py,sha256=7vZXsYP9dv4PbHb8K3YbJFZc65YoPIBjEMBolyh6Has,13084
81
82
  britekit/commands/_reextract.py,sha256=kCmSjeghg6mhrJ46ibRTmBkGVytU7flFvTbqsnYhBvY,3770
82
- britekit/commands/_reports.py,sha256=KVYtpeFQpUC4jAIm2k2xV7aiNq826DL6sUrYEJD38X0,22023
83
+ britekit/commands/_reports.py,sha256=qnUEWUgEB3BFzshBAQ9nz75Mvjpl2bEZCBy5ttNx7l4,22039
83
84
  britekit/commands/_search.py,sha256=HIUXwfPvh3rxpgaFSR3bAAI38OtGVPyMo5GMfLtLX-8,9991
84
85
  britekit/commands/_train.py,sha256=vGFKlfcv35cOelArQNbVbTRbDWogT_IMg0wZt5virHY,4158
85
- britekit/commands/_tune.py,sha256=8dEZZURE769C0JZwhNpzB6pQxVklzl2w2cyXyWyhWXs,7331
86
+ britekit/commands/_tune.py,sha256=g9GnlOSJpa-ZfNAw2iCMzw0qPgLFTGdTUjzw8Ghjfvc,7331
86
87
  britekit/commands/_wav2mp3.py,sha256=2Q4cjT6OhJmBPTNzGRMrDd6dSdBBufuQdjhH1V8ghLo,2167
87
88
  britekit/commands/_xeno.py,sha256=_6YxQ7xFdaSy5DNUaigkbYp3E8EhtOhTC9b6OFS0MFA,6026
88
89
  britekit/commands/_youtube.py,sha256=_u1LrwY_2GxllKd505N_2ArFMbACQ_PtVxuqUCYxFe0,2214
89
90
  britekit/core/__init__.py,sha256=QcjcFyvO5KqJLF_HBeqiCk925uU5jTUjIV5lJix9XY4,556
90
- britekit-0.0.8.dist-info/METADATA,sha256=Qtzlff9X_WI1Cz8zpTyntAwFemS8hNbS0ClWJV9KVXk,18555
91
- britekit-0.0.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
- britekit-0.0.8.dist-info/entry_points.txt,sha256=ycnPy5DLX14RTf7lKfkQAVyIf1B1zTL1gMsHm455wmg,46
93
- britekit-0.0.8.dist-info/licenses/LICENSE.txt,sha256=kPoHm6iop8-CUa_720Tt8gqyvLD6D_7218u1hCCpErk,1092
94
- britekit-0.0.8.dist-info/RECORD,,
91
+ britekit-0.0.9.dist-info/METADATA,sha256=XFCWiF08LtF--mnG5gfLK0T7DeypGxF0oH4-s_T8u2g,18555
92
+ britekit-0.0.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
+ britekit-0.0.9.dist-info/entry_points.txt,sha256=ycnPy5DLX14RTf7lKfkQAVyIf1B1zTL1gMsHm455wmg,46
94
+ britekit-0.0.9.dist-info/licenses/LICENSE.txt,sha256=kPoHm6iop8-CUa_720Tt8gqyvLD6D_7218u1hCCpErk,1092
95
+ britekit-0.0.9.dist-info/RECORD,,