britekit 0.1.1-py3-none-any.whl → 0.1.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of britekit might be problematic.

britekit/__about__.py CHANGED
@@ -1,4 +1,4 @@
 # SPDX-FileCopyrightText: 2025-present Jan Huus <jhuus1@gmail.com>
 #
 # SPDX-License-Identifier: MIT
-__version__ = "0.1.1"
+__version__ = "0.1.2"

britekit/commands/_ensemble.py CHANGED
@@ -11,34 +11,24 @@ import click
 from britekit.core.config_loader import get_config
 from britekit.core import util
 
-def _eval_ensemble(ensemble, temp_dir, annotations_path, recording_dir):
-    import shutil
-
-    from britekit.core.analyzer import Analyzer
+def _eval_ensemble(ensemble, dataframe_dict, annotations_path, recordings_path, inference_output_dir):
+    import pandas as pd
     from britekit.testing.per_segment_tester import PerSegmentTester
 
-    # delete any checkpoints in the temp dir
-    for filename in os.listdir(temp_dir):
-        file_path = os.path.join(temp_dir, filename)
-        os.remove(file_path)
-
-    # copy checkpoints to the temp dir
-    for file_path in ensemble:
-        file_name = Path(file_path).name
-        dest_path = os.path.join(temp_dir, file_name)
-        shutil.copyfile(file_path, dest_path)
+    # create a dataframe with the average scores for the ensemble
+    avg_df: pd.DataFrame = dataframe_dict[ensemble[0]].copy()
+    avg_df["score"] = sum(dataframe_dict[ckpt_path]["score"] for ckpt_path in ensemble) / len(ensemble)
 
-    # run inference on the given test
-    util.set_logging(level=logging.ERROR)  # suppress logging during inference and analysis
-    label_dir = "ensemble_evaluation_labels"
-    inference_output_dir = str(Path(recording_dir) / label_dir)
-    Analyzer().run(recording_dir, inference_output_dir)
+    # save the dataframe to the usual inference output location
+    scores_csv_path = str(Path(inference_output_dir) / 'scores.csv')
+    avg_df.to_csv(scores_csv_path, index=False)
 
-    min_score = 0.8  # irrelevant really
     with tempfile.TemporaryDirectory() as output_dir:
+        util.set_logging(level=logging.ERROR)  # suppress logging during test reporting
+        min_score = 0.8  # arbitrary threshold
         tester = PerSegmentTester(
             annotations_path,
-            recording_dir,
+            recordings_path,
             inference_output_dir,
             output_dir,
             min_score,
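
In 0.1.2, `_eval_ensemble` no longer runs inference itself: the caller passes in `dataframe_dict`, a per-checkpoint table of scores, and the function averages the `score` column across the ensemble members before writing the result to `scores.csv`, the location where PerSegmentTester expects inference output. Below is a minimal sketch of that averaging step, using made-up dataframe contents (the column names other than `score` are hypothetical):

import pandas as pd

# Hypothetical per-checkpoint score tables, keyed by checkpoint path;
# in the real command each one is read back from the Analyzer's scores.csv.
dataframe_dict = {
    "ckpt_a.ckpt": pd.DataFrame({"recording": ["r1", "r2"], "score": [0.75, 0.25]}),
    "ckpt_b.ckpt": pd.DataFrame({"recording": ["r1", "r2"], "score": [0.25, 0.75]}),
}
ensemble = ("ckpt_a.ckpt", "ckpt_b.ckpt")

# Same pattern as the diff: copy one member's table, then overwrite the
# score column with the element-wise mean across all ensemble members.
avg_df = dataframe_dict[ensemble[0]].copy()
avg_df["score"] = sum(dataframe_dict[p]["score"] for p in ensemble) / len(ensemble)
print(avg_df["score"].tolist())  # [0.5, 0.5]

Writing the averaged table back to scores.csv lets PerSegmentTester treat the ensemble exactly like a single model's inference output.
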
@@ -47,6 +37,7 @@ def _eval_ensemble(ensemble, temp_dir, annotations_path, recording_dir):
 
         pr_stats = tester.get_pr_auc_stats()
         roc_stats = tester.get_roc_auc_stats()
+        util.set_logging()  # restore logging
 
         scores = {
             "macro_pr": pr_stats["macro_pr_auc"],
@@ -55,11 +46,9 @@ def _eval_ensemble(ensemble, temp_dir, annotations_path, recording_dir):
             "micro_roc": roc_stats["micro_roc_auc_trained"]
         }
 
-    shutil.rmtree(inference_output_dir)
-    util.set_logging()  # restore logging
-
     return scores
 
+
 def ensemble(
     cfg_path: Optional[str]=None,
     ckpt_path: str="",
@@ -87,7 +76,13 @@ def ensemble(
     import glob
     import itertools
     import math
+    import os
     import random
+    import shutil
+
+    import pandas as pd
+
+    from britekit.core.analyzer import Analyzer
 
     if metric not in ["macro_pr", "micro_pr", "macro_roc", "micro_roc"]:
         logging.error(f"Error: invalid metric ({metric})")
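
The new imports (os, shutil, pandas, Analyzer) are added inside the function body rather than at module level, matching the command's existing local imports; a plausible reason is to keep CLI startup light, since heavyweight dependencies then load only when the command actually runs. A generic illustration of that deferred-import pattern (not BriteKit's actual CLI wiring):

import click

@click.command()
def ensemble():
    # Deferred import: the heavy dependency loads only when the command
    # executes, not when the CLI merely lists commands or prints --help.
    import pandas as pd  # noqa: F401

    click.echo("ensemble search would run here")

if __name__ == "__main__":
    ensemble()
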
@@ -106,10 +101,29 @@
     if not recordings_path:
         recordings_path = str(Path(annotations_path).parent)
 
-    with tempfile.TemporaryDirectory() as temp_dir:
-        cfg.misc.ckpt_folder = temp_dir
+    with tempfile.TemporaryDirectory() as ensemble_dir:
+        cfg.misc.ckpt_folder = ensemble_dir
         cfg.infer.min_score = 0
 
+        # get a dataframe of predictions per checkpoint
+        label_dir = "ensemble_evaluation_labels"
+        inference_output_dir = str(Path(recordings_path) / label_dir)
+        scores_csv_path = str(Path(inference_output_dir) / 'scores.csv')
+        dataframe_dict = {}
+        for ckpt_path in ckpt_paths:
+            ckpt_name = Path(ckpt_path).name
+            logging.info(f"Running inference with {ckpt_name}")
+            dest_path = str(Path(ensemble_dir) / ckpt_name)
+            shutil.copyfile(ckpt_path, dest_path)
+
+            util.set_logging(level=logging.ERROR)  # suppress logging during inference
+            Analyzer().run(recordings_path, inference_output_dir, rtype='csv')
+            util.set_logging()
+
+            df = pd.read_csv(scores_csv_path)
+            dataframe_dict[ckpt_path] = df
+            os.remove(dest_path)
+
         best_score = 0
         best_ensemble = None
         count = 1
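
This restructuring is what makes the search cheap: inference now runs once per checkpoint, up front, and the resulting score table is cached in `dataframe_dict`, so evaluating a candidate ensemble is just an average over cached dataframes rather than another inference pass. A rough sketch of that cache-building pattern, with `run_inference` standing in for the Analyzer call (a hypothetical helper, not part of the BriteKit API):

from pathlib import Path

import pandas as pd

def run_inference(ckpt_path: str, recordings_path: str, scores_csv: str) -> None:
    """Hypothetical stand-in: run one checkpoint over the recordings and write scores_csv."""
    raise NotImplementedError

def build_score_cache(ckpt_paths, recordings_path, output_dir):
    # One inference pass per checkpoint; each pass overwrites scores.csv,
    # which is then read back and cached in memory keyed by checkpoint path.
    scores_csv = str(Path(output_dir) / "scores.csv")
    cache = {}
    for ckpt_path in ckpt_paths:
        run_inference(ckpt_path, recordings_path, scores_csv)
        cache[ckpt_path] = pd.read_csv(scores_csv)
    return cache

In the real command, each checkpoint is copied into cfg.misc.ckpt_folder before its run and removed afterwards, so the Analyzer only ever sees one checkpoint at a time.
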
@@ -118,7 +132,7 @@
             # Exhaustive search
             logging.info("Doing exhaustive search")
             for ensemble in itertools.combinations(ckpt_paths, ensemble_size):
-                scores = _eval_ensemble(ensemble, temp_dir, annotations_path, recordings_path)
+                scores = _eval_ensemble(ensemble, dataframe_dict, annotations_path, recordings_path, inference_output_dir)
                 logging.info(f"For ensemble {count} of {total_combinations}, score = {scores[metric]:.4f}")
                 if scores[metric] > best_score:
                     best_score = scores[metric]
@@ -133,7 +147,7 @@
                 ensemble = tuple(sorted(random.sample(ckpt_paths, ensemble_size)))
                 if ensemble not in seen:
                     seen.add(ensemble)
-                    scores = _eval_ensemble(ensemble, temp_dir, annotations_path, recordings_path)
+                    scores = _eval_ensemble(ensemble, dataframe_dict, annotations_path, recordings_path, inference_output_dir)
                     logging.info(f"For ensemble {count} of {num_tries}, score = {scores[metric]:.4f}")
                     if scores[metric] > best_score:
                         best_score = scores[metric]
@@ -141,6 +155,8 @@
 
                     count += 1
 
+        shutil.rmtree(inference_output_dir)
+
         logging.info(f"Best score = {best_score:.4f}")
 
         assert best_ensemble is not None
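
With per-checkpoint scores cached, the selection loop above reduces to a simple search: try every combination when there are few enough, otherwise sample distinct random ensembles, and keep whichever candidate scores best on the chosen metric. A simplified, self-contained version of that logic (`eval_ensemble` is a stand-in for `_eval_ensemble` returning a single metric value):

import itertools
import math
import random

def search_ensembles(ckpt_paths, ensemble_size, num_tries, eval_ensemble):
    """Return (best_score, best_ensemble) via exhaustive or random search."""
    best_score, best_ensemble = 0.0, None
    total = math.comb(len(ckpt_paths), ensemble_size)
    if total <= num_tries:
        # Few enough combinations: evaluate every one.
        candidates = itertools.combinations(ckpt_paths, ensemble_size)
    else:
        # Too many: sample distinct random ensembles (total > num_tries,
        # so this loop always terminates).
        seen = set()
        while len(seen) < num_tries:
            seen.add(tuple(sorted(random.sample(ckpt_paths, ensemble_size))))
        candidates = seen
    for ensemble in candidates:
        score = eval_ensemble(ensemble)
        if score > best_score:
            best_score, best_ensemble = score, ensemble
    return best_score, best_ensemble

Like the real command, this sketch starts from a best score of zero, so an ensemble is only ever selected if it scores above that.
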

britekit-0.1.1.dist-info/METADATA → britekit-0.1.2.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: britekit
-Version: 0.1.1
+Version: 0.1.2
 Summary: Core functions for bioacoustic recognizers.
 Project-URL: Documentation, https://github.com/jhuus/BriteKit#readme
 Project-URL: Issues, https://github.com/jhuus/BriteKit/issues

britekit-0.1.1.dist-info/RECORD → britekit-0.1.2.dist-info/RECORD
@@ -32,7 +32,7 @@ britekit/testing/per_segment_tester.py,sha256=5zFpe8wVEEtVjthKKFRb0dwE9T0v4JIll9
 britekit/training_db/extractor.py,sha256=yblYTco9b-bZhBTOkGkNANOabsNo8IfQGJxPvh_eJyo,8406
 britekit/training_db/training_data_provider.py,sha256=V5aBjsCvrWViZ0Jv05hgcKRizcAXmqoj4q3hAHedoD8,5651
 britekit/training_db/training_db.py,sha256=xZqN1sMC2yFMEDm9rOrCigN3CUfUzTaTUkX3IZ_zHRc,64922
-britekit/__about__.py,sha256=iTA-8NubPgFzs2Mkc2etUzBbWweMfnEmfcp3_GiYcqw,122
+britekit/__about__.py,sha256=p7Eggv-ttiNa77u9u08fyOSsZF30h6Wirtx06CqlBII,122
 britekit/__init__.py,sha256=RpruzdjbvTcFNf21zJYY8HrAhJei91FtNNLjIBmw-kw,1857
 britekit/install/data/classes.csv,sha256=OdTZ8oQdx7N-HKyhftxZStGZYsjhCy4UbanwtQJ2wBM,54
 britekit/install/data/ignore.txt,sha256=RbKvEHtUCbgRYolwR1IucClwyD3q7l2s6QuRjph-Us4,68
@@ -72,7 +72,7 @@ britekit/commands/_ckpt_ops.py,sha256=2l-eJuxGBTHtQZ2Nked82KeSbP7WIwZ-yAYuM4v4Hl
 britekit/commands/_db_add.py,sha256=brUCb7LZVJ7XezlSmpaKilz9hYoII_DvfjhS1v64cr0,7249
 britekit/commands/_db_delete.py,sha256=ziqxnQhBOjHgqlu0uk6GA8A7I9FOMYcPEscmPxThAVY,14520
 britekit/commands/_embed.py,sha256=gTQK4YOilwsZCY2r8HhaWUZBpMkA-OEZsR5RkNV1euM,4388
-britekit/commands/_ensemble.py,sha256=O6_BDgHWXdlCn_JdbZdI8X5NdGlnjJiz6nuJfVYkE0g,7583
+britekit/commands/_ensemble.py,sha256=jssoAKvKgy2KNAQ2-tIapQgo_YfywwxSnPRnIwnmC38,8427
 britekit/commands/_extract.py,sha256=iz9VG1KnV-d7cFliQpmWiSi6Ezt9hCm5Iur9r-XBb20,8859
 britekit/commands/_find_dup.py,sha256=Zig-s04BUQzeY4s7DjEQM53-e6KgCXxSw05R6BfSkNw,6350
 britekit/commands/_inat.py,sha256=ojTre5BCj_jmEh6x2kzNhcminLN6h5bzsYpxyrxGRdQ,4164
@@ -88,8 +88,8 @@ britekit/commands/_wav2mp3.py,sha256=2Q4cjT6OhJmBPTNzGRMrDd6dSdBBufuQdjhH1V8ghLo
 britekit/commands/_xeno.py,sha256=4qS2iXeGMl0jYvTAvhSvX0VvIlp7STlX27o273WwBHk,6165
 britekit/commands/_youtube.py,sha256=_u1LrwY_2GxllKd505N_2ArFMbACQ_PtVxuqUCYxFe0,2214
 britekit/core/__init__.py,sha256=QcjcFyvO5KqJLF_HBeqiCk925uU5jTUjIV5lJix9XY4,556
-britekit-0.1.1.dist-info/METADATA,sha256=Ni0op2mIqZvbDJQw8DiekHMMkMTSU1fkUPjSgadbLWE,18552
-britekit-0.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-britekit-0.1.1.dist-info/entry_points.txt,sha256=ycnPy5DLX14RTf7lKfkQAVyIf1B1zTL1gMsHm455wmg,46
-britekit-0.1.1.dist-info/licenses/LICENSE.txt,sha256=kPoHm6iop8-CUa_720Tt8gqyvLD6D_7218u1hCCpErk,1092
-britekit-0.1.1.dist-info/RECORD,,
+britekit-0.1.2.dist-info/METADATA,sha256=MLgsBLSqBDBxK3yjFtsB5I50SbD2HSVsmrLmYNnVgSk,18552
+britekit-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+britekit-0.1.2.dist-info/entry_points.txt,sha256=ycnPy5DLX14RTf7lKfkQAVyIf1B1zTL1gMsHm455wmg,46
+britekit-0.1.2.dist-info/licenses/LICENSE.txt,sha256=kPoHm6iop8-CUa_720Tt8gqyvLD6D_7218u1hCCpErk,1092
+britekit-0.1.2.dist-info/RECORD,,