britekit 0.1.3__tar.gz → 0.1.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of britekit might be problematic. Click here for more details.

Files changed (127) hide show
  1. britekit-0.1.4/PKG-INFO +299 -0
  2. britekit-0.1.4/README.md +260 -0
  3. {britekit-0.1.3 → britekit-0.1.4}/britekit/__about__.py +1 -1
  4. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/__init__.py +2 -1
  5. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_analyze.py +9 -9
  6. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_audioset.py +8 -8
  7. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_calibrate.py +8 -8
  8. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_ckpt_ops.py +6 -6
  9. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_db_add.py +12 -12
  10. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_db_delete.py +15 -15
  11. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_embed.py +4 -4
  12. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_ensemble.py +7 -7
  13. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_extract.py +158 -19
  14. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_find_dup.py +5 -5
  15. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_inat.py +4 -4
  16. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_init.py +1 -1
  17. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_pickle.py +7 -7
  18. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_plot.py +26 -26
  19. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_reextract.py +6 -6
  20. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_reports.py +22 -22
  21. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_search.py +12 -12
  22. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_train.py +6 -6
  23. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_tune.py +12 -12
  24. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_wav2mp3.py +2 -2
  25. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_xeno.py +7 -7
  26. {britekit-0.1.3 → britekit-0.1.4}/britekit/commands/_youtube.py +3 -3
  27. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/cli.py +6 -1
  28. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/analyzer.py +8 -8
  29. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/audio.py +14 -14
  30. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/data_module.py +2 -2
  31. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/plot.py +8 -8
  32. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/predictor.py +21 -21
  33. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/reextractor.py +6 -6
  34. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/util.py +8 -8
  35. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/occurrence_db/occurrence_data_provider.py +13 -13
  36. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/training_db/extractor.py +65 -30
  37. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/training_db/training_data_provider.py +1 -1
  38. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/training_db/training_db.py +97 -100
  39. britekit-0.1.3/PKG-INFO +0 -290
  40. britekit-0.1.3/README.md +0 -251
  41. {britekit-0.1.3 → britekit-0.1.4}/.gitignore +0 -0
  42. {britekit-0.1.3 → britekit-0.1.4}/LICENSE.txt +0 -0
  43. {britekit-0.1.3 → britekit-0.1.4}/britekit/__init__.py +0 -0
  44. {britekit-0.1.3 → britekit-0.1.4}/britekit/core/__init__.py +0 -0
  45. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/class_inclusion.csv +0 -0
  46. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/class_list.csv +0 -0
  47. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/aircraft.csv +0 -0
  48. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/car.csv +0 -0
  49. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/chainsaw.csv +0 -0
  50. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/cow.csv +0 -0
  51. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/cricket.csv +0 -0
  52. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/dog.csv +0 -0
  53. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/rain.csv +0 -0
  54. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/rooster.csv +0 -0
  55. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/sheep.csv +0 -0
  56. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/siren.csv +0 -0
  57. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/speech.csv +0 -0
  58. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/truck.csv +0 -0
  59. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/curated/wind.csv +0 -0
  60. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/audioset/unbalanced_train_segments.csv +0 -0
  61. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/classes.csv +0 -0
  62. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/data/ignore.txt +0 -0
  63. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/base_config.yaml +0 -0
  64. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/cfg_infer.yaml +0 -0
  65. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/train_dla.yaml +0 -0
  66. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/train_effnet.yaml +0 -0
  67. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/train_gernet.yaml +0 -0
  68. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/train_hgnet.yaml +0 -0
  69. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/train_timm.yaml +0 -0
  70. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/train_vovnet.yaml +0 -0
  71. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/tune_dropout.yaml +0 -0
  72. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/tune_learning_rate.yaml +0 -0
  73. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/tune_optimizer.yaml +0 -0
  74. {britekit-0.1.3 → britekit-0.1.4}/britekit/install/yaml/samples/tune_smooth.yaml +0 -0
  75. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/class_inclusion.csv +0 -0
  76. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/class_list.csv +0 -0
  77. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/aircraft.csv +0 -0
  78. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/car.csv +0 -0
  79. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/chainsaw.csv +0 -0
  80. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/cow.csv +0 -0
  81. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/cricket.csv +0 -0
  82. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/dog.csv +0 -0
  83. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/rain.csv +0 -0
  84. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/rooster.csv +0 -0
  85. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/sheep.csv +0 -0
  86. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/siren.csv +0 -0
  87. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/speech.csv +0 -0
  88. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/truck.csv +0 -0
  89. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/curated/wind.csv +0 -0
  90. {britekit-0.1.3 → britekit-0.1.4}/install/data/audioset/unbalanced_train_segments.csv +0 -0
  91. {britekit-0.1.3 → britekit-0.1.4}/install/data/classes.csv +0 -0
  92. {britekit-0.1.3 → britekit-0.1.4}/install/data/ignore.txt +0 -0
  93. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/base_config.yaml +0 -0
  94. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/cfg_infer.yaml +0 -0
  95. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/train_dla.yaml +0 -0
  96. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/train_effnet.yaml +0 -0
  97. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/train_gernet.yaml +0 -0
  98. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/train_hgnet.yaml +0 -0
  99. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/train_timm.yaml +0 -0
  100. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/train_vovnet.yaml +0 -0
  101. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/tune_dropout.yaml +0 -0
  102. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/tune_learning_rate.yaml +0 -0
  103. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/tune_optimizer.yaml +0 -0
  104. {britekit-0.1.3 → britekit-0.1.4}/install/yaml/samples/tune_smooth.yaml +0 -0
  105. {britekit-0.1.3 → britekit-0.1.4}/pyproject.toml +0 -0
  106. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/augmentation.py +0 -0
  107. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/base_config.py +0 -0
  108. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/config_loader.py +0 -0
  109. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/dataset.py +0 -0
  110. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/exceptions.py +0 -0
  111. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/pickler.py +0 -0
  112. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/trainer.py +0 -0
  113. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/core/tuner.py +0 -0
  114. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/base_model.py +0 -0
  115. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/dla.py +0 -0
  116. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/effnet.py +0 -0
  117. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/gernet.py +0 -0
  118. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/head_factory.py +0 -0
  119. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/hgnet.py +0 -0
  120. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/model_loader.py +0 -0
  121. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/timm_model.py +0 -0
  122. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/models/vovnet.py +0 -0
  123. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/occurrence_db/occurrence_db.py +0 -0
  124. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/testing/base_tester.py +0 -0
  125. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/testing/per_block_tester.py +0 -0
  126. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/testing/per_recording_tester.py +0 -0
  127. {britekit-0.1.3 → britekit-0.1.4}/src/britekit/testing/per_segment_tester.py +0 -0
@@ -0,0 +1,299 @@
1
+ Metadata-Version: 2.4
2
+ Name: britekit
3
+ Version: 0.1.4
4
+ Summary: Core functions for bioacoustic recognizers.
5
+ Project-URL: Documentation, https://github.com/jhuus/BriteKit#readme
6
+ Project-URL: Issues, https://github.com/jhuus/BriteKit/issues
7
+ Project-URL: Source, https://github.com/jhuus/BriteKit
8
+ Author-email: Jan Huus <jhuus1@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE.txt
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Programming Language :: Python
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: Implementation :: CPython
19
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
20
+ Requires-Python: >=3.8
21
+ Requires-Dist: click<8.3,>=8.1
22
+ Requires-Dist: librosa<2.0,>=0.10
23
+ Requires-Dist: lightning<2.6,>=2.5
24
+ Requires-Dist: matplotlib<3.11,>=3.8
25
+ Requires-Dist: numpy<2.4,>=2.0
26
+ Requires-Dist: omegaconf<2.4,>=2.3
27
+ Requires-Dist: onnx<2.0,>=1.18
28
+ Requires-Dist: pandas<2.3,>=2.0
29
+ Requires-Dist: pyinaturalist<1.0,>=0.20
30
+ Requires-Dist: pytorch-lightning<2.6,>=2.5
31
+ Requires-Dist: scipy<2.0,>=1.13
32
+ Requires-Dist: soundfile<1.0,>=0.13
33
+ Requires-Dist: tensorboard<3.0,>=2.19
34
+ Requires-Dist: timm<2.0,>=1.0.17
35
+ Requires-Dist: torch<2.9,>=2.5
36
+ Requires-Dist: torchaudio<2.9,>=2.5
37
+ Requires-Dist: yt-dlp>=2025.6.25
38
+ Description-Content-Type: text/markdown
39
+
40
+ # BriteKit
41
+
42
+ -----
43
+
44
+ ## Getting Started
45
+
46
+ - [Introduction](#introduction)
47
+ - [License](#license)
48
+ - [Installation](#installation)
49
+ - [Configuration](#configuration)
50
+ - [Downloading Recordings](#downloading-recordings)
51
+ - [Managing Training Data](#managing-training-data)
52
+ - [Training](#training)
53
+ - [Testing](#testing)
54
+ - [Tuning](#tuning)
55
+ - [Ensembling](#ensembling)
56
+ - [Calibrating](#calibrating)
57
+
58
+ ## More Information
59
+
60
+ - [Spectrograms](#spectrograms)
61
+ - [Backbones and Classifier Heads](#backbones-and-classifier-heads)
62
+ - [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc)
63
+ - [Data Augmentation](#data-augmentation)
64
+ - [Development Environment](#development-environment)
65
+
66
+ ## Reference Guides
67
+
68
+ - [Command Reference](https://github.com/jhuus/BriteKit/blob/master/command-reference.md)
69
+ - [Command API Reference](https://github.com/jhuus/BriteKit/blob/master/command-api-reference.md)
70
+ - [General API Reference](https://github.com/jhuus/BriteKit/blob/master/api-reference.md)
71
+ - [Configuration Reference](https://github.com/jhuus/BriteKit/blob/master/config-reference.md)
72
+
73
+ # Getting Started
74
+
75
+ -----
76
+
77
+ ## Introduction
78
+ BriteKit (Bioacoustic Recognizer Technology Kit) is a Python package that facilitates the development of bioacoustic recognizers using deep learning.
79
+ It provides a command-line interface (CLI) as well as a Python API, to support functions such as:
80
+ - downloading recordings from Xeno-Canto, iNaturalist, and YouTube (optionally using Google Audioset metadata)
81
+ - managing training data in a SQLite database
82
+ - training models
83
+ - testing, tuning and calibrating models
84
+ - reporting
85
+ - deployment and inference
86
+
87
+ To view a list of BriteKit commands, type `britekit --help`. You can also get help for individual commands, e.g. `britekit train --help` describes the `train` command.
88
+ When accessing BriteKit from Python, the `britekit.commands` namespace contains a function for each command, as documented [here](https://github.com/jhuus/BriteKit/blob/master/command-api-reference.md).
89
+ The classes used by the commands can also be accessed, and are documented [here](https://github.com/jhuus/BriteKit/blob/master/api-reference.md).
90
+ ## License
91
+ BriteKit is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.
92
+ ## Installation
93
+ It is best to install BriteKit in a virtual environment, such as a [Python venv](https://docs.python.org/3/library/venv.html). Once you have that set up, install the BriteKit package using pip:
94
+ ```console
95
+ pip install britekit
96
+ ```
97
+ In Windows environments, you then need to uninstall and reinstall PyTorch:
98
+ ```
99
+ pip uninstall -y torch torchvision torchaudio
100
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
101
+ ```
102
+ Note that cu126 refers to CUDA 12.6.\
103
+ Once BriteKit is installed, initialize a working environment using the `init` command:
104
+ ```console
105
+ britekit init --dest=<directory path>
106
+ ```
107
+ This creates the directories needed and installs sample files. If you omit `--dest`, it will create
108
+ directories under the current working directory.
109
+ ## Configuration
110
+ Configuration parameters are documented [here](https://github.com/jhuus/BriteKit/blob/master/config-reference.md). After running `britekit init`, the file `yaml/base_config.yaml` contains all parameters in YAML format.
111
+ Most CLI commands have a `--config` argument that allows you to specify the path to a YAML file that overrides selected parameters. For example, when running the [train](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-train) command,
112
+ you could provide a YAML file containing the following:
113
+ ```
114
+ train:
115
+   model_type: "effnet.4"
116
+   learning_rate: .002
117
+   drop_rate: 0.1
118
+   num_epochs: 20
119
+ ```
120
+ This overrides the default values for `model_type`, `learning_rate`, `drop_rate` and `num_epochs`. When using the API, you can update configuration parameters like this:
121
+ ```
122
+ import britekit as bk
123
+ cfg = bk.get_config()
124
+ cfg.train.model_type = "effnet.4"
125
+ ```
126
+ ## Downloading Recordings
127
+ The [inat](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-inat), [xeno](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-xeno) and [youtube](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-youtube) commands make it easy to download recordings from Xeno-Canto, iNaturalist and YouTube. For iNaturalist it is important to provide the scientific name. For example, to download recordings of the American Green Frog (lithobates clamitans), type:
128
+ ```
129
+ britekit inat --name "lithobates clamitans" --output <output-path>
130
+ ```
131
+ For Xeno-Canto, use `--name` for the common name or `--sci` for the scientific name. For YouTube, specify the ID of the corresponding video. For example, specify `--id K_EsxukdNXM` to download the audio from https://www.youtube.com/watch?v=K_EsxukdNXM.
132
+
133
+ The [audioset](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-audioset) command lets you download using [Google Audioset](https://research.google.com/audioset/), which is metadata that classifies sounds in YouTube videos. Audioset was released in March 2017, so any videos uploaded later than that are not included. Also, some videos that are tagged in Audioset are no longer available. Type `britekit audioset --help` for more information.
134
+ ## Managing Training Data
135
+ Once you have a collection of recordings, the steps to prepare it for training are:
136
+ 1. Extract spectrograms from recordings and insert them into the training database.
137
+ 2. Curate the training spectrograms.
138
+ 3. Create a pickle file from the training data.
139
+ Then provide the path to the pickle file when running training.
140
+
141
+ Suppose we have a folder called `recordings/cow`. To generate spectrograms and insert them into the training database, we could type `britekit extract-all --name Cow --dir recordings/cow`. This will create a SQLite database in `data/training.db` and populate it with spectrograms using the default configuration.
142
+ To browse the database, you can use [DB Browser for SQLite](https://sqlitebrowser.org/), or a similar application.
143
+ That will reveal the following tables:
144
+ - Class: classes that the recognizer will be trained to identify, e.g. American Robin
145
+ - Category: categories such as Bird, Mammal or Amphibian
146
+ - Source: sources of recordings, e.g. Xeno-Canto or iNaturalist.
147
+ - Recording: individual recordings
148
+ - Segment: fixed-length sections of recordings
149
+ - SpecGroup: groups of spectrograms that share spectrogram parameters
150
+ - SpecValue: spectrograms, each referencing a Segment and SpecGroup
151
+ - SegmentClass: associations between Segment and Class, to identify the classes that occur in a segment
152
+
153
+ There are commands to add or delete database records, e.g. [add-cat](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-add-cat) and [del-cat](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-del-cat) to add or delete a category record. In addition, specifying the `--cat` argument with the [extract-all](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-all) or [extract-by-image](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-by-image) commands will add the required category record if it does not exist. You can plot database spectrograms using [plot-db](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-plot-db), or plot spectrograms for recordings using [plot-rec](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-plot-rec) or [plot-dir](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-plot-dir). Once you have a folder of spectrogram images, you can manually delete or copy some of them. The [extract-by-image](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-by-image) command will then extract only the spectrograms corresponding to the given images. Similarly, the [del-seg](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-del-seg) command will delete segments, and their spectrograms, corresponding to the images in a directory.
154
+
155
+ It is important to tune spectrogram parameters such as height, width, maximum/minimum frequency and window length for your specific application. This is discussed more in the [tuning](#tuning) section below, but for now be aware that you can set specific parameters in a YAML file to pass to an extract or plot command. For example:
156
+ ```
157
+ audio:
158
+   min_freq: 350
159
+   max_freq: 4000
160
+   win_length: .08
161
+   spec_height: 192
162
+   spec_width: 256
163
+ ```
164
+ The FFT window length is specified as a fraction of a second: .08 seconds in this example. That way the real window length does not vary if you change the sampling rate. As a rule of thumb, the sampling rate should be about 2.1 times the maximum frequency. Before training your first model, it is advisable to examine some spectrogram images and choose settings that seem reasonable as a starting point. For example, the frequency range needed for your application may be greater or less than the defaults.
165
+
166
+ The SpecGroup table allows you to easily experiment with different spectrogram settings. Running [extract-all](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-all) or [extract-by-image](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-by-image) creates spectrograms assigned to the default SpecGroup, if none is specified. Once you have curated some training data, use the [reextract](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-reextract) command to create another set of spectrograms, assigned to a different SpecGroup. That way you can keep spectrograms with different settings for easy experimentation.
167
+ ## Training
168
+ The [pickle](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-pickle) command creates a binary pickle file (`data/training.pkl` by default), which is the source of training data for the [train](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-train) command. Reading a binary file is much faster than querying the database, so this speeds up the training process. Also, this provides a simple way to select a SpecGroup, and/or a subset of classes for training. For training, you should always provide a config file to override some defaults. Here is an expanded version of the earlier example:
169
+ ```
170
+ train:
171
+   train_pickle: "data/low_freq.pkl"
172
+   model_type: "effnet.4"
173
+   head_type: "basic_sed"
174
+   learning_rate: .002
175
+   drop_rate: 0.1
176
+   drop_path_rate: 0.1
177
+   val_portion: 0.1
178
+   num_epochs: 20
179
+ ```
180
+ The `model_type` parameter can be "timm.x" for any model x supported by [timm](https://github.com/huggingface/pytorch-image-models). However, many bioacoustic recognizers benefit from a smaller model than typical timm models. Therefore BriteKit provides a set of scalable models, such as "effnet.3" and "effnet.4", where larger numbers indicate larger models. The scalable models are:
181
+ | Model | Original Name | Comments | Original Paper |
182
+ |---|---|---|---|
183
+ | dla | DLA | Slow and not good for large models, but works well for some very small models. | [here](https://arxiv.org/abs/1707.06484) |
184
+ | effnet | EfficientNetV2 | Medium speed, widely used, useful for all sizes. | [here](https://arxiv.org/abs/2104.00298) |
185
+ | gernet | GerNet | Fast, useful for all but the smallest models. | [here](https://arxiv.org/abs/2006.14090) |
186
+ | hgnet | HgNetV2 | Fast, useful for all but the smallest models. | not published |
187
+ | vovnet | VovNet | Medium-fast, useful for all sizes. | [here](https://arxiv.org/abs/1904.09730) |
188
+
189
+ For very small models, say with fewer than 10 classes and just a few thousand training spectrograms, DLA and VovNet are good candidates. As model size increases, DLA becomes slower and less appropriate. Of course, it is best to try different models and model sizes to see which works best for your application.
190
+
191
+ If `head_type` is not specified, BriteKit uses the default classifier head defined by the model. However, you can also specify any of the following head types:
192
+ | Head Type | Description |
193
+ |---|---|
194
+ | basic | A basic non-SED classifier head. |
195
+ | effnet | The classifier head used in EfficientNetV2. |
196
+ | hgnet | The classifier head used in HgNetV2. |
197
+ | basic_sed | A basic SED head. |
198
+ | scalable_sed | The basic_sed head can be larger than desired, and this one allows you to control the size. |
199
+
200
+ Specifying `head_type="effnet"` is sometimes helpful for other models such as DLA and VovNet. See the discussion of [Backbones and Classifier Heads](#backbones-and-classifier-heads) below for more information.
201
+
202
+ You can specify `val_portion` > 0 to run validation on a portion of the training data, or `num_folds` > 1 to run k-fold cross-validation. In the latter case, training output will be in logs/fold-0/version_x etc. Otherwise it is under logs/version_x. Output from the first training run is saved in version_0, and the version number is incremented in subsequent runs. To view graphs of the loss and learning rate, type `tensorboard --logdir <log directory>`. This will launch an embedded web server and display a URL that you can use to view graphs such as the learning rate in a web browser.
203
+
204
+ ## Testing
205
+ To run a test, you need to annotate a set of test recordings, analyze them with your model or ensemble, and then run the [rpt-test](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-rpt-test) command. Annotations must be saved in a CSV file with a defined format. For initial testing and tuning it is best to annotate each relevant sound (per-segment), but for later usage you may wish to use per-block (e.g. minute) or per-recording annotations. Per-recording annotations are defined in a CSV file with these columns:
206
+ | Column | Description |
207
+ |---|---|
208
+ | recording | Just the stem of the recording name, e.g. XC12345, not XC12345.mp3. |
209
+ | classes | Defined classes found in the recording, separated by commas. For example: AMCR,BCCH,COYE. |
210
+
211
+ Per-block annotations are defined in a CSV file with these columns:
212
+ | Column | Description |
213
+ |---|---|
214
+ | recording | Just the stem of the recording name, as above. |
215
+ | block | 1 for the first block (e.g. minute), 2 for the second, etc. |
216
+ | classes | Defined classes found in that block, if any, separated by commas. |
217
+
218
+ Per-segment annotations are recommended, and are defined in a CSV file with these columns:
219
+ | Column | Description |
220
+ |---|---|
221
+ | recording | Just the stem of the recording name, as above. |
222
+ | class | Identified class. |
223
+ | start_time | Where the sound starts, in seconds from the start of the recording. |
224
+ | end_time | Where the sound ends, in seconds from the start of the recording. |
225
+
226
+ Use the [analyze](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-analyze) command to analyze the recordings with your model or ensemble. For testing, be sure to specify `--min_score 0`. That way all predictions will be saved, not just those above a particular threshold, which is important when calculating metrics. See [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc) for more information.
227
+
228
+ It's usually best for a test to consist of a single directory of recordings, containing a file called annotations.csv. If that directory is called recordings and you run analyze specifying `--output recordings/labels`, you could generate test reports as follows:
229
+ ```
230
+ britekit rpt-test -a recordings/annotations.csv -l labels -o <output-dir>
231
+ ```
232
+ If your annotations were per-block or per-recording, you would specify the `--granularity block` or `--granularity recording` argument (`--granularity segment` is the default).
233
+ ## Tuning
234
+ Before tuning your model, you need to create a good test, as described in the previous section. Then you can use the [tune](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-tune) command to find optimal settings for a given test. If you are only tuning inference parameters, you can run many iterations very quickly, since no training is needed. To tune training hyperparameters, many training runs are needed, which takes longer. You can also use the [tune](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-tune) command to tune audio and spectrogram settings. In that case, every iteration extracts a new set of spectrograms, which takes even longer.
235
+
236
+ Here is a practical approach:
237
+ 1. Review spectrogram plots with different settings, especially `spec_duration`, `spec_width`, `spec_height`, `min_freq`, `max_freq` and `win_length`. Then choose reasonable-looking initial settings. For example, if all the relevant sounds fall between 1000 and 5000 Hz, set min and max frequency accordingly.
238
+ 2. Tune the main training hyperparameters, especially `model_type`, `head_type` and `num_epochs`.
239
+ 3. Tune the audio/spectrogram hyperparameters.
240
+ 4. Tune data augmentation hyperparameters, which are described in the [Data Augmentation](#data-augmentation) section below.
241
+ 5. Tune the inference `audio_power` hyperparameter.
242
+ 6. Perform a second tuning pass, starting at step 2 above.
243
+
244
+ This usually leads to a substantial improvement in scores (see [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc)). If you are using a SED classifier head, it is also worth tuning `segment_len` and `overlap`.
245
+
246
+ To run the [tune](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-tune) command, you would typically use a config YAML file as described earlier, plus a special tuning YAML file, as in this example:
247
+ ```
248
+ - name: spec_width
249
+   type: int
250
+   bounds:
251
+     - 256
252
+     - 512
253
+   step: 64
254
+ ```
255
+ This gives the name of the parameter to tune, its data type, and the bounds and step sizes to try. In this case, we want to try `spec_width` values of 256, 320, 384, 448 and 512. You can also tune multiple parameters at the same time, by simply appending more definitions similar to this one. Parameters that have a choice of defined values rather than a range are specified like this:
256
+ ```
257
+ - name: head_type
258
+   type: categorical
259
+   choices:
260
+     - "effnet"
261
+     - "hgnet"
262
+     - "basic_sed"
263
+ ```
264
+ When running the [tune](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-tune) command, you can ask it to test all defined combinations based on the input, or to test a random sample. To try 100 random combinations, add the argument `--tries 100`. To tune audio/spectrogram parameters, add the `--extract` argument. To tune inference only, add the `--notrain` argument.
265
+
266
+ Training is non-deterministic, and results for a given group of settings can vary substantially across multiple training runs. Therefore it is important to specify the `--runs` argument, indicating how often training should be run for a given set of values.
267
+
268
+ As an example, to find the best `spec_width` value, we could type a command like this:
269
+ ```
270
+ britekit tune -c yaml/my_train.yml -p yaml/my_tune.yml -a my_test/annotations.csv -o output/tune-spec-width --runs 5 --extract
271
+ ```
272
+ This will perform an extract before each trial, and use the average score from 5 training runs in each case. Scores will be based on the given test, using macro-averaged ROC-AUC, although this can be changed with the `--metric` argument.
273
+
274
+ ## Ensembling
275
+ Combining multiple checkpoints in an ensemble is a quick and easy way to improve classifier results. This can be especially powerful when different model architectures are used, but even with the same model type and training protocol, ensembling almost always improves results.
276
+
277
+ Using an ensemble is very easy - just copy all the ensemble checkpoint files to the data/ckpt directory (or whichever directory is specified by the `ckpt_folder` configuration parameter). With too many models in an ensemble, inference will become very slow, and at some point there is no benefit to adding more checkpoints anyway. In most cases an ensemble of 3-6 checkpoints is best.
278
+
279
+ Given a per-segment test and a directory containing checkpoints, use the [ensemble](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-ensemble) command to find the highest-scoring ensemble of a given size.
280
+
281
+ ## Calibrating
282
+ By default, the scores or predictions generated by your models may not align well with probabilities. Ideally, a score of .8 should be correct about 80% of the time, but for a given ensemble it might actually be correct 70% or 90% of the time. Aligning output predictions with probabilities is called calibration.
283
+
284
+ Use the [calibrate](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-calibrate) command to calibrate your model or ensemble. Given a per-segment test and inference output it will generate a graph showing the uncalibrated and calibrated scores. Calibrated scores are based on a corresponding scaling coefficient and intercept. To use these values, set the `scaling_coefficient` and `scaling_intercept` parameters in your inference configuration.
285
+
286
+ # More Information
287
+
288
+ -----
289
+
290
+ ## Spectrograms
291
+ TBD
292
+ ## Backbones and Classifier Heads
293
+ TBD
294
+ ## Metrics (PR-AUC and ROC-AUC)
295
+ TBD
296
+ ## Data Augmentation
297
+ TBD
298
+ ## Development Environment
299
+ TBD
@@ -0,0 +1,260 @@
1
+ # BriteKit
2
+
3
+ -----
4
+
5
+ ## Getting Started
6
+
7
+ - [Introduction](#introduction)
8
+ - [License](#license)
9
+ - [Installation](#installation)
10
+ - [Configuration](#configuration)
11
+ - [Downloading Recordings](#downloading-recordings)
12
+ - [Managing Training Data](#managing-training-data)
13
+ - [Training](#training)
14
+ - [Testing](#testing)
15
+ - [Tuning](#tuning)
16
+ - [Ensembling](#ensembling)
17
+ - [Calibrating](#calibrating)
18
+
19
+ ## More Information
20
+
21
+ - [Spectrograms](#spectrograms)
22
+ - [Backbones and Classifier Heads](#backbones-and-classifier-heads)
23
+ - [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc)
24
+ - [Data Augmentation](#data-augmentation)
25
+ - [Development Environment](#development-environment)
26
+
27
+ ## Reference Guides
28
+
29
+ - [Command Reference](https://github.com/jhuus/BriteKit/blob/master/command-reference.md)
30
+ - [Command API Reference](https://github.com/jhuus/BriteKit/blob/master/command-api-reference.md)
31
+ - [General API Reference](https://github.com/jhuus/BriteKit/blob/master/api-reference.md)
32
+ - [Configuration Reference](https://github.com/jhuus/BriteKit/blob/master/config-reference.md)
33
+
34
+ # Getting Started
35
+
36
+ -----
37
+
38
+ ## Introduction
39
+ BriteKit (Bioacoustic Recognizer Technology Kit) is a Python package that facilitates the development of bioacoustic recognizers using deep learning.
40
+ It provides a command-line interface (CLI) as well as a Python API, to support functions such as:
41
+ - downloading recordings from Xeno-Canto, iNaturalist, and YouTube (optionally using Google Audioset metadata)
42
+ - managing training data in a SQLite database
43
+ - training models
44
+ - testing, tuning and calibrating models
45
+ - reporting
46
+ - deployment and inference
47
+
48
+ To view a list of BriteKit commands, type `britekit --help`. You can also get help for individual commands, e.g. `britekit train --help` describes the `train` command.
49
+ When accessing BriteKit from Python, the `britekit.commands` namespace contains a function for each command, as documented [here](https://github.com/jhuus/BriteKit/blob/master/command-api-reference.md).
50
+ The classes used by the commands can also be accessed, and are documented [here](https://github.com/jhuus/BriteKit/blob/master/api-reference.md).
51
+ ## License
52
+ BriteKit is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.
53
+ ## Installation
54
+ It is best to install BriteKit in a virtual environment, such as a [Python venv](https://docs.python.org/3/library/venv.html). Once you have that set up, install the BriteKit package using pip:
55
+ ```console
56
+ pip install britekit
57
+ ```
58
+ In Windows environments, you then need to uninstall and reinstall PyTorch:
59
+ ```
60
+ pip uninstall -y torch torchvision torchaudio
61
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
62
+ ```
63
+ Note that cu126 refers to CUDA 12.6.\
64
+ Once BriteKit is installed, initialize a working environment using the `init` command:
65
+ ```console
66
+ britekit init --dest=<directory path>
67
+ ```
68
+ This creates the directories needed and installs sample files. If you omit `--dest`, it will create
69
+ directories under the current working directory.
70
+ ## Configuration
71
+ Configuration parameters are documented [here](https://github.com/jhuus/BriteKit/blob/master/config-reference.md). After running `britekit init`, the file `yaml/base_config.yaml` contains all parameters in YAML format.
72
+ Most CLI commands have a `--config` argument that allows you to specify the path to a YAML file that overrides selected parameters. For example, when running the [train](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-train) command,
73
+ you could provide a YAML file containing the following:
74
+ ```
75
+ train:
76
+ model_type: "effnet.4"
77
+ learning_rate: .002
78
+ drop_rate: 0.1
79
+ num_epochs: 20
80
+ ```
81
+ This overrides the default values for `model_type`, `learning_rate`, `drop_rate` and `num_epochs`. When using the API, you can update configuration parameters like this:
82
+ ```
83
+ import britekit as bk
84
+ cfg = bk.get_config()
85
+ cfg.train.model_type = "effnet.4"
86
+ ```
87
+ ## Downloading Recordings
88
+ The [inat](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-inat), [xeno](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-xeno) and [youtube](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-youtube) commands make it easy to download recordings from iNaturalist, Xeno-Canto and YouTube, respectively. For iNaturalist it is important to provide the scientific name. For example, to download recordings of the American Green Frog (Lithobates clamitans), type:
89
+ ```
90
+ britekit inat --name "lithobates clamitans" --output <output-path>
91
+ ```
92
+ For Xeno-Canto, use `--name` for the common name or `--sci` for the scientific name. For YouTube, specify the ID of the corresponding video. For example, specify `--id K_EsxukdNXM` to download the audio from https://www.youtube.com/watch?v=K_EsxukdNXM.
93
+
94
+ The [audioset](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-audioset) command lets you download using [Google Audioset](https://research.google.com/audioset/), which is metadata that classifies sounds in YouTube videos. Audioset was released in March 2017, so any videos uploaded later than that are not included. Also, some videos that are tagged in Audioset are no longer available. Type `britekit audioset --help` for more information.
95
+ ## Managing Training Data
96
+ Once you have a collection of recordings, the steps to prepare it for training are:
97
+ 1. Extract spectrograms from recordings and insert them into the training database.
98
+ 2. Curate the training spectrograms.
99
+ 3. Create a pickle file from the training data.
100
+ Then provide the path to the pickle file when running training.
101
+
102
+ Suppose we have a folder called `recordings/cow`. To generate spectrograms and insert them into the training database, we could type `britekit extract-all --name Cow --dir recordings/cow`. This will create a SQLite database in `data/training.db` and populate it with spectrograms using the default configuration.
103
+ To browse the database, you can use [DB Browser for SQLite](https://sqlitebrowser.org/), or a similar application.
104
+ That will reveal the following tables:
105
+ - Class: classes that the recognizer will be trained to identify, e.g. American Robin
106
+ - Category: categories such as Bird, Mammal or Amphibian
107
+ - Source: sources of recordings, e.g. Xeno-Canto or iNaturalist.
108
+ - Recording: individual recordings
109
+ - Segment: fixed-length sections of recordings
110
+ - SpecGroup: groups of spectrograms that share spectrogram parameters
111
+ - SpecValue: spectrograms, each referencing a Segment and SpecGroup
112
+ - SegmentClass: associations between Segment and Class, to identify the classes that occur in a segment
113
+
114
+ There are commands to add or delete database records, e.g. [add-cat](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-add-cat) and [del-cat](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-del-cat) to add or delete a category record. In addition, specifying the `--cat` argument with the [extract-all](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-all) or [extract-by-image](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-by-image) commands will add the required category record if it does not exist. You can plot database spectrograms using [plot-db](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-plot-db), or plot spectrograms for recordings using [plot-rec](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-plot-rec) or [plot-dir](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-plot-dir). Once you have a folder of spectrogram images, you can manually delete or copy some of them. The [extract-by-image](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-by-image) command will then extract only the spectrograms corresponding to the given images. Similarly, the [del-seg](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-del-seg) command will delete segments, and their spectrograms, corresponding to the images in a directory.
115
+
116
+ It is important to tune spectrogram parameters such as height, width, maximum/minimum frequency and window length for your specific application. This is discussed more in the [Tuning](#tuning) section below, but for now be aware that you can set specific parameters in a YAML file to pass to an extract or plot command. For example:
117
+ ```
118
+ audio:
119
+ min_freq: 350
120
+ max_freq: 4000
121
+ win_length: .08
122
+ spec_height: 192
123
+ spec_width: 256
124
+ ```
125
+ The FFT window length is specified as a fraction of a second: .08 seconds in this example. That way the real window length does not vary if you change the sampling rate. As a rule of thumb, the sampling rate should be about 2.1 times the maximum frequency. Before training your first model, it is advisable to examine some spectrogram images and choose settings that seem reasonable as a starting point. For example, the frequency range needed for your application may be greater or less than the defaults.
126
+
127
+ The SpecGroup table allows you to easily experiment with different spectrogram settings. Running [extract-all](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-all) or [extract-by-image](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-extract-by-image) creates spectrograms assigned to the default SpecGroup, if none is specified. Once you have curated some training data, use the [reextract](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-reextract) command to create another set of spectrograms, assigned to a different SpecGroup. That way you can keep spectrograms with different settings for easy experimentation.
128
+ ## Training
129
+ The [pickle](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-pickle) command creates a binary pickle file (`data/training.pkl` by default), which is the source of training data for the [train](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-train) command. Reading a binary file is much faster than querying the database, so this speeds up the training process. Also, this provides a simple way to select a SpecGroup, and/or a subset of classes for training. For training, you should always provide a config file to override some defaults. Here is an expanded version of the earlier example:
130
+ ```
131
+ train:
132
+ train_pickle: "data/low_freq.pkl"
133
+ model_type: "effnet.4"
134
+ head_type: "basic_sed"
135
+ learning_rate: .002
136
+ drop_rate: 0.1
137
+ drop_path_rate: 0.1
138
+ val_portion: 0.1
139
+ num_epochs: 20
140
+ ```
141
+ The `model_type` parameter can be "timm.x" for any model x supported by [timm](https://github.com/huggingface/pytorch-image-models). However, many bioacoustic recognizers benefit from a smaller model than typical timm models. Therefore BriteKit provides a set of scalable models, such as "effnet.3" and "effnet.4", where larger numbers indicate larger models. The scalable models are:
142
+ | Model | Original Name | Comments | Original Paper |
143
+ |---|---|---|---|
144
+ | dla | DLA | Slow and not good for large models, but works well for some very small models. | [here](https://arxiv.org/abs/1707.06484) |
145
+ | effnet | EfficientNetV2 | Medium speed, widely used, useful for all sizes. | [here](https://arxiv.org/abs/2104.00298) |
146
+ | gernet | GerNet | Fast, useful for all but the smallest models. | [here](https://arxiv.org/abs/2006.14090) |
147
+ | hgnet | HgNetV2| Fast, useful for all but the smallest models. | not published |
148
+ | vovnet | VovNet | Medium-fast, useful for all sizes. | [here](https://arxiv.org/abs/1904.09730) |
149
+
150
+ For very small models, say with fewer than 10 classes and just a few thousand training spectrograms, DLA and VovNet are good candidates. As model size increases, DLA becomes slower and less appropriate. Of course, it is best to try different models and model sizes to see which works best for your application.
151
+
152
+ If `head_type` is not specified, BriteKit uses the default classifier head defined by the model. However, you can also specify any of the following head types:
153
+ | Head Type | Description |
154
+ |---|---|
155
+ | basic | A basic non-SED classifier head. |
156
+ | effnet | The classifier head used in EfficientNetV2. |
157
+ | hgnet | The classifier head used in HgNetV2. |
158
+ | basic_sed | A basic SED head. |
159
+ | scalable_sed | The basic_sed head can be larger than desired, and this one allows you to control the size. |
160
+
161
+ Specifying `head_type="effnet"` is sometimes helpful for other models such as DLA and VovNet. See the discussion of [Backbones and Classifier Heads](#backbones-and-classifier-heads) below for more information.
162
+
163
+ You can specify `val_portion` > 0 to run validation on a portion of the training data, or `num_folds` > 1 to run k-fold cross-validation. In the latter case, training output will be in logs/fold-0/version_x etc. Otherwise it is under logs/version_x. Output from the first training run is saved in version_0, and the version number is incremented in subsequent runs. To view graphs of the loss and learning rate, type `tensorboard --logdir <log directory>`. This will launch an embedded web server and display a URL that you can use to view graphs such as the learning rate in a web browser.
164
+
165
+ ## Testing
166
+ To run a test, you need to annotate a set of test recordings, analyze them with your model or ensemble, and then run the [rpt-test](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-rpt-test) command. Annotations must be saved in a CSV file with a defined format. For initial testing and tuning it is best to annotate each relevant sound (per-segment), but for later usage you may wish to use per-block (e.g. minute) or per-recording annotations. Per-recording annotations are defined in a CSV file with these columns:
167
+ | Column | Description |
168
+ |---|---|
169
+ | recording | Just the stem of the recording name, e.g. XC12345, not XC12345.mp3. |
170
+ | classes | Defined classes found in the recording, separated by commas. For example: AMCR,BCCH,COYE. |
171
+
172
+ Per-block annotations are defined in a CSV file with these columns:
173
+ | Column | Description |
174
+ |---|---|
175
+ | recording | Just the stem of the recording name, as above. |
176
+ | block | 1 for the first block (e.g. minute), 2 for the second, etc. |
177
+ | classes | Defined classes found in that block, if any, separated by commas. |
178
+
179
+ Per-segment annotations are recommended, and are defined in a CSV file with these columns:
180
+ | Column | Description |
181
+ |---|---|
182
+ | recording | Just the stem of the recording name, as above. |
183
+ | class | Identified class. |
184
+ | start_time | Where the sound starts, in seconds from the start of the recording. |
185
+ | end_time | Where the sound ends, in seconds from the start of the recording. |
186
+
187
+ Use the [analyze](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-analyze) command to analyze the recordings with your model or ensemble. For testing, be sure to specify `--min_score 0`. That way all predictions will be saved, not just those above a particular threshold, which is important when calculating metrics. See [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc) for more information.
188
+
189
+ It's usually best for a test to consist of a single directory of recordings, containing a file called annotations.csv. If that directory is called recordings and you run analyze specifying `--output recordings/labels`, you could generate test reports as follows:
190
+ ```
191
+ britekit rpt-test -a recordings/annotations.csv -l labels -o <output-dir>
192
+ ```
193
+ If your annotations were per-block or per-recording, you would specify the `--granularity block` or `--granularity recording` argument (`--granularity segment` is the default).
194
+ ## Tuning
195
+ Before tuning your model, you need to create a good test, as described in the previous section. Then you can use the [tune](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-tune) command to find optimal settings for a given test. If you are only tuning inference parameters, you can run many iterations very quickly, since no training is needed. To tune training hyperparameters, many training runs are needed, which takes longer. You can also use the [tune](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-tune) command to tune audio and spectrogram settings. In that case, every iteration extracts a new set of spectrograms, which takes even longer.
196
+
197
+ Here is a practical approach:
198
+ 1. Review spectrogram plots with different settings, especially `spec_duration`, `spec_width`, `spec_height`, `min_freq`, `max_freq` and `win_length`. Then choose reasonable-looking initial settings. For example, if all the relevant sounds fall between 1000 and 5000 Hz, set `min_freq` and `max_freq` accordingly.
199
+ 2. Tune the main training hyperparameters, especially `model_type`, `head_type` and `num_epochs`.
200
+ 3. Tune the audio/spectrogram hyperparameters.
201
+ 4. Tune data augmentation hyperparameters, which are described in the [Data Augmentation](#data-augmentation) section below.
202
+ 5. Tune the inference `audio_power` hyperparameter.
203
+ 6. Perform a second tuning pass, starting at step 2 above.
204
+
205
+ This usually leads to a substantial improvement in scores (see [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc)). If you are using a SED classifier head, it is also worth tuning `segment_len` and `overlap`.
206
+
207
+ To run the [tune](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-tune) command, you would typically use a config YAML file as described earlier, plus a special tuning YAML file, as in this example:
208
+ ```
209
+ - name: spec_width
210
+ type: int
211
+ bounds:
212
+ - 256
213
+ - 512
214
+ step: 64
215
+ ```
216
+ This gives the name of the parameter to tune, its data type, and the bounds and step sizes to try. In this case, we want to try `spec_width` values of 256, 320, 384, 448 and 512. You can also tune multiple parameters at the same time, by simply appending more definitions similar to this one. Parameters that have a choice of defined values rather than a range are specified like this:
217
+ ```
218
+ - name: head_type
219
+ type: categorical
220
+ choices:
221
+ - "effnet"
222
+ - "hgnet"
223
+ - "basic_sed"
224
+ ```
225
+ When running the [tune](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-tune) command, you can ask it to test all defined combinations based on the input, or to test a random sample. To try 100 random combinations, add the argument `--tries 100`. To tune audio/spectrogram parameters, add the `--extract` argument. To tune inference only, add the `--notrain` argument.
226
+
227
+ Training is non-deterministic, and results for a given group of settings can vary substantially across multiple training runs. Therefore it is important to specify the `--runs` argument, indicating how many times training should be run for each set of values, so that scores can be averaged across runs.
228
+
229
+ As an example, to find the best `spec_width` value, we could type a command like this:
230
+ ```
231
+ britekit tune -c yaml/my_train.yml -p yaml/my_tune.yml -a my_test/annotations.csv -o output/tune-spec-width --runs 5 --extract
232
+ ```
233
+ This will perform an extract before each trial, and use the average score from 5 training runs in each case. Scores will be based on the given test, using macro-averaged ROC-AUC, although this can be changed with the `--metric` argument.
234
+
235
+ ## Ensembling
236
+ Combining multiple checkpoints in an ensemble is a quick and easy way to improve classifier results. This can be especially powerful when different model architectures are used, but even with the same model type and training protocol, ensembling almost always improves results.
237
+
238
+ Using an ensemble is very easy - just copy all the ensemble checkpoint files to the data/ckpt directory (or whichever directory is specified by the `ckpt_folder` configuration parameter). With too many models in an ensemble, inference will become very slow, and at some point there is no benefit to adding more checkpoints anyway. In most cases an ensemble of 3-6 checkpoints is best.
239
+
240
+ Given a per-segment test and a directory containing checkpoints, use the [ensemble](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-ensemble) command to find the highest-scoring ensemble of a given size.
241
+
242
+ ## Calibrating
243
+ By default, the scores or predictions generated by your models may not align well with probabilities. Ideally, a score of .8 should be correct about 80% of the time, but for a given ensemble it might actually be correct 70% or 90% of the time. Aligning output predictions with probabilities is called calibration.
244
+
245
+ Use the [calibrate](https://github.com/jhuus/BriteKit/blob/master/command-reference.md#britekit-calibrate) command to calibrate your model or ensemble. Given a per-segment test and inference output it will generate a graph showing the uncalibrated and calibrated scores. Calibrated scores are based on a corresponding scaling coefficient and intercept. To use these values, set the `scaling_coefficient` and `scaling_intercept` parameters in your inference configuration.
246
+
247
+ # More Information
248
+
249
+ -----
250
+
251
+ ## Spectrograms
252
+ TBD
253
+ ## Backbones and Classifier Heads
254
+ TBD
255
+ ## Metrics (PR-AUC and ROC-AUC)
256
+ TBD
257
+ ## Data Augmentation
258
+ TBD
259
+ ## Development Environment
260
+ TBD
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Jan Huus <jhuus1@gmail.com>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.1.3"
4
+ __version__ = "0.1.4"
@@ -14,7 +14,7 @@ from ._db_delete import (
14
14
  )
15
15
  from ._embed import embed
16
16
  from ._ensemble import ensemble
17
- from ._extract import extract_all, extract_by_image
17
+ from ._extract import extract_all, extract_by_csv, extract_by_image
18
18
  from ._find_dup import find_dup
19
19
  from ._inat import inat
20
20
  from ._init import init
@@ -57,6 +57,7 @@ __all__ = [
57
57
  "embed",
58
58
  "ensemble",
59
59
  "extract_all",
60
+ "extract_by_csv",
60
61
  "extract_by_image",
61
62
  "find_dup",
62
63
  "find_lr",
@@ -30,15 +30,15 @@ def analyze(
30
30
  CSV files, or both.
31
31
 
32
32
  Args:
33
- cfg_path (str): Path to YAML configuration file defining model and inference settings.
34
- input_path (str): Path to input audio file or directory containing audio files.
35
- output_path (str): Path to output directory where results will be saved.
36
- rtype (str): Output format type. Options are "audacity", "csv", or "both".
37
- min_score (float, optional): Confidence threshold. Predictions below this value are excluded.
38
- num_threads (int, optional): Number of threads to use for processing. Default is 3.
39
- overlap (float, optional): Spectrogram overlap in seconds for sliding window analysis.
40
- segment_len (float, optional): Fixed segment length in seconds. If specified, labels are
41
- fixed-length; otherwise they are variable-length.
33
+ - cfg_path (str): Path to YAML configuration file defining model and inference settings.
34
+ - input_path (str): Path to input audio file or directory containing audio files.
35
+ - output_path (str): Path to output directory where results will be saved.
36
+ - rtype (str): Output format type. Options are "audacity", "csv", or "both".
37
+ - min_score (float, optional): Confidence threshold. Predictions below this value are excluded.
38
+ - num_threads (int, optional): Number of threads to use for processing. Default is 3.
39
+ - overlap (float, optional): Spectrogram overlap in seconds for sliding window analysis.
40
+ - segment_len (float, optional): Fixed segment length in seconds. If specified, labels are
41
+ fixed-length; otherwise they are variable-length.
42
42
  """
43
43
 
44
44
  # defer slow imports to improve --help performance