britekit 0.0.4__tar.gz → 0.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of britekit might be problematic. Click here for more details.

Files changed (124) hide show
  1. {britekit-0.0.4 → britekit-0.0.6}/PKG-INFO +21 -15
  2. {britekit-0.0.4 → britekit-0.0.6}/README.md +18 -12
  3. {britekit-0.0.4 → britekit-0.0.6}/britekit/__about__.py +1 -1
  4. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_audioset.py +1 -0
  5. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_inat.py +1 -0
  6. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_reports.py +4 -3
  7. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_train.py +10 -0
  8. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_xeno.py +1 -0
  9. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_youtube.py +1 -0
  10. {britekit-0.0.4/britekit/install/yaml/samples → britekit-0.0.6/britekit/install/yaml}/base_config.yaml +10 -7
  11. {britekit-0.0.4/install/yaml/samples → britekit-0.0.6/install/yaml}/base_config.yaml +10 -7
  12. {britekit-0.0.4 → britekit-0.0.6}/pyproject.toml +2 -2
  13. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/base_model.py +3 -18
  14. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/testing/per_minute_tester.py +2 -2
  15. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/testing/per_segment_tester.py +1 -1
  16. {britekit-0.0.4 → britekit-0.0.6}/.gitignore +0 -0
  17. {britekit-0.0.4 → britekit-0.0.6}/LICENSE.txt +0 -0
  18. {britekit-0.0.4 → britekit-0.0.6}/britekit/__init__.py +0 -0
  19. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/__init__.py +0 -0
  20. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_analyze.py +0 -0
  21. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_calibrate.py +0 -0
  22. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_ckpt_ops.py +0 -0
  23. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_db_add.py +0 -0
  24. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_db_delete.py +0 -0
  25. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_embed.py +0 -0
  26. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_extract.py +0 -0
  27. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_find_dup.py +0 -0
  28. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_init.py +0 -0
  29. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_pickle.py +0 -0
  30. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_plot.py +0 -0
  31. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_reextract.py +0 -0
  32. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_search.py +0 -0
  33. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_tune.py +0 -0
  34. {britekit-0.0.4 → britekit-0.0.6}/britekit/commands/_wav2mp3.py +0 -0
  35. {britekit-0.0.4 → britekit-0.0.6}/britekit/core/__init__.py +0 -0
  36. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/class_inclusion.csv +0 -0
  37. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/class_list.csv +0 -0
  38. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/aircraft.csv +0 -0
  39. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/car.csv +0 -0
  40. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/chainsaw.csv +0 -0
  41. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/cow.csv +0 -0
  42. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/cricket.csv +0 -0
  43. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/dog.csv +0 -0
  44. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/rain.csv +0 -0
  45. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/rooster.csv +0 -0
  46. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/sheep.csv +0 -0
  47. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/siren.csv +0 -0
  48. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/speech.csv +0 -0
  49. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/truck.csv +0 -0
  50. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/curated/wind.csv +0 -0
  51. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/audioset/unbalanced_train_segments.csv +0 -0
  52. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/classes.csv +0 -0
  53. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/data/ignore.txt +0 -0
  54. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/cfg_infer.yaml +0 -0
  55. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/train_dla.yaml +0 -0
  56. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/train_effnet.yaml +0 -0
  57. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/train_gernet.yaml +0 -0
  58. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/train_hgnet.yaml +0 -0
  59. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/train_timm.yaml +0 -0
  60. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/train_vovnet.yaml +0 -0
  61. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/tune_dropout.yaml +0 -0
  62. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/tune_learning_rate.yaml +0 -0
  63. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/tune_optimizer.yaml +0 -0
  64. {britekit-0.0.4 → britekit-0.0.6}/britekit/install/yaml/samples/tune_smooth.yaml +0 -0
  65. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/class_inclusion.csv +0 -0
  66. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/class_list.csv +0 -0
  67. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/aircraft.csv +0 -0
  68. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/car.csv +0 -0
  69. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/chainsaw.csv +0 -0
  70. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/cow.csv +0 -0
  71. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/cricket.csv +0 -0
  72. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/dog.csv +0 -0
  73. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/rain.csv +0 -0
  74. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/rooster.csv +0 -0
  75. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/sheep.csv +0 -0
  76. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/siren.csv +0 -0
  77. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/speech.csv +0 -0
  78. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/truck.csv +0 -0
  79. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/curated/wind.csv +0 -0
  80. {britekit-0.0.4 → britekit-0.0.6}/install/data/audioset/unbalanced_train_segments.csv +0 -0
  81. {britekit-0.0.4 → britekit-0.0.6}/install/data/classes.csv +0 -0
  82. {britekit-0.0.4 → britekit-0.0.6}/install/data/ignore.txt +0 -0
  83. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/cfg_infer.yaml +0 -0
  84. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/train_dla.yaml +0 -0
  85. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/train_effnet.yaml +0 -0
  86. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/train_gernet.yaml +0 -0
  87. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/train_hgnet.yaml +0 -0
  88. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/train_timm.yaml +0 -0
  89. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/train_vovnet.yaml +0 -0
  90. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/tune_dropout.yaml +0 -0
  91. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/tune_learning_rate.yaml +0 -0
  92. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/tune_optimizer.yaml +0 -0
  93. {britekit-0.0.4 → britekit-0.0.6}/install/yaml/samples/tune_smooth.yaml +0 -0
  94. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/cli.py +0 -0
  95. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/analyzer.py +0 -0
  96. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/audio.py +0 -0
  97. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/augmentation.py +0 -0
  98. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/base_config.py +0 -0
  99. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/config_loader.py +0 -0
  100. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/data_module.py +0 -0
  101. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/dataset.py +0 -0
  102. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/exceptions.py +0 -0
  103. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/pickler.py +0 -0
  104. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/plot.py +0 -0
  105. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/predictor.py +0 -0
  106. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/reextractor.py +0 -0
  107. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/trainer.py +0 -0
  108. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/tuner.py +0 -0
  109. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/core/util.py +0 -0
  110. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/dla.py +0 -0
  111. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/effnet.py +0 -0
  112. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/gernet.py +0 -0
  113. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/head_factory.py +0 -0
  114. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/hgnet.py +0 -0
  115. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/model_loader.py +0 -0
  116. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/timm_model.py +0 -0
  117. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/models/vovnet.py +0 -0
  118. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/occurrence_db/occurrence_data_provider.py +0 -0
  119. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/occurrence_db/occurrence_db.py +0 -0
  120. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/testing/base_tester.py +0 -0
  121. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/testing/per_recording_tester.py +0 -0
  122. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/training_db/extractor.py +0 -0
  123. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/training_db/training_data_provider.py +0 -0
  124. {britekit-0.0.4 → britekit-0.0.6}/src/britekit/training_db/training_db.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: britekit
3
- Version: 0.0.4
3
+ Version: 0.0.6
4
4
  Summary: Core functions for bioacoustic recognizers.
5
5
  Project-URL: Documentation, https://github.com/jhuus/BriteKit#readme
6
6
  Project-URL: Issues, https://github.com/jhuus/BriteKit/issues
@@ -32,8 +32,8 @@ Requires-Dist: scipy<2.0,>=1.15
32
32
  Requires-Dist: soundfile<1.0,>=0.13
33
33
  Requires-Dist: tensorboard<3.0,>=2.19
34
34
  Requires-Dist: timm<2.0,>=1.0.17
35
- Requires-Dist: torch<2.7,>=2.5
36
- Requires-Dist: torchaudio<2.7,>=2.5
35
+ Requires-Dist: torch<2.9,>=2.5
36
+ Requires-Dist: torchaudio<2.9,>=2.5
37
37
  Requires-Dist: yt-dlp>=2025.6.25
38
38
  Description-Content-Type: text/markdown
39
39
 
@@ -65,10 +65,10 @@ Description-Content-Type: text/markdown
65
65
 
66
66
  ## Reference Guides
67
67
 
68
- - [Command Reference](command-reference.md)
69
- - [Command API Reference](command-api-reference.md)
70
- - [General API Reference](api-reference.md)
71
- - [Configuration Reference](config-reference.md)
68
+ - [Command Reference](https://github.com/jhuus/BriteKit/blob/master/command-reference.md)
69
+ - [Command API Reference](https://github.com/jhuus/BriteKit/blob/master/command-api-reference.md)
70
+ - [General API Reference](https://github.com/jhuus/BriteKit/blob/master/api-reference.md)
71
+ - [Configuration Reference](https://github.com/jhuus/BriteKit/blob/master/config-reference.md)
72
72
 
73
73
  # Getting Started
74
74
 
@@ -77,7 +77,7 @@ Description-Content-Type: text/markdown
77
77
  ## Introduction
78
78
  BriteKit (Bioacoustic Recognizer Technology Kit) is a Python package that facilitates the development of bioacoustic recognizers using deep learning.
79
79
  It provides a command-line interface (CLI) as well as a Python API, to support functions such as:
80
- - downloading recordings from Xeno-Canto, iNaturalist, and Youtube (optionally using Google Audioset metadata)
80
+ - downloading recordings from Xeno-Canto, iNaturalist, and YouTube (optionally using Google Audioset metadata)
81
81
  - managing training data in a SQLite database
82
82
  - training models
83
83
  - testing, tuning and calibrating models
@@ -90,10 +90,16 @@ The classes used by the commands can also be accessed, and are documented [here]
90
90
  ## License
91
91
  BriteKit is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.
92
92
  ## Installation
93
- Install the BriteKit package using pip:
93
+ It is best to install BriteKit in a virtual environment, such as a [Python venv](https://docs.python.org/3/library/venv.html). Once you have that set up, install the BriteKit package using pip:
94
94
  ```console
95
95
  pip install britekit
96
96
  ```
97
+ In Windows environments, you then need to uninstall and reinstall PyTorch:
98
+ ```
99
+ pip uninstall -y torch torchvision torchaudio
100
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
101
+ ```
102
+ Note that cu126 refers to CUDA 12.6.\
97
103
  Once BriteKit is installed, initialize a working environment using the `init` command:
98
104
  ```console
99
105
  britekit init --dest=<directory path>
@@ -118,13 +124,13 @@ cfg, _ = bk.get_config()
118
124
  cfg.train.model_type = "effnet.4"
119
125
  ```
120
126
  ## Downloading Recordings
121
- The `inat`, `xeno` and `youtube` commands make it easy to download recordings from Xeno_Canto, iNaturalist and Youtube. For iNaturalist it is important to provide the scientific name. For example, to download recordings of the American Green Frog (lithobates clamitans), type:
127
+ The `inat`, `xeno` and `youtube` commands make it easy to download recordings from Xeno-Canto, iNaturalist and YouTube. For iNaturalist it is important to provide the scientific name. For example, to download recordings of the American Green Frog (lithobates clamitans), type:
122
128
  ```
123
129
  britekit inat --name "lithobates clamitans" --output <output-path>
124
130
  ```
125
- For Xeno-Canto, use `--name` for the common name or `--sci` for the scientific name. For Youtube, specify the ID of the corresponding video. For example, specify `--id K_EsxukdNXM` to download the audio from https://www.youtube.com/watch?v=K_EsxukdNXM.
131
+ For Xeno-Canto, use `--name` for the common name or `--sci` for the scientific name. For YouTube, specify the ID of the corresponding video. For example, specify `--id K_EsxukdNXM` to download the audio from https://www.youtube.com/watch?v=K_EsxukdNXM.
126
132
 
127
- BriteKit also supports downloads using [Google Audioset](https://research.google.com/audioset/), which is metadata that classifies sounds in Youtube videos. Audioset was released in March 2017, so any videos uploaded later than that are not included. Also, some videos that are tagged in Audioset are no longer available. Type `britekit audioset --help` for more information.
133
+ BriteKit also supports downloads using [Google Audioset](https://research.google.com/audioset/), which is metadata that classifies sounds in YouTube videos. Audioset was released in March 2017, so any videos uploaded later than that are not included. Also, some videos that are tagged in Audioset are no longer available. Type `britekit audioset --help` for more information.
128
134
  ## Managing Training Data
129
135
  Once you have a collection of recordings, the steps to prepare it for training are:
130
136
  1. Extract spectrograms from recordings and insert them into the training database.
@@ -200,7 +206,7 @@ To run a test, you need to annotate a set of test recordings, analyze them with
200
206
  | Column | Description |
201
207
  |---|---|
202
208
  | recording | Just the stem of the recording name, e.g. XC12345, not XC12345.mp3. |
203
- | classes | Defined classes found in the recording, separated by commas. For example "AMCR,BCCH,COYE", but without the quotes.
209
+ | classes | Defined classes found in the recording, separated by commas. For example: AMCR,BCCH,COYE. |
204
210
 
205
211
  Per-minute annotations are defined in a CSV file with these columns:
206
212
  | Column | Description |
@@ -232,7 +238,7 @@ Here is a practical approach:
232
238
  2. Do an initial tuning pass of the main training hyperparameters, especially model_type, head_type and num_epochs.
233
239
  3. Based on the above, carefully tune the audio/spectrogram parameters.
234
240
 
235
- This usually leads to a substantial improvement in scores (see [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc), and then you can proceed to fine-tuning the training and inference. For inference, it is usually worth tuning the `audio_power` parameter. If you are using a SED classifier head, it is also worth tuning `segment_len` and `overlap`. For traininf, it may be worth tuning the data augmentation hyperparameters, which are described in detail in the [Data Augmentation](#data-augmentation) section below.
241
+ This usually leads to a substantial improvement in scores (see [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc)), and then you can proceed to fine-tuning the training and inference. For inference, it is usually worth tuning the `audio_power` parameter. If you are using a SED classifier head, it is also worth tuning `segment_len` and `overlap`. For training, it may be worth tuning the data augmentation hyperparameters, which are described in detail in the [Data Augmentation](#data-augmentation) section below.
236
242
 
237
243
  To run the `tune` command, you would typically use a config YAML file as described earlier, plus a special tuning YAML file, as in this example:
238
244
  ```
@@ -243,7 +249,7 @@ To run the `tune` command, you would typically use a config YAML file as describ
243
249
  - 512
244
250
  step: 64
245
251
  ```
246
- This gives the name of the parameter to tune, its datatype, and the bounds and step sizes to try. In this case, we want to try spec_width values of 256, 320, 384, 448 and 512. You can also tune multiple parameters at the same time, by simply appending more definitions similar to this one. Parameters that have a choice of defined values rather than a range are specified like this:
252
+ This gives the name of the parameter to tune, its data type, and the bounds and step sizes to try. In this case, we want to try spec_width values of 256, 320, 384, 448 and 512. You can also tune multiple parameters at the same time, by simply appending more definitions similar to this one. Parameters that have a choice of defined values rather than a range are specified like this:
247
253
  ```
248
254
  - name: head_type
249
255
  type: categorical
@@ -26,10 +26,10 @@
26
26
 
27
27
  ## Reference Guides
28
28
 
29
- - [Command Reference](command-reference.md)
30
- - [Command API Reference](command-api-reference.md)
31
- - [General API Reference](api-reference.md)
32
- - [Configuration Reference](config-reference.md)
29
+ - [Command Reference](https://github.com/jhuus/BriteKit/blob/master/command-reference.md)
30
+ - [Command API Reference](https://github.com/jhuus/BriteKit/blob/master/command-api-reference.md)
31
+ - [General API Reference](https://github.com/jhuus/BriteKit/blob/master/api-reference.md)
32
+ - [Configuration Reference](https://github.com/jhuus/BriteKit/blob/master/config-reference.md)
33
33
 
34
34
  # Getting Started
35
35
 
@@ -38,7 +38,7 @@
38
38
  ## Introduction
39
39
  BriteKit (Bioacoustic Recognizer Technology Kit) is a Python package that facilitates the development of bioacoustic recognizers using deep learning.
40
40
  It provides a command-line interface (CLI) as well as a Python API, to support functions such as:
41
- - downloading recordings from Xeno-Canto, iNaturalist, and Youtube (optionally using Google Audioset metadata)
41
+ - downloading recordings from Xeno-Canto, iNaturalist, and YouTube (optionally using Google Audioset metadata)
42
42
  - managing training data in a SQLite database
43
43
  - training models
44
44
  - testing, tuning and calibrating models
@@ -51,10 +51,16 @@ The classes used by the commands can also be accessed, and are documented [here]
51
51
  ## License
52
52
  BriteKit is distributed under the terms of the [MIT](https://spdx.org/licenses/MIT.html) license.
53
53
  ## Installation
54
- Install the BriteKit package using pip:
54
+ It is best to install BriteKit in a virtual environment, such as a [Python venv](https://docs.python.org/3/library/venv.html). Once you have that set up, install the BriteKit package using pip:
55
55
  ```console
56
56
  pip install britekit
57
57
  ```
58
+ In Windows environments, you then need to uninstall and reinstall PyTorch:
59
+ ```
60
+ pip uninstall -y torch torchvision torchaudio
61
+ pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
62
+ ```
63
+ Note that cu126 refers to CUDA 12.6.\
58
64
  Once BriteKit is installed, initialize a working environment using the `init` command:
59
65
  ```console
60
66
  britekit init --dest=<directory path>
@@ -79,13 +85,13 @@ cfg, _ = bk.get_config()
79
85
  cfg.train.model_type = "effnet.4"
80
86
  ```
81
87
  ## Downloading Recordings
82
- The `inat`, `xeno` and `youtube` commands make it easy to download recordings from Xeno_Canto, iNaturalist and Youtube. For iNaturalist it is important to provide the scientific name. For example, to download recordings of the American Green Frog (lithobates clamitans), type:
88
+ The `inat`, `xeno` and `youtube` commands make it easy to download recordings from Xeno-Canto, iNaturalist and YouTube. For iNaturalist it is important to provide the scientific name. For example, to download recordings of the American Green Frog (lithobates clamitans), type:
83
89
  ```
84
90
  britekit inat --name "lithobates clamitans" --output <output-path>
85
91
  ```
86
- For Xeno-Canto, use `--name` for the common name or `--sci` for the scientific name. For Youtube, specify the ID of the corresponding video. For example, specify `--id K_EsxukdNXM` to download the audio from https://www.youtube.com/watch?v=K_EsxukdNXM.
92
+ For Xeno-Canto, use `--name` for the common name or `--sci` for the scientific name. For YouTube, specify the ID of the corresponding video. For example, specify `--id K_EsxukdNXM` to download the audio from https://www.youtube.com/watch?v=K_EsxukdNXM.
87
93
 
88
- BriteKit also supports downloads using [Google Audioset](https://research.google.com/audioset/), which is metadata that classifies sounds in Youtube videos. Audioset was released in March 2017, so any videos uploaded later than that are not included. Also, some videos that are tagged in Audioset are no longer available. Type `britekit audioset --help` for more information.
94
+ BriteKit also supports downloads using [Google Audioset](https://research.google.com/audioset/), which is metadata that classifies sounds in YouTube videos. Audioset was released in March 2017, so any videos uploaded later than that are not included. Also, some videos that are tagged in Audioset are no longer available. Type `britekit audioset --help` for more information.
89
95
  ## Managing Training Data
90
96
  Once you have a collection of recordings, the steps to prepare it for training are:
91
97
  1. Extract spectrograms from recordings and insert them into the training database.
@@ -161,7 +167,7 @@ To run a test, you need to annotate a set of test recordings, analyze them with
161
167
  | Column | Description |
162
168
  |---|---|
163
169
  | recording | Just the stem of the recording name, e.g. XC12345, not XC12345.mp3. |
164
- | classes | Defined classes found in the recording, separated by commas. For example "AMCR,BCCH,COYE", but without the quotes.
170
+ | classes | Defined classes found in the recording, separated by commas. For example: AMCR,BCCH,COYE. |
165
171
 
166
172
  Per-minute annotations are defined in a CSV file with these columns:
167
173
  | Column | Description |
@@ -193,7 +199,7 @@ Here is a practical approach:
193
199
  2. Do an initial tuning pass of the main training hyperparameters, especially model_type, head_type and num_epochs.
194
200
  3. Based on the above, carefully tune the audio/spectrogram parameters.
195
201
 
196
- This usually leads to a substantial improvement in scores (see [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc), and then you can proceed to fine-tuning the training and inference. For inference, it is usually worth tuning the `audio_power` parameter. If you are using a SED classifier head, it is also worth tuning `segment_len` and `overlap`. For traininf, it may be worth tuning the data augmentation hyperparameters, which are described in detail in the [Data Augmentation](#data-augmentation) section below.
202
+ This usually leads to a substantial improvement in scores (see [Metrics (PR-AUC and ROC-AUC)](#metrics-pr-auc-and-roc-auc)), and then you can proceed to fine-tuning the training and inference. For inference, it is usually worth tuning the `audio_power` parameter. If you are using a SED classifier head, it is also worth tuning `segment_len` and `overlap`. For training, it may be worth tuning the data augmentation hyperparameters, which are described in detail in the [Data Augmentation](#data-augmentation) section below.
197
203
 
198
204
  To run the `tune` command, you would typically use a config YAML file as described earlier, plus a special tuning YAML file, as in this example:
199
205
  ```
@@ -204,7 +210,7 @@ To run the `tune` command, you would typically use a config YAML file as describ
204
210
  - 512
205
211
  step: 64
206
212
  ```
207
- This gives the name of the parameter to tune, its datatype, and the bounds and step sizes to try. In this case, we want to try spec_width values of 256, 320, 384, 448 and 512. You can also tune multiple parameters at the same time, by simply appending more definitions similar to this one. Parameters that have a choice of defined values rather than a range are specified like this:
213
+ This gives the name of the parameter to tune, its data type, and the bounds and step sizes to try. In this case, we want to try spec_width values of 256, 320, 384, 448 and 512. You can also tune multiple parameters at the same time, by simply appending more definitions similar to this one. Parameters that have a choice of defined values rather than a range are specified like this:
208
214
  ```
209
215
  - name: head_type
210
216
  type: categorical
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Jan Huus <jhuus1@gmail.com>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.0.4"
4
+ __version__ = "0.0.6"
@@ -255,6 +255,7 @@ def audioset(
255
255
  help="Path to CSV with curated list of clips.",
256
256
  )
257
257
  @click.option(
258
+ "-o",
258
259
  "--output",
259
260
  "output_dir",
260
261
  required=True,
@@ -103,6 +103,7 @@ def inat(
103
103
  )
104
104
  @click.option("--name", required=True, type=str, help="Species name.")
105
105
  @click.option(
106
+ "-o",
106
107
  "--output",
107
108
  "output_dir",
108
109
  required=True,
@@ -244,12 +244,14 @@ def rpt_epochs(
244
244
  max_roc_score, max_roc_epoch = 0, 0
245
245
  pr_scores = []
246
246
  roc_scores = []
247
- util.set_logging(level=logging.ERROR) # suppress output from Analyzer and PerSegmentTester
248
247
  with tempfile.TemporaryDirectory() as temp_dir:
249
248
  cfg.misc.ckpt_folder = temp_dir
250
249
 
251
250
  for epoch_num in epoch_nums:
251
+ util.set_logging() # restore console output
252
252
  logging.info(f"Processing epoch {epoch_num}")
253
+ # suppress output from Analyzer and PerSegmentTester
254
+ util.set_logging(level=logging.ERROR)
253
255
 
254
256
  # copy checkpoint to temp dir
255
257
  from_path = epoch_to_ckpt[epoch_num]
@@ -270,6 +272,7 @@ def rpt_epochs(
270
272
  temp_dir,
271
273
  threshold=0.8,
272
274
  )
275
+
273
276
  tester.initialize()
274
277
 
275
278
  pr_stats = tester.get_pr_auc_stats()
@@ -288,8 +291,6 @@ def rpt_epochs(
288
291
 
289
292
  os.remove(to_path)
290
293
 
291
- util.set_logging() # restore console output
292
-
293
294
  # Save CSV
294
295
  df = pd.DataFrame()
295
296
  df["epoch"] = epoch_nums
@@ -56,6 +56,16 @@ def _train_cmd(
56
56
  cfg_path: str,
57
57
  ):
58
58
  util.set_logging()
59
+
60
+ import platform, torch
61
+ if platform.system() == "Windows" and not torch.cuda.is_available():
62
+ logging.warning(
63
+ "CUDA is not available. On Windows, reinstall a CUDA-enabled PyTorch build like this:\n"
64
+ " pip uninstall -y torch torchvision torchaudio\n"
65
+ " pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cuxxx\n"
66
+ "For example, use cu126 for CUDA 12.6."
67
+ )
68
+
59
69
  train(cfg_path)
60
70
 
61
71
 
@@ -154,6 +154,7 @@ def xeno(
154
154
  @click.option("--key", type=str, help="Xeno-Canto API key.")
155
155
  @click.option("--name", required=True, type=str, help="Species name.")
156
156
  @click.option(
157
+ "-o",
157
158
  "--output",
158
159
  "output_dir",
159
160
  required=True,
@@ -60,6 +60,7 @@ def youtube(
60
60
  help="Youtube ID.",
61
61
  )
62
62
  @click.option(
63
+ "-o",
63
64
  "--output",
64
65
  "output_dir",
65
66
  required=True,
@@ -11,7 +11,7 @@ audio: !!python/object:britekit.core.base_config.Audio
11
11
  sampling_rate: 18000
12
12
  spec_duration: 5.0
13
13
  spec_height: 128
14
- spec_width: 500
14
+ spec_width: 480
15
15
  top_db: 80
16
16
  win_length: 0.055
17
17
  infer: !!python/object:britekit.core.base_config.Inference
@@ -25,12 +25,12 @@ infer: !!python/object:britekit.core.base_config.Inference
25
25
  min_score: 0.8
26
26
  num_threads: 3
27
27
  openvino_block_size: 100
28
+ overlap: 0.0
28
29
  raise_min_to_confirm: 0.5
29
30
  scaling_coefficient: 1.0
30
31
  scaling_intercept: 0.0
31
32
  seed: 99
32
33
  segment_len: null
33
- spec_overlap_seconds: 0.0
34
34
  misc: !!python/object:britekit.core.base_config.Miscellaneous
35
35
  ckpt_folder: data/ckpt
36
36
  classes_file: data/classes.txt
@@ -41,13 +41,16 @@ misc: !!python/object:britekit.core.base_config.Miscellaneous
41
41
  search_ckpt_path: null
42
42
  source_regexes:
43
43
  - !!python/tuple
44
- - XC\d+
44
+ - ^[A-Za-z0-9_-]{11}-\d+$
45
+ - Audioset
46
+ - !!python/tuple
47
+ - ^XC\d+$
45
48
  - Xeno-Canto
46
49
  - !!python/tuple
47
- - N\d+
50
+ - ^N\d+$
48
51
  - iNaturalist
49
52
  - !!python/tuple
50
- - \d+
53
+ - ^\d+$
51
54
  - Macaulay Library
52
55
  - !!python/tuple
53
56
  - .*
@@ -108,7 +111,7 @@ train: !!python/object:britekit.core.base_config.Training
108
111
  noise_class_name: Noise
109
112
  num_epochs: 10
110
113
  num_folds: 1
111
- num_workers: 2
114
+ num_workers: 3
112
115
  offpeak_weight: 0.002
113
116
  opt_beta1: 0.9
114
117
  opt_beta2: 0.999
@@ -124,6 +127,6 @@ train: !!python/object:britekit.core.base_config.Training
124
127
  shuffle: true
125
128
  test_pickle: null
126
129
  train_db: data/training.db
127
- train_pickle: null
130
+ train_pickle: data/training.pkl
128
131
  val_portion: 0
129
132
  warmup_fraction: 0.0
@@ -11,7 +11,7 @@ audio: !!python/object:britekit.core.base_config.Audio
11
11
  sampling_rate: 18000
12
12
  spec_duration: 5.0
13
13
  spec_height: 128
14
- spec_width: 500
14
+ spec_width: 480
15
15
  top_db: 80
16
16
  win_length: 0.055
17
17
  infer: !!python/object:britekit.core.base_config.Inference
@@ -25,12 +25,12 @@ infer: !!python/object:britekit.core.base_config.Inference
25
25
  min_score: 0.8
26
26
  num_threads: 3
27
27
  openvino_block_size: 100
28
+ overlap: 0.0
28
29
  raise_min_to_confirm: 0.5
29
30
  scaling_coefficient: 1.0
30
31
  scaling_intercept: 0.0
31
32
  seed: 99
32
33
  segment_len: null
33
- spec_overlap_seconds: 0.0
34
34
  misc: !!python/object:britekit.core.base_config.Miscellaneous
35
35
  ckpt_folder: data/ckpt
36
36
  classes_file: data/classes.txt
@@ -41,13 +41,16 @@ misc: !!python/object:britekit.core.base_config.Miscellaneous
41
41
  search_ckpt_path: null
42
42
  source_regexes:
43
43
  - !!python/tuple
44
- - XC\d+
44
+ - ^[A-Za-z0-9_-]{11}-\d+$
45
+ - Audioset
46
+ - !!python/tuple
47
+ - ^XC\d+$
45
48
  - Xeno-Canto
46
49
  - !!python/tuple
47
- - N\d+
50
+ - ^N\d+$
48
51
  - iNaturalist
49
52
  - !!python/tuple
50
- - \d+
53
+ - ^\d+$
51
54
  - Macaulay Library
52
55
  - !!python/tuple
53
56
  - .*
@@ -108,7 +111,7 @@ train: !!python/object:britekit.core.base_config.Training
108
111
  noise_class_name: Noise
109
112
  num_epochs: 10
110
113
  num_folds: 1
111
- num_workers: 2
114
+ num_workers: 3
112
115
  offpeak_weight: 0.002
113
116
  opt_beta1: 0.9
114
117
  opt_beta2: 0.999
@@ -124,6 +127,6 @@ train: !!python/object:britekit.core.base_config.Training
124
127
  shuffle: true
125
128
  test_pickle: null
126
129
  train_db: data/training.db
127
- train_pickle: null
130
+ train_pickle: data/training.pkl
128
131
  val_portion: 0
129
132
  warmup_fraction: 0.0
@@ -31,8 +31,8 @@ dependencies = [
31
31
  "pandas>=2.2,<2.3",
32
32
  "pyinaturalist>=0.20,<1.0",
33
33
  "soundfile>=0.13,<1.0",
34
- "torch>=2.5,<2.7",
35
- "torchaudio>=2.5,<2.7",
34
+ "torch>=2.5,<2.9",
35
+ "torchaudio>=2.5,<2.9",
36
36
  "librosa>=0.10,<2.0",
37
37
  "lightning>=2.5,<2.6",
38
38
  "matplotlib>=3.9,<3.11",
@@ -178,15 +178,10 @@ class BaseModel(pl.LightningModule):
178
178
  else:
179
179
  preds = torch.softmax(seg_logits, dim=1)
180
180
 
181
- acc = accuracy(preds, y, task="multilabel", num_labels=self.num_classes)
182
181
  self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=True)
183
- self.log("val_acc", acc, on_step=False, on_epoch=True, prog_bar=True)
184
182
 
185
- pr_auc = metrics.average_precision_score(y.cpu(), preds.cpu(), average="micro")
186
183
  roc_auc = metrics.roc_auc_score(y.cpu(), preds.cpu(), average="micro")
187
-
188
- self.log("val_pr_auc", pr_auc, on_step=False, on_epoch=True, prog_bar=True)
189
- self.log("val_roc_auc", roc_auc, on_step=False, on_epoch=True, prog_bar=True)
184
+ self.log("val_roc", roc_auc, on_step=False, on_epoch=True, prog_bar=True)
190
185
 
191
186
  return loss
192
187
 
@@ -199,18 +194,8 @@ class BaseModel(pl.LightningModule):
199
194
 
200
195
  if self.multi_label:
201
196
  preds = torch.sigmoid(seg_logits)
202
- micro_aps = metrics.average_precision_score(
203
- y.cpu(), preds.cpu(), average="micro"
204
- )
205
- self.log(
206
- "test_micro_aps", micro_aps, on_step=False, on_epoch=True, prog_bar=True
207
- )
208
- macro_aps = metrics.average_precision_score(
209
- y.cpu(), preds.cpu(), average="macro"
210
- )
211
- self.log(
212
- "test_macro_aps", macro_aps, on_step=False, on_epoch=True, prog_bar=True
213
- )
197
+ roc_auc = metrics.roc_auc_score(y.cpu(), preds.cpu(), average="micro")
198
+ self.log("test_roc_auc", roc_auc, on_step=False, on_epoch=True, prog_bar=True)
214
199
 
215
200
  return loss
216
201
 
@@ -237,7 +237,7 @@ class PerMinuteTester(BaseTester):
237
237
  f"\rPercent complete: {int(threshold * 100)}%", end="", flush=True
238
238
  )
239
239
 
240
- logging.info()
240
+ logging.info("")
241
241
  pr_table_dict = {}
242
242
  pr_table_dict["annotated_thresholds"] = thresholds
243
243
  pr_table_dict["annotated_precisions_minutes"] = precision_annotated_minutes
@@ -425,7 +425,7 @@ class PerMinuteTester(BaseTester):
425
425
  rpt.append(
426
426
  f" Recall (minutes) = {100 * self.details_dict['recall_trained']:.2f}%\n"
427
427
  )
428
- logging.info()
428
+ logging.info("")
429
429
  with open(os.path.join(self.output_dir, "summary_report.txt"), "w") as summary:
430
430
  for rpt_line in rpt:
431
431
  logging.info(rpt_line[:-1])
@@ -543,7 +543,7 @@ class PerSegmentTester(BaseTester):
543
543
  f"Average of macro-ROC-annotated and micro-ROC-trained = {self.roc_auc_dict['combined_roc_auc_trained']:.4f}\n"
544
544
  )
545
545
 
546
- logging.info()
546
+ logging.info("")
547
547
  with open(os.path.join(self.output_dir, "summary_report.txt"), "w") as summary:
548
548
  for rpt_line in rpt:
549
549
  logging.info(rpt_line[:-1])
File without changes
File without changes
File without changes
File without changes