britekit 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of britekit might be problematic. Click here for more details.
- britekit/__about__.py +1 -1
- britekit/cli.py +6 -1
- britekit/commands/__init__.py +2 -1
- britekit/commands/_analyze.py +37 -11
- britekit/commands/_audioset.py +8 -8
- britekit/commands/_calibrate.py +8 -8
- britekit/commands/_ckpt_ops.py +6 -6
- britekit/commands/_db_add.py +12 -12
- britekit/commands/_db_delete.py +15 -15
- britekit/commands/_embed.py +4 -4
- britekit/commands/_ensemble.py +7 -7
- britekit/commands/_extract.py +158 -19
- britekit/commands/_find_dup.py +5 -5
- britekit/commands/_inat.py +4 -4
- britekit/commands/_init.py +1 -1
- britekit/commands/_pickle.py +13 -7
- britekit/commands/_plot.py +26 -26
- britekit/commands/_reextract.py +6 -6
- britekit/commands/_reports.py +22 -22
- britekit/commands/_search.py +12 -12
- britekit/commands/_train.py +6 -6
- britekit/commands/_tune.py +13 -13
- britekit/commands/_wav2mp3.py +2 -2
- britekit/commands/_xeno.py +7 -7
- britekit/commands/_youtube.py +3 -3
- britekit/core/analyzer.py +43 -13
- britekit/core/audio.py +14 -14
- britekit/core/augmentation.py +24 -0
- britekit/core/data_module.py +2 -2
- britekit/core/dataset.py +1 -4
- britekit/core/plot.py +8 -8
- britekit/core/predictor.py +51 -23
- britekit/core/reextractor.py +6 -6
- britekit/core/util.py +44 -8
- britekit/models/base_model.py +0 -1
- britekit/occurrence_db/occurrence_data_provider.py +13 -13
- britekit/testing/per_recording_tester.py +2 -2
- britekit/training_db/extractor.py +65 -30
- britekit/training_db/training_data_provider.py +1 -1
- britekit/training_db/training_db.py +97 -100
- britekit-0.1.5.dist-info/METADATA +299 -0
- {britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/RECORD +45 -45
- britekit-0.1.3.dist-info/METADATA +0 -290
- {britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/WHEEL +0 -0
- {britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/entry_points.txt +0 -0
- {britekit-0.1.3.dist-info → britekit-0.1.5.dist-info}/licenses/LICENSE.txt +0 -0
britekit/commands/_pickle.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# File name starts with _ to keep it out of typeahead for API users.
|
|
2
2
|
# Defer some imports to improve --help performance.
|
|
3
3
|
import logging
|
|
4
|
+
import os
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
from typing import Optional
|
|
6
7
|
|
|
@@ -27,13 +28,13 @@ def pickle(
|
|
|
27
28
|
or specific classes specified by a CSV file.
|
|
28
29
|
|
|
29
30
|
Args:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
31
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
32
|
+
- classes_path (str, optional): Path to CSV file containing class names to include.
|
|
33
|
+
If omitted, includes all classes in the database.
|
|
34
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
35
|
+
- output_path (str, optional): Output pickle file path. Defaults to "data/training.pkl".
|
|
36
|
+
- max_per_class (int, optional): Maximum number of spectrograms to include per class.
|
|
37
|
+
- spec_group (str): Spectrogram group name to extract from. Defaults to 'default'.
|
|
37
38
|
"""
|
|
38
39
|
from britekit.core.pickler import Pickler
|
|
39
40
|
|
|
@@ -41,6 +42,11 @@ def pickle(
|
|
|
41
42
|
if db_path is None:
|
|
42
43
|
db_path = cfg.train.train_db
|
|
43
44
|
|
|
45
|
+
if classes_path is not None:
|
|
46
|
+
if not os.path.exists(classes_path):
|
|
47
|
+
logging.error(f"Error: file {classes_path} not found.")
|
|
48
|
+
return
|
|
49
|
+
|
|
44
50
|
if output_path is None:
|
|
45
51
|
output_path = str(Path(root_dir) / "data" / "training.pkl")
|
|
46
52
|
|
britekit/commands/_plot.py
CHANGED
|
@@ -76,15 +76,15 @@ def plot_db(
|
|
|
76
76
|
number of spectrograms plotted.
|
|
77
77
|
|
|
78
78
|
Args:
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
79
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
80
|
+
- class_name (str): Name of the class to plot spectrograms for (e.g., "Common Yellowthroat").
|
|
81
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
82
|
+
- ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
|
|
83
|
+
- max_count (int, optional): Maximum number of spectrograms to plot. If omitted, plots all available.
|
|
84
|
+
- output_path (str): Directory where spectrogram images will be saved.
|
|
85
|
+
- prefix (str, optional): Only include recordings that start with this filename prefix.
|
|
86
|
+
- power (float, optional): Raise spectrograms to this power for visualization. Lower values show more detail.
|
|
87
|
+
- spec_group (str, optional): Spectrogram group name to plot from. Defaults to "default".
|
|
88
88
|
"""
|
|
89
89
|
from britekit.core.plot import plot_spec
|
|
90
90
|
from britekit.training_db.training_db import TrainingDatabase
|
|
@@ -158,7 +158,7 @@ def plot_db(
|
|
|
158
158
|
"--ndims",
|
|
159
159
|
"ndims",
|
|
160
160
|
is_flag=True,
|
|
161
|
-
help="If specified, do not show
|
|
161
|
+
help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
|
|
162
162
|
)
|
|
163
163
|
@click.option(
|
|
164
164
|
"--max",
|
|
@@ -237,13 +237,13 @@ def plot_dir(
|
|
|
237
237
|
overlapping segments.
|
|
238
238
|
|
|
239
239
|
Args:
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
240
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
241
|
+
- ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
|
|
242
|
+
- input_path (str): Directory containing audio recordings to process.
|
|
243
|
+
- output_path (str): Directory where spectrogram images will be saved.
|
|
244
|
+
- all (bool): If True, plot each recording as one spectrogram. If False, break into segments.
|
|
245
|
+
- overlap (float): Spectrogram overlap in seconds when breaking recordings into segments. Default is 0.
|
|
246
|
+
- power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
|
|
247
247
|
"""
|
|
248
248
|
from britekit.core.audio import Audio
|
|
249
249
|
|
|
@@ -284,7 +284,7 @@ def plot_dir(
|
|
|
284
284
|
"--ndims",
|
|
285
285
|
"ndims",
|
|
286
286
|
is_flag=True,
|
|
287
|
-
help="If specified, show seconds on x-axis and frequencies on y-axis.",
|
|
287
|
+
help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
|
|
288
288
|
)
|
|
289
289
|
@click.option(
|
|
290
290
|
"-i",
|
|
@@ -353,13 +353,13 @@ def plot_rec(
|
|
|
353
353
|
overlapping segments.
|
|
354
354
|
|
|
355
355
|
Args:
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
356
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
357
|
+
- ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
|
|
358
|
+
- input_path (str): Path to the audio recording file to process.
|
|
359
|
+
- output_path (str): Directory where spectrogram images will be saved.
|
|
360
|
+
- all (bool): If True, plot the entire recording as one spectrogram. If False, break into segments.
|
|
361
|
+
- overlap (float): Spectrogram overlap in seconds when breaking the recording into segments. Default is 0.
|
|
362
|
+
- power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
|
|
363
363
|
"""
|
|
364
364
|
from britekit.core.audio import Audio
|
|
365
365
|
|
|
@@ -394,7 +394,7 @@ def plot_rec(
|
|
|
394
394
|
"--ndims",
|
|
395
395
|
"ndims",
|
|
396
396
|
is_flag=True,
|
|
397
|
-
help="If specified, show seconds on x-axis and frequencies on y-axis.",
|
|
397
|
+
help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
|
|
398
398
|
)
|
|
399
399
|
@click.option(
|
|
400
400
|
"-i",
|
britekit/commands/_reextract.py
CHANGED
|
@@ -30,12 +30,12 @@ def reextract(
|
|
|
30
30
|
updating the database.
|
|
31
31
|
|
|
32
32
|
Args:
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
33
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
34
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.training_db.
|
|
35
|
+
- class_name (str, optional): Name of a specific class to reextract. If omitted, processes all classes.
|
|
36
|
+
- classes_path (str, optional): Path to CSV file listing classes to reextract. Alternative to class_name.
|
|
37
|
+
- check (bool): If True, only check that all recording paths are accessible without updating database.
|
|
38
|
+
- spec_group (str): Spectrogram group name for storing the extracted spectrograms. Defaults to 'default'.
|
|
39
39
|
"""
|
|
40
40
|
from britekit.core.reextractor import Reextractor
|
|
41
41
|
cfg = get_config(cfg_path)
|
britekit/commands/_reports.py
CHANGED
|
@@ -27,8 +27,8 @@ def rpt_ann(
|
|
|
27
27
|
breakdowns.
|
|
28
28
|
|
|
29
29
|
Args:
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
- annotations_path (str): Path to CSV file containing per-segment annotations.
|
|
31
|
+
- output_path (str): Directory where summary reports will be saved.
|
|
32
32
|
"""
|
|
33
33
|
import pandas as pd
|
|
34
34
|
|
|
@@ -136,9 +136,9 @@ def rpt_db(cfg_path: Optional[str] = None,
|
|
|
136
136
|
and can be used for data management and quality control.
|
|
137
137
|
|
|
138
138
|
Args:
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
139
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
140
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
141
|
+
- output_path (str): Directory where database reports will be saved.
|
|
142
142
|
"""
|
|
143
143
|
from britekit.training_db.training_db import TrainingDatabase
|
|
144
144
|
from britekit.training_db.training_data_provider import TrainingDataProvider
|
|
@@ -202,10 +202,10 @@ def rpt_epochs(
|
|
|
202
202
|
This is useful to determine the number of training epochs needed.
|
|
203
203
|
|
|
204
204
|
Args:
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
205
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
206
|
+
- input_path (str): Checkpoint directory generated by training.
|
|
207
|
+
- annotations_path (str): Path to CSV file containing ground truth annotations.
|
|
208
|
+
- output_path (str): Directory where the graph image will be saved.
|
|
209
209
|
"""
|
|
210
210
|
import matplotlib.pyplot as plt
|
|
211
211
|
from matplotlib.ticker import MaxNLocator
|
|
@@ -397,9 +397,9 @@ def rpt_labels(
|
|
|
397
397
|
across different recordings and classes.
|
|
398
398
|
|
|
399
399
|
Args:
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
400
|
+
- label_dir (str): Directory containing inference output (CSV or Audacity labels).
|
|
401
|
+
- output_path (str): Directory where summary reports will be saved.
|
|
402
|
+
- min_score (float, optional): Ignore detections below this confidence threshold.
|
|
403
403
|
"""
|
|
404
404
|
import pandas as pd
|
|
405
405
|
|
|
@@ -544,21 +544,21 @@ def rpt_test(
|
|
|
544
544
|
F1 scores, and various visualization plots to help understand model behavior.
|
|
545
545
|
|
|
546
546
|
Args:
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
547
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
548
|
+
- granularity (str): Evaluation granularity ("recording", "block", or "segment"). Default is "segment".
|
|
549
|
+
- annotations_path (str): Path to CSV file containing ground truth annotations.
|
|
550
|
+
- label_dir (str): Directory containing model prediction labels (Audacity format).
|
|
551
|
+
- output_path (str): Directory where test reports will be saved.
|
|
552
|
+
- recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
|
|
553
|
+
- min_score (float, optional): Provide detailed reports for this confidence threshold.
|
|
554
|
+
- block_size (int, optional): block_size in seconds (default=60).
|
|
555
|
+
- precision (float): For recording granularity, report true positive seconds at this precision. Default is 0.95.
|
|
556
556
|
"""
|
|
557
557
|
from britekit.testing.per_block_tester import PerBlockTester
|
|
558
558
|
from britekit.testing.per_recording_tester import PerRecordingTester
|
|
559
559
|
from britekit.testing.per_segment_tester import PerSegmentTester
|
|
560
560
|
|
|
561
|
-
cfg = get_config()
|
|
561
|
+
cfg = get_config(cfg_path)
|
|
562
562
|
try:
|
|
563
563
|
if not recordings_path:
|
|
564
564
|
recordings_path = str(Path(annotations_path).parent)
|
britekit/commands/_search.py
CHANGED
|
@@ -33,18 +33,18 @@ def search(
|
|
|
33
33
|
based on embedding similarity. Results are plotted and saved to the output directory.
|
|
34
34
|
|
|
35
35
|
Args:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
36
|
+
- cfg_path (str): Path to YAML configuration file defining model settings.
|
|
37
|
+
- db_path (str): Path to the training database containing spectrograms to search.
|
|
38
|
+
- class_name (str): Name of the class/species to search within the database.
|
|
39
|
+
- max_dist (float): Maximum distance threshold. Results with distance greater than this are excluded.
|
|
40
|
+
- exp (float): Exponent to raise spectrograms to for visualization (shows background sounds).
|
|
41
|
+
- num_to_plot (int): Maximum number of similar spectrograms to plot and save.
|
|
42
|
+
- output_path (str): Directory where search results and plots will be saved.
|
|
43
|
+
- input_path (str): Path to the audio file containing the target spectrogram.
|
|
44
|
+
- offset (float): Time offset in seconds where the target spectrogram is extracted.
|
|
45
|
+
- exclude_db (str, optional): Path to an exclusion database. Spectrograms in this database are excluded from results.
|
|
46
|
+
- class_name2 (str, optional): Class name in the exclusion database. Defaults to the search class name.
|
|
47
|
+
- spec_group (str): Spectrogram group name in the database. Defaults to 'default'.
|
|
48
48
|
"""
|
|
49
49
|
|
|
50
50
|
class SpecInfo:
|
britekit/commands/_train.py
CHANGED
|
@@ -26,8 +26,8 @@ def train(
|
|
|
26
26
|
automatically. The final trained model can be used for inference and evaluation.
|
|
27
27
|
|
|
28
28
|
Args:
|
|
29
|
-
|
|
30
|
-
|
|
29
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
30
|
+
If not specified, uses default configuration.
|
|
31
31
|
"""
|
|
32
32
|
from britekit.core.trainer import Trainer
|
|
33
33
|
|
|
@@ -82,10 +82,10 @@ def find_lr(cfg_path: str, num_batches: int):
|
|
|
82
82
|
avoiding rates that are too high (causing instability) or too low (slow convergence).
|
|
83
83
|
|
|
84
84
|
Args:
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
85
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
86
|
+
If not specified, uses default configuration.
|
|
87
|
+
- num_batches (int): Number of training batches to analyze for learning rate finding.
|
|
88
|
+
Default is 100. Higher values provide more accurate results but take longer.
|
|
89
89
|
"""
|
|
90
90
|
from britekit.core.trainer import Trainer
|
|
91
91
|
|
britekit/commands/_tune.py
CHANGED
|
@@ -41,18 +41,18 @@ def tune(
|
|
|
41
41
|
The param_path specifies a YAML file that defines the parameters to be tuned, as described in the README.
|
|
42
42
|
|
|
43
43
|
Args:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
44
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
45
|
+
- param_path (str, optional): Path to YAML file defining hyperparameters to tune and their search space.
|
|
46
|
+
- output_path (str): Directory where reports will be saved.
|
|
47
|
+
- annotations_path (str): Path to CSV file containing ground truth annotations.
|
|
48
|
+
- metric (str): Metric used to compare runs. Options include various MAP and ROC metrics.
|
|
49
|
+
- recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
|
|
50
|
+
- train_log_path (str, optional): Training log directory. Defaults to "logs".
|
|
51
|
+
- num_trials (int): Number of random trials to run. If 0, performs exhaustive search.
|
|
52
|
+
- num_runs (int): Number of runs to average for each parameter combination. Default is 1.
|
|
53
|
+
- extract (bool): Extract new spectrograms before training, to tune spectrogram parameters.
|
|
54
|
+
- skip_training (bool): Iterate on inference only, using checkpoints from the last training run.
|
|
55
|
+
- classes_path (str, optional): Path to CSV containing class names for extract option. Default is all classes.
|
|
56
56
|
"""
|
|
57
57
|
import yaml
|
|
58
58
|
from britekit.core.tuner import Tuner
|
|
@@ -72,7 +72,7 @@ def tune(
|
|
|
72
72
|
recordings_path = str(Path(annotations_path).parent)
|
|
73
73
|
|
|
74
74
|
if not train_log_path:
|
|
75
|
-
train_log_path =
|
|
75
|
+
train_log_path = "logs"
|
|
76
76
|
|
|
77
77
|
if param_path is not None:
|
|
78
78
|
with open(param_path) as input_file:
|
britekit/commands/_wav2mp3.py
CHANGED
|
@@ -24,8 +24,8 @@ def wav2mp3(
|
|
|
24
24
|
requirements for large audio datasets.
|
|
25
25
|
|
|
26
26
|
Args:
|
|
27
|
-
|
|
28
|
-
|
|
27
|
+
- dir (str): Path to directory containing audio files to convert.
|
|
28
|
+
- sampling_rate (int): Output sampling rate in Hz. Default is 32000 Hz.
|
|
29
29
|
"""
|
|
30
30
|
CONVERT_TYPES = {
|
|
31
31
|
".flac",
|
britekit/commands/_xeno.py
CHANGED
|
@@ -75,13 +75,13 @@ def xeno(
|
|
|
75
75
|
Then specify the key in the --key argument, or set the environment variable XCKEY=<key>.
|
|
76
76
|
|
|
77
77
|
Args:
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
78
|
+
- key (str): Xeno-Canto API key for authentication. Can also be set via XCKEY environment variable.
|
|
79
|
+
- output_dir (str): Directory where downloaded recordings will be saved.
|
|
80
|
+
- max_downloads (int): Maximum number of recordings to download. Default is 500.
|
|
81
|
+
- name (str): Species name to search for (common name or scientific name).
|
|
82
|
+
- ignore_licence (bool): If True, ignore license restrictions. By default, excludes BY-NC-ND licensed recordings.
|
|
83
|
+
- scientific_name (bool): If True, treat the name as a scientific name rather than common name.
|
|
84
|
+
- seen_only (bool): If True, only download recordings where the animal was seen (animal-seen=yes).
|
|
85
85
|
"""
|
|
86
86
|
import requests
|
|
87
87
|
|
britekit/commands/_youtube.py
CHANGED
|
@@ -17,9 +17,9 @@ def youtube(
|
|
|
17
17
|
Download an audio recording from Youtube, given a Youtube ID.
|
|
18
18
|
|
|
19
19
|
Args:
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
- id (str): ID of the clip to download.
|
|
21
|
+
- output_dir (str): Directory where downloaded recordings will be saved.
|
|
22
|
+
- sampling_rate (float): Output sampling rate in Hz. Default is 32000.
|
|
23
23
|
"""
|
|
24
24
|
import librosa
|
|
25
25
|
import numpy as np
|
britekit/core/analyzer.py
CHANGED
|
@@ -56,21 +56,29 @@ class Analyzer:
|
|
|
56
56
|
with open(Path(output_path) / "manifest.yaml", "w") as out_file:
|
|
57
57
|
out_file.write(info_str)
|
|
58
58
|
|
|
59
|
-
def _process_recordings(
|
|
59
|
+
def _process_recordings(
|
|
60
|
+
self, recording_paths, output_path, rtype, start_seconds, thread_num, debug_mode=False
|
|
61
|
+
):
|
|
60
62
|
"""
|
|
61
63
|
This runs on its own thread and processes all recordings in the given list.
|
|
62
64
|
|
|
63
65
|
Args:
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
66
|
+
- recording_paths (list): Individual recording paths.
|
|
67
|
+
- output_path (str): Where to write the output.
|
|
68
|
+
- rtype (str): Output format: "audacity", "csv" or "both".
|
|
69
|
+
- start_seconds (float): Where to start processing each recording, in seconds from start.
|
|
67
70
|
"""
|
|
68
71
|
from britekit.core.predictor import Predictor
|
|
69
72
|
|
|
70
73
|
predictor = Predictor(self.cfg.misc.ckpt_folder)
|
|
71
74
|
for recording_path in recording_paths:
|
|
72
75
|
logging.info(f"[Thread {thread_num}] Processing {recording_path}")
|
|
73
|
-
scores, frame_map, offsets = predictor.get_raw_scores(
|
|
76
|
+
scores, frame_map, offsets = predictor.get_raw_scores(
|
|
77
|
+
recording_path, start_seconds
|
|
78
|
+
)
|
|
79
|
+
if debug_mode:
|
|
80
|
+
predictor.log_scores(scores) # log the scores for debugging
|
|
81
|
+
|
|
74
82
|
recording_name = Path(recording_path).stem
|
|
75
83
|
if rtype in {"audacity", "both"}:
|
|
76
84
|
file_path = str(Path(output_path) / f"{recording_name}_scores.txt")
|
|
@@ -82,6 +90,9 @@ class Analyzer:
|
|
|
82
90
|
)
|
|
83
91
|
self.dataframes.append(dataframe)
|
|
84
92
|
|
|
93
|
+
if debug_mode:
|
|
94
|
+
break
|
|
95
|
+
|
|
85
96
|
if thread_num == 1:
|
|
86
97
|
self._save_manifest(output_path, predictor)
|
|
87
98
|
|
|
@@ -91,8 +102,8 @@ class Analyzer:
|
|
|
91
102
|
Split the input list into `n` lists based on index modulo `n`.
|
|
92
103
|
|
|
93
104
|
Args:
|
|
94
|
-
|
|
95
|
-
|
|
105
|
+
- input_list (list): The input list to split.
|
|
106
|
+
- n (int): Number of resulting groups.
|
|
96
107
|
|
|
97
108
|
Returns:
|
|
98
109
|
List[List]: A list of `n` lists, where each sublist contains elements
|
|
@@ -103,14 +114,24 @@ class Analyzer:
|
|
|
103
114
|
result[i % n].append(item)
|
|
104
115
|
return result
|
|
105
116
|
|
|
106
|
-
def run(
|
|
117
|
+
def run(
|
|
118
|
+
self,
|
|
119
|
+
input_path: str,
|
|
120
|
+
output_path: str,
|
|
121
|
+
rtype: str = "audacity",
|
|
122
|
+
start_seconds: float = 0,
|
|
123
|
+
debug_mode: bool = False
|
|
124
|
+
):
|
|
107
125
|
"""
|
|
108
126
|
Run inference.
|
|
109
127
|
|
|
110
128
|
Args:
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
129
|
+
- input_path (str): Recording or directory containing recordings.
|
|
130
|
+
- output_path (str): Output directory.
|
|
131
|
+
- rtype (str): Output format: "audacity", "csv" or "both".
|
|
132
|
+
- start_seconds (float): Where to start processing each recording, in seconds.
|
|
133
|
+
- debug_mode (bool): If true, log scores for the first spectrogram, then stop.
|
|
134
|
+
For example, '71' and '1:11' have the same meaning, and cause the first 71 seconds to be ignored. Default = 0.
|
|
114
135
|
"""
|
|
115
136
|
import pandas as pd
|
|
116
137
|
|
|
@@ -127,14 +148,23 @@ class Analyzer:
|
|
|
127
148
|
self.dataframes = []
|
|
128
149
|
num_threads = min(self.cfg.infer.num_threads, len(recording_paths))
|
|
129
150
|
if num_threads == 1:
|
|
130
|
-
self._process_recordings(
|
|
151
|
+
self._process_recordings(
|
|
152
|
+
recording_paths, output_path, rtype, start_seconds, 1, debug_mode,
|
|
153
|
+
)
|
|
131
154
|
else:
|
|
132
155
|
recordings_per_thread = self._split_list(recording_paths, num_threads)
|
|
133
156
|
threads = []
|
|
134
157
|
for i in range(num_threads):
|
|
135
158
|
thread = threading.Thread(
|
|
136
159
|
target=self._process_recordings,
|
|
137
|
-
args=(
|
|
160
|
+
args=(
|
|
161
|
+
recordings_per_thread[i],
|
|
162
|
+
output_path,
|
|
163
|
+
rtype,
|
|
164
|
+
start_seconds,
|
|
165
|
+
i + 1,
|
|
166
|
+
debug_mode,
|
|
167
|
+
),
|
|
138
168
|
)
|
|
139
169
|
thread.start()
|
|
140
170
|
threads.append(thread)
|
britekit/core/audio.py
CHANGED
|
@@ -46,7 +46,7 @@ class Audio:
|
|
|
46
46
|
so we downsample rather than upsampling.
|
|
47
47
|
|
|
48
48
|
Args:
|
|
49
|
-
|
|
49
|
+
- cfg (Optional[BaseConfig]): Configuration object. If None, uses default config.
|
|
50
50
|
"""
|
|
51
51
|
import torchaudio as ta
|
|
52
52
|
|
|
@@ -113,7 +113,7 @@ class Audio:
|
|
|
113
113
|
if choose_channel is enabled in the configuration.
|
|
114
114
|
|
|
115
115
|
Args:
|
|
116
|
-
|
|
116
|
+
- path (str): Path to the audio recording file.
|
|
117
117
|
|
|
118
118
|
Returns:
|
|
119
119
|
tuple: (signal, sampling_rate) where:
|
|
@@ -168,18 +168,18 @@ class Audio:
|
|
|
168
168
|
Returns both normalized (0-1 range) and unnormalized versions of the spectrograms.
|
|
169
169
|
|
|
170
170
|
Args:
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
171
|
+
- start_times (list[float]): List of start times in seconds from the beginning
|
|
172
|
+
of the recording for each spectrogram.
|
|
173
|
+
- spec_duration (Optional[float]): Length of each spectrogram in seconds.
|
|
174
|
+
Defaults to cfg.audio.spec_duration.
|
|
175
|
+
- freq_scale (Optional[str]): Frequency scale to use ('linear', 'log', 'mel').
|
|
176
|
+
Defaults to cfg.audio.freq_scale.
|
|
177
|
+
- decibels (Optional[float]): Whether to convert to decibels.
|
|
178
|
+
Defaults to cfg.audio.decibels.
|
|
179
|
+
- top_db (Optional[int]): Maximum decibel value for normalization.
|
|
180
|
+
Defaults to cfg.audio.top_db.
|
|
181
|
+
- db_power (Optional[int]): Power to apply after decibel conversion.
|
|
182
|
+
Defaults to cfg.audio.db_power.
|
|
183
183
|
|
|
184
184
|
Returns:
|
|
185
185
|
tuple: (normalized_specs, unnormalized_specs) where:
|
britekit/core/augmentation.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
|
1
1
|
# Defer some imports to improve initialization performance.
|
|
2
|
+
import ctypes
|
|
2
3
|
from functools import partial
|
|
4
|
+
import logging
|
|
5
|
+
from multiprocessing import Value
|
|
3
6
|
import random
|
|
4
7
|
|
|
5
8
|
from britekit.core.base_config import BaseConfig
|
|
6
9
|
|
|
7
10
|
AUGMENTATION_REGISTRY = {}
|
|
11
|
+
_have_real_noise = Value(ctypes.c_bool, True)
|
|
8
12
|
|
|
9
13
|
|
|
10
14
|
def register_augmentation(name):
|
|
@@ -60,7 +64,27 @@ class AugmentationPipeline:
|
|
|
60
64
|
"""
|
|
61
65
|
Add an actual noise spectrogram but, unlike mixup, do not update the label.
|
|
62
66
|
"""
|
|
67
|
+
global _have_real_noise
|
|
68
|
+
if not _have_real_noise.value:
|
|
69
|
+
return spec
|
|
70
|
+
|
|
63
71
|
noise_spec = self.dataset.get_random_noise()
|
|
72
|
+
if noise_spec is None:
|
|
73
|
+
# with multiple workers, only do this once
|
|
74
|
+
with _have_real_noise.get_lock():
|
|
75
|
+
if _have_real_noise.value:
|
|
76
|
+
_have_real_noise.value = False
|
|
77
|
+
logging.error("")
|
|
78
|
+
logging.error("*** WARNING:")
|
|
79
|
+
logging.error(
|
|
80
|
+
"No noise class is defined, but add_real_noise is enabled."
|
|
81
|
+
)
|
|
82
|
+
logging.error("In most cases it is best to provide noise data.")
|
|
83
|
+
logging.error(
|
|
84
|
+
"The add_real_noise augmentation will be disabled in this run."
|
|
85
|
+
)
|
|
86
|
+
logging.error("")
|
|
87
|
+
return spec
|
|
64
88
|
|
|
65
89
|
# Validate shapes match
|
|
66
90
|
if noise_spec.shape != spec.shape:
|
britekit/core/data_module.py
CHANGED
|
@@ -124,7 +124,7 @@ class DataModule(LightningDataModule):
|
|
|
124
124
|
Load data from a pickle file with error handling.
|
|
125
125
|
|
|
126
126
|
Args:
|
|
127
|
-
|
|
127
|
+
- path (str): Path to the pickle file
|
|
128
128
|
|
|
129
129
|
Returns:
|
|
130
130
|
Tuple containing (class_names, class_codes, alt_names, alt_codes, specs, labels)
|
|
@@ -175,7 +175,7 @@ class DataModule(LightningDataModule):
|
|
|
175
175
|
Prepare train/validation split for a specific fold.
|
|
176
176
|
|
|
177
177
|
Args:
|
|
178
|
-
|
|
178
|
+
- fold_index (int): Index of the fold to prepare
|
|
179
179
|
|
|
180
180
|
Raises:
|
|
181
181
|
ValueError: If fold_index is invalid or val_portion is invalid
|
britekit/core/dataset.py
CHANGED
|
@@ -5,7 +5,6 @@ from torch.utils.data import Dataset
|
|
|
5
5
|
from typing import Any, Callable, List, Optional
|
|
6
6
|
|
|
7
7
|
from britekit.core.augmentation import AugmentationPipeline
|
|
8
|
-
from britekit.core.exceptions import TrainingError
|
|
9
8
|
from britekit.core.config_loader import get_config
|
|
10
9
|
from britekit.core.util import expand_spectrogram
|
|
11
10
|
|
|
@@ -115,9 +114,7 @@ class SpectrogramDataset(Dataset):
|
|
|
115
114
|
Return a random noise spec from the training data
|
|
116
115
|
"""
|
|
117
116
|
if not self.noise_indexes:
|
|
118
|
-
|
|
119
|
-
"Attempt to use noise during augmentation when none defined"
|
|
120
|
-
)
|
|
117
|
+
return None
|
|
121
118
|
|
|
122
119
|
idx = random.randint(0, len(self.noise_indexes) - 1)
|
|
123
120
|
return self._get_spec(self.noise_indexes[idx])
|