britekit 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of britekit might be problematic. Click here for more details.
- britekit/__about__.py +1 -1
- britekit/cli.py +6 -1
- britekit/commands/__init__.py +2 -1
- britekit/commands/_analyze.py +9 -9
- britekit/commands/_audioset.py +8 -8
- britekit/commands/_calibrate.py +8 -8
- britekit/commands/_ckpt_ops.py +6 -6
- britekit/commands/_db_add.py +12 -12
- britekit/commands/_db_delete.py +15 -15
- britekit/commands/_embed.py +4 -4
- britekit/commands/_ensemble.py +7 -7
- britekit/commands/_extract.py +158 -19
- britekit/commands/_find_dup.py +5 -5
- britekit/commands/_inat.py +4 -4
- britekit/commands/_init.py +1 -1
- britekit/commands/_pickle.py +7 -7
- britekit/commands/_plot.py +26 -26
- britekit/commands/_reextract.py +6 -6
- britekit/commands/_reports.py +22 -22
- britekit/commands/_search.py +12 -12
- britekit/commands/_train.py +6 -6
- britekit/commands/_tune.py +12 -12
- britekit/commands/_wav2mp3.py +2 -2
- britekit/commands/_xeno.py +7 -7
- britekit/commands/_youtube.py +3 -3
- britekit/core/analyzer.py +8 -8
- britekit/core/audio.py +14 -14
- britekit/core/data_module.py +2 -2
- britekit/core/plot.py +8 -8
- britekit/core/predictor.py +21 -21
- britekit/core/reextractor.py +6 -6
- britekit/core/util.py +8 -8
- britekit/occurrence_db/occurrence_data_provider.py +13 -13
- britekit/training_db/extractor.py +65 -30
- britekit/training_db/training_data_provider.py +1 -1
- britekit/training_db/training_db.py +97 -100
- britekit-0.1.4.dist-info/METADATA +299 -0
- {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/RECORD +41 -41
- britekit-0.1.3.dist-info/METADATA +0 -290
- {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/WHEEL +0 -0
- {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/entry_points.txt +0 -0
- {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/licenses/LICENSE.txt +0 -0
britekit/__about__.py
CHANGED
britekit/cli.py
CHANGED
|
@@ -31,7 +31,11 @@ from .commands._db_delete import (
|
|
|
31
31
|
)
|
|
32
32
|
from .commands._embed import _embed_cmd
|
|
33
33
|
from .commands._ensemble import _ensemble_cmd
|
|
34
|
-
from .commands._extract import
|
|
34
|
+
from .commands._extract import (
|
|
35
|
+
_extract_all_cmd,
|
|
36
|
+
_extract_by_csv_cmd,
|
|
37
|
+
_extract_by_image_cmd,
|
|
38
|
+
)
|
|
35
39
|
from .commands._find_dup import _find_dup_cmd
|
|
36
40
|
from .commands._inat import _inat_cmd
|
|
37
41
|
from .commands._init import _init_cmd
|
|
@@ -83,6 +87,7 @@ cli.add_command(_del_stype_cmd)
|
|
|
83
87
|
cli.add_command(_embed_cmd)
|
|
84
88
|
cli.add_command(_ensemble_cmd)
|
|
85
89
|
cli.add_command(_extract_all_cmd)
|
|
90
|
+
cli.add_command(_extract_by_csv_cmd)
|
|
86
91
|
cli.add_command(_extract_by_image_cmd)
|
|
87
92
|
|
|
88
93
|
cli.add_command(_find_dup_cmd)
|
britekit/commands/__init__.py
CHANGED
|
@@ -14,7 +14,7 @@ from ._db_delete import (
|
|
|
14
14
|
)
|
|
15
15
|
from ._embed import embed
|
|
16
16
|
from ._ensemble import ensemble
|
|
17
|
-
from ._extract import extract_all, extract_by_image
|
|
17
|
+
from ._extract import extract_all, extract_by_csv, extract_by_image
|
|
18
18
|
from ._find_dup import find_dup
|
|
19
19
|
from ._inat import inat
|
|
20
20
|
from ._init import init
|
|
@@ -57,6 +57,7 @@ __all__ = [
|
|
|
57
57
|
"embed",
|
|
58
58
|
"ensemble",
|
|
59
59
|
"extract_all",
|
|
60
|
+
"extract_by_csv",
|
|
60
61
|
"extract_by_image",
|
|
61
62
|
"find_dup",
|
|
62
63
|
"find_lr",
|
britekit/commands/_analyze.py
CHANGED
|
@@ -30,15 +30,15 @@ def analyze(
|
|
|
30
30
|
CSV files, or both.
|
|
31
31
|
|
|
32
32
|
Args:
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
33
|
+
- cfg_path (str): Path to YAML configuration file defining model and inference settings.
|
|
34
|
+
- input_path (str): Path to input audio file or directory containing audio files.
|
|
35
|
+
- output_path (str): Path to output directory where results will be saved.
|
|
36
|
+
- rtype (str): Output format type. Options are "audacity", "csv", or "both".
|
|
37
|
+
- min_score (float, optional): Confidence threshold. Predictions below this value are excluded.
|
|
38
|
+
- num_threads (int, optional): Number of threads to use for processing. Default is 3.
|
|
39
|
+
- overlap (float, optional): Spectrogram overlap in seconds for sliding window analysis.
|
|
40
|
+
- segment_len (float, optional): Fixed segment length in seconds. If specified, labels are
|
|
41
|
+
fixed-length; otherwise they are variable-length.
|
|
42
42
|
"""
|
|
43
43
|
|
|
44
44
|
# defer slow imports to improve --help performance
|
britekit/commands/_audioset.py
CHANGED
|
@@ -201,14 +201,14 @@ def audioset(
|
|
|
201
201
|
shows which other classes commonly co-occur with the specified class.
|
|
202
202
|
|
|
203
203
|
Args:
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
204
|
+
- class_name (str): Name of the audio class to download (e.g., "train", "speech", "music").
|
|
205
|
+
- curated_csv_path (str): Path to CSV file containing a curated list of clips to download.
|
|
206
|
+
- output_dir (str): Directory where downloaded recordings will be saved.
|
|
207
|
+
- max_downloads (int): Maximum number of recordings to download. Default is 500.
|
|
208
|
+
- sampling_rate (float): Output sampling rate in Hz. Default is 32000.
|
|
209
|
+
- num_to_skip (int): Number of initial recordings to skip. Default is 0.
|
|
210
|
+
- do_report (bool): If True, generate a report on associated secondary classes instead of downloading.
|
|
211
|
+
- root_dir (str): Directory that contains the data directory. Default is working directory.
|
|
212
212
|
"""
|
|
213
213
|
|
|
214
214
|
if class_name is None and curated_csv_path is None:
|
britekit/commands/_calibrate.py
CHANGED
|
@@ -34,14 +34,14 @@ def calibrate(
|
|
|
34
34
|
prediction scores to better reflect true probabilities.
|
|
35
35
|
|
|
36
36
|
Args:
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
37
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
38
|
+
- annotations_path (str): Path to CSV file containing ground truth annotations.
|
|
39
|
+
- label_dir (str): Directory containing model prediction labels (Audacity format).
|
|
40
|
+
- output_path (str): Directory where calibration reports will be saved.
|
|
41
|
+
- recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
|
|
42
|
+
- cutoff (float): Ignore predictions below this threshold during calibration. Default is 0.4.
|
|
43
|
+
- coef (float, optional): Use this coefficient for the calibration plot.
|
|
44
|
+
- inter (float, optional): Use this intercept for the calibration plot.
|
|
45
45
|
"""
|
|
46
46
|
from britekit.testing.per_segment_tester import PerSegmentTester
|
|
47
47
|
|
britekit/commands/_ckpt_ops.py
CHANGED
|
@@ -19,9 +19,9 @@ def ckpt_avg(input_path: str="", output_path: Optional[str]=None):
|
|
|
19
19
|
with averaged weights.
|
|
20
20
|
|
|
21
21
|
Args:
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
- input_path (str): Directory containing checkpoint files (*.ckpt) to average.
|
|
23
|
+
- output_path (str, optional): Path for the output averaged checkpoint.
|
|
24
|
+
Defaults to "average.ckpt" in the input directory.
|
|
25
25
|
"""
|
|
26
26
|
import torch
|
|
27
27
|
|
|
@@ -88,7 +88,7 @@ def ckpt_freeze(input_path: str=""):
|
|
|
88
88
|
and inference rather than continued training.
|
|
89
89
|
|
|
90
90
|
Args:
|
|
91
|
-
|
|
91
|
+
- input_path (str): Path to the checkpoint file to freeze.
|
|
92
92
|
"""
|
|
93
93
|
import pytorch_lightning as pl
|
|
94
94
|
from britekit.models.model_loader import load_from_checkpoint
|
|
@@ -136,8 +136,8 @@ def ckpt_onnx(
|
|
|
136
136
|
checkpoint.
|
|
137
137
|
|
|
138
138
|
Args:
|
|
139
|
-
|
|
140
|
-
|
|
139
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
140
|
+
- input_path (str): Path to the PyTorch checkpoint file to convert.
|
|
141
141
|
"""
|
|
142
142
|
import torch
|
|
143
143
|
from britekit.models.model_loader import load_from_checkpoint
|
britekit/commands/_db_add.py
CHANGED
|
@@ -18,8 +18,8 @@ def add_cat(db_path: Optional[str]=None, name: str="") -> None:
|
|
|
18
18
|
that contain multiple related species classes.
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
22
|
+
- name (str): Name of the category to add (e.g., "Birds", "Mammals").
|
|
23
23
|
"""
|
|
24
24
|
from britekit.training_db.training_db import TrainingDatabase
|
|
25
25
|
|
|
@@ -58,8 +58,8 @@ def add_stype(db_path: Optional[str]=None, name: str="") -> None:
|
|
|
58
58
|
or sounds produced by the same species.
|
|
59
59
|
|
|
60
60
|
Args:
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
62
|
+
- name (str): Name of the sound type to add (e.g., "Song", "Call", "Alarm").
|
|
63
63
|
"""
|
|
64
64
|
from britekit.training_db.training_db import TrainingDatabase
|
|
65
65
|
|
|
@@ -98,8 +98,8 @@ def add_src(db_path: Optional[str]=None, name: str="") -> None:
|
|
|
98
98
|
maintain provenance and can be useful for data quality analysis.
|
|
99
99
|
|
|
100
100
|
Args:
|
|
101
|
-
|
|
102
|
-
|
|
101
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
102
|
+
- name (str): Name of the source to add (e.g., "Xeno-Canto", "Macaulay Library").
|
|
103
103
|
"""
|
|
104
104
|
from britekit.training_db.training_db import TrainingDatabase
|
|
105
105
|
|
|
@@ -145,12 +145,12 @@ def add_class(
|
|
|
145
145
|
This is typically used to add new species or sound types to the training database.
|
|
146
146
|
|
|
147
147
|
Args:
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
148
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
149
|
+
- category (str): Name of the category this class belongs to. Defaults to "default".
|
|
150
|
+
- name (str): Primary name of the class (e.g., "Common Yellowthroat").
|
|
151
|
+
- code (str): Primary code for the class (e.g., "COYE").
|
|
152
|
+
- alt_name (str, optional): Alternate name for the class (e.g., scientific name).
|
|
153
|
+
- alt_code (str, optional): Alternate code for the class (e.g., scientific code).
|
|
154
154
|
"""
|
|
155
155
|
from britekit.training_db.training_db import TrainingDatabase
|
|
156
156
|
|
britekit/commands/_db_delete.py
CHANGED
|
@@ -20,8 +20,8 @@ def del_cat(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
|
|
|
20
20
|
This is a destructive operation that cannot be undone.
|
|
21
21
|
|
|
22
22
|
Args:
|
|
23
|
-
|
|
24
|
-
|
|
23
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
24
|
+
- name (str): Name of the category to delete (e.g., "Birds", "Mammals").
|
|
25
25
|
"""
|
|
26
26
|
from britekit.training_db.training_db import TrainingDatabase
|
|
27
27
|
|
|
@@ -73,8 +73,8 @@ def del_class(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
|
|
|
73
73
|
be undone and will affect any training data associated with this class.
|
|
74
74
|
|
|
75
75
|
Args:
|
|
76
|
-
|
|
77
|
-
|
|
76
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
77
|
+
- name (str): Name of the class to delete (e.g., "Common Yellowthroat").
|
|
78
78
|
"""
|
|
79
79
|
from britekit.training_db.training_db import TrainingDatabase
|
|
80
80
|
|
|
@@ -123,8 +123,8 @@ def del_rec(db_path: Optional[str]=None, file_name: Optional[str]=None) -> None:
|
|
|
123
123
|
extracted from it.
|
|
124
124
|
|
|
125
125
|
Args:
|
|
126
|
-
|
|
127
|
-
|
|
126
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
127
|
+
- file_name (str): Name of the recording file to delete (e.g., "XC123456.mp3").
|
|
128
128
|
"""
|
|
129
129
|
from britekit.training_db.training_db import TrainingDatabase
|
|
130
130
|
|
|
@@ -167,8 +167,8 @@ def del_sgroup(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
|
|
|
167
167
|
This command removes the entire group and all spectrograms within it.
|
|
168
168
|
|
|
169
169
|
Args:
|
|
170
|
-
|
|
171
|
-
|
|
170
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
171
|
+
- name (str): Name of the spectrogram group to delete (e.g., "default", "augmented").
|
|
172
172
|
"""
|
|
173
173
|
from britekit.training_db.training_db import TrainingDatabase
|
|
174
174
|
|
|
@@ -212,8 +212,8 @@ def del_stype(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
|
|
|
212
212
|
to null, effectively removing the sound type classification while keeping the audio data.
|
|
213
213
|
|
|
214
214
|
Args:
|
|
215
|
-
|
|
216
|
-
|
|
215
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
216
|
+
- name (str): Name of the sound type to delete (e.g., "Song", "Call", "Alarm").
|
|
217
217
|
"""
|
|
218
218
|
from britekit.training_db.training_db import TrainingDatabase
|
|
219
219
|
|
|
@@ -257,8 +257,8 @@ def del_src(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
|
|
|
257
257
|
removing entire datasets from a specific source (e.g., removing all Xeno-Canto data).
|
|
258
258
|
|
|
259
259
|
Args:
|
|
260
|
-
|
|
261
|
-
|
|
260
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
261
|
+
- name (str): Name of the source to delete (e.g., "Xeno-Canto", "Macaulay Library").
|
|
262
262
|
"""
|
|
263
263
|
from britekit.training_db.training_db import TrainingDatabase
|
|
264
264
|
|
|
@@ -305,9 +305,9 @@ def del_seg(db_path: Optional[str]=None, class_name: Optional[str]=None, dir_pat
|
|
|
305
305
|
allowing you to remove low-quality or incorrectly labeled segments.
|
|
306
306
|
|
|
307
307
|
Args:
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
308
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
309
|
+
- class_name (str): Name of the class whose segments should be considered for deletion.
|
|
310
|
+
- dir_path (str): Path to directory containing spectrogram image files.
|
|
311
311
|
"""
|
|
312
312
|
from britekit.training_db.training_db import TrainingDatabase
|
|
313
313
|
|
britekit/commands/_embed.py
CHANGED
|
@@ -23,10 +23,10 @@ def embed(
|
|
|
23
23
|
downstream tasks. The embeddings are compressed and stored in the database.
|
|
24
24
|
|
|
25
25
|
Args:
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
26
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
27
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
28
|
+
- class_name (str, optional): Name of a specific class to process. If omitted, processes all classes.
|
|
29
|
+
- spec_group (str): Spectrogram group name to process. Defaults to 'default'.
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
def embed_block(
|
britekit/commands/_ensemble.py
CHANGED
|
@@ -65,13 +65,13 @@ def ensemble(
|
|
|
65
65
|
ensembles of the given size and test each one to identify the best ensemble.
|
|
66
66
|
|
|
67
67
|
Args:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
68
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
69
|
+
- ckpt_path (str): Path to directory containing checkpoints.
|
|
70
|
+
- ensemble_size (int): Number of checkpoints in ensemble (default=3).
|
|
71
|
+
- num_tries (int): Maximum number of ensembles to try (default=100).
|
|
72
|
+
- metric (str): Metric to use to compare ensembles (default=micro_roc).
|
|
73
|
+
- annotations_path (str): Path to CSV file containing ground truth annotations.
|
|
74
|
+
- recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
|
|
75
75
|
"""
|
|
76
76
|
import glob
|
|
77
77
|
import itertools
|
britekit/commands/_extract.py
CHANGED
|
@@ -29,15 +29,15 @@ def extract_all(
|
|
|
29
29
|
it will be automatically created.
|
|
30
30
|
|
|
31
31
|
Args:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
32
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
33
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
34
|
+
- cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
|
|
35
|
+
- class_code (str, optional): Class code for new class creation (e.g., "COYE").
|
|
36
|
+
- class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
|
|
37
|
+
- dir_path (str): Path to directory containing audio recordings to process.
|
|
38
|
+
- overlap (float, optional): Spectrogram overlap in seconds. Defaults to config value.
|
|
39
|
+
- src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
|
|
40
|
+
- spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
|
|
41
41
|
"""
|
|
42
42
|
from britekit.training_db.extractor import Extractor
|
|
43
43
|
from britekit.training_db.training_db import TrainingDatabase
|
|
@@ -134,6 +134,145 @@ def _extract_all_cmd(
|
|
|
134
134
|
)
|
|
135
135
|
|
|
136
136
|
|
|
137
|
+
def extract_by_csv(
|
|
138
|
+
cfg_path: Optional[str]=None,
|
|
139
|
+
db_path: Optional[str]=None,
|
|
140
|
+
cat_name: Optional[str]=None,
|
|
141
|
+
class_code: Optional[str]=None,
|
|
142
|
+
class_name: str="",
|
|
143
|
+
rec_dir: str="",
|
|
144
|
+
csv_path: str="",
|
|
145
|
+
dest_dir: Optional[str]=None,
|
|
146
|
+
src_name: Optional[str]=None,
|
|
147
|
+
spec_group: Optional[str]=None,
|
|
148
|
+
) -> None:
|
|
149
|
+
"""
|
|
150
|
+
Extract spectrograms that correspond to rows in a CSV file.
|
|
151
|
+
|
|
152
|
+
This command parses a CSV file to identify the corresponding audio
|
|
153
|
+
segments and extracts those spectrograms from the original recordings.
|
|
154
|
+
This is useful when you have pre-selected spectrograms (e.g., from manual review
|
|
155
|
+
or search results) and want to extract only those specific segments. The CSV file
|
|
156
|
+
needs two columns: recording and start_time, where recording is the stem of the
|
|
157
|
+
recording file name (e.g. XC12345) and start_time is the offset in seconds from the
|
|
158
|
+
start of the recording.
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
162
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
163
|
+
- cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
|
|
164
|
+
- class_code (str, optional): Class code for new class creation (e.g., "COYE").
|
|
165
|
+
- class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
|
|
166
|
+
- rec_dir (str): Path to directory containing the original audio recordings.
|
|
167
|
+
- csv_path (str): Path to CSV file containing two columns (recording and start_time) to identify segments to extract.
|
|
168
|
+
- dest_dir (str, optional): If specified, copy used recordings to this directory.
|
|
169
|
+
- src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
|
|
170
|
+
- spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
|
|
171
|
+
"""
|
|
172
|
+
from britekit.training_db.extractor import Extractor
|
|
173
|
+
from britekit.training_db.training_db import TrainingDatabase
|
|
174
|
+
|
|
175
|
+
cfg = get_config(cfg_path)
|
|
176
|
+
if db_path is not None:
|
|
177
|
+
cfg.train.train_db = db_path
|
|
178
|
+
|
|
179
|
+
with TrainingDatabase(cfg.train.train_db) as db:
|
|
180
|
+
extractor = Extractor(
|
|
181
|
+
db, class_name, class_code, cat_name, src_name, spec_group=spec_group
|
|
182
|
+
)
|
|
183
|
+
count = extractor.extract_by_csv(rec_dir, csv_path, dest_dir)
|
|
184
|
+
logging.info(f"Inserted {count} spectrograms")
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
@click.command(
|
|
188
|
+
name="extract-by-csv",
|
|
189
|
+
short_help="Insert spectrograms that correspond to rows in a CSV file.",
|
|
190
|
+
help=util.cli_help_from_doc(extract_by_csv.__doc__),
|
|
191
|
+
)
|
|
192
|
+
@click.option(
|
|
193
|
+
"-c",
|
|
194
|
+
"--cfg",
|
|
195
|
+
"cfg_path",
|
|
196
|
+
type=click.Path(exists=True),
|
|
197
|
+
required=False,
|
|
198
|
+
help="Path to YAML file defining config overrides.",
|
|
199
|
+
)
|
|
200
|
+
@click.option(
|
|
201
|
+
"-d", "--db", "db_path", required=False, help="Path to the training database."
|
|
202
|
+
)
|
|
203
|
+
@click.option(
|
|
204
|
+
"--cat",
|
|
205
|
+
"cat_name",
|
|
206
|
+
required=False,
|
|
207
|
+
help="Category name, e.g. 'bird' for when new class is added. Defaults to 'default'.",
|
|
208
|
+
)
|
|
209
|
+
@click.option(
|
|
210
|
+
"--code",
|
|
211
|
+
"class_code",
|
|
212
|
+
required=False,
|
|
213
|
+
help="Class code for when new class is added.",
|
|
214
|
+
)
|
|
215
|
+
@click.option("--name", "class_name", required=True, help="Class name.")
|
|
216
|
+
@click.option(
|
|
217
|
+
"--rec-dir",
|
|
218
|
+
"rec_dir",
|
|
219
|
+
type=click.Path(exists=True, file_okay=False, dir_okay=True),
|
|
220
|
+
required=True,
|
|
221
|
+
help="Path to directory containing recordings.",
|
|
222
|
+
)
|
|
223
|
+
@click.option(
|
|
224
|
+
"--csv-path",
|
|
225
|
+
"csv_path",
|
|
226
|
+
type=click.Path(exists=True, file_okay=True, dir_okay=False),
|
|
227
|
+
required=True,
|
|
228
|
+
help="Path to CSV file containing two columns (recording and offset) to identify segments to extract.",
|
|
229
|
+
)
|
|
230
|
+
@click.option(
|
|
231
|
+
"--dest-dir",
|
|
232
|
+
"dest_dir",
|
|
233
|
+
type=click.Path(exists=True, file_okay=False, dir_okay=True),
|
|
234
|
+
required=False,
|
|
235
|
+
help="Copy used recordings to this directory if specified.",
|
|
236
|
+
)
|
|
237
|
+
@click.option(
|
|
238
|
+
"--src",
|
|
239
|
+
"src_name",
|
|
240
|
+
required=False,
|
|
241
|
+
help="Source name for inserted recordings. Defaults to 'default'.",
|
|
242
|
+
)
|
|
243
|
+
@click.option(
|
|
244
|
+
"--sgroup",
|
|
245
|
+
"spec_group",
|
|
246
|
+
required=False,
|
|
247
|
+
help="Spectrogram group name. Defaults to 'default'.",
|
|
248
|
+
)
|
|
249
|
+
def _extract_by_csv_cmd(
|
|
250
|
+
cfg_path: Optional[str],
|
|
251
|
+
db_path: Optional[str],
|
|
252
|
+
cat_name: Optional[str],
|
|
253
|
+
class_code: Optional[str],
|
|
254
|
+
class_name: str,
|
|
255
|
+
rec_dir: str,
|
|
256
|
+
csv_path: str,
|
|
257
|
+
dest_dir: Optional[str],
|
|
258
|
+
src_name: Optional[str],
|
|
259
|
+
spec_group: Optional[str],
|
|
260
|
+
) -> None:
|
|
261
|
+
util.set_logging()
|
|
262
|
+
extract_by_csv(
|
|
263
|
+
cfg_path,
|
|
264
|
+
db_path,
|
|
265
|
+
cat_name,
|
|
266
|
+
class_code,
|
|
267
|
+
class_name,
|
|
268
|
+
rec_dir,
|
|
269
|
+
csv_path,
|
|
270
|
+
dest_dir,
|
|
271
|
+
src_name,
|
|
272
|
+
spec_group,
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
|
|
137
276
|
def extract_by_image(
|
|
138
277
|
cfg_path: Optional[str]=None,
|
|
139
278
|
db_path: Optional[str]=None,
|
|
@@ -158,16 +297,16 @@ def extract_by_image(
|
|
|
158
297
|
that allows the command to locate and extract the corresponding audio segments.
|
|
159
298
|
|
|
160
299
|
Args:
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
300
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
301
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
302
|
+
- cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
|
|
303
|
+
- class_code (str, optional): Class code for new class creation (e.g., "COYE").
|
|
304
|
+
- class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
|
|
305
|
+
- rec_dir (str): Path to directory containing the original audio recordings.
|
|
306
|
+
- spec_dir (str): Path to directory containing spectrogram image files.
|
|
307
|
+
- dest_dir (str, optional): If specified, copy used recordings to this directory.
|
|
308
|
+
- src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
|
|
309
|
+
- spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
|
|
171
310
|
"""
|
|
172
311
|
from britekit.training_db.extractor import Extractor
|
|
173
312
|
from britekit.training_db.training_db import TrainingDatabase
|
britekit/commands/_find_dup.py
CHANGED
|
@@ -31,11 +31,11 @@ def find_dup(
|
|
|
31
31
|
using cosine distance.
|
|
32
32
|
|
|
33
33
|
Args:
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
34
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
35
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
36
|
+
- class_name (str): Name of the class to scan for duplicates (e.g., "Common Yellowthroat").
|
|
37
|
+
- delete (bool): If True, remove duplicate recordings from the database. If False, only report them.
|
|
38
|
+
- spec_group (str): Spectrogram group name to use for embedding comparison. Defaults to "default".
|
|
39
39
|
"""
|
|
40
40
|
|
|
41
41
|
class Recording:
|
britekit/commands/_inat.py
CHANGED
|
@@ -54,10 +54,10 @@ def inat(
|
|
|
54
54
|
The command respects the maximum download limit and can optionally add filename prefixes.
|
|
55
55
|
|
|
56
56
|
Args:
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
57
|
+
- output_dir (str): Directory where downloaded recordings will be saved.
|
|
58
|
+
- max_downloads (int): Maximum number of recordings to download. Default is 500.
|
|
59
|
+
- name (str): Species name to search for (e.g., "Common Yellowthroat", "Geothlypis trichas").
|
|
60
|
+
- no_prefix (bool): If True, skip adding "N" prefix to filenames. Default adds prefix.
|
|
61
61
|
"""
|
|
62
62
|
import pyinaturalist
|
|
63
63
|
|
britekit/commands/_init.py
CHANGED
|
@@ -32,7 +32,7 @@ def init(dest: Optional[Path]=None) -> None:
|
|
|
32
32
|
a default directory structure.
|
|
33
33
|
|
|
34
34
|
Args:
|
|
35
|
-
|
|
35
|
+
- dest (Path): Directory to copy files into. Subdirectories are created as needed.
|
|
36
36
|
|
|
37
37
|
Examples:
|
|
38
38
|
britekit init --dest .
|
britekit/commands/_pickle.py
CHANGED
|
@@ -27,13 +27,13 @@ def pickle(
|
|
|
27
27
|
or specific classes specified by a CSV file.
|
|
28
28
|
|
|
29
29
|
Args:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
30
|
+
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
|
|
31
|
+
- classes_path (str, optional): Path to CSV file containing class names to include.
|
|
32
|
+
If omitted, includes all classes in the database.
|
|
33
|
+
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
|
|
34
|
+
- output_path (str, optional): Output pickle file path. Defaults to "data/training.pkl".
|
|
35
|
+
- max_per_class (int, optional): Maximum number of spectrograms to include per class.
|
|
36
|
+
- spec_group (str): Spectrogram group name to extract from. Defaults to 'default'.
|
|
37
37
|
"""
|
|
38
38
|
from britekit.core.pickler import Pickler
|
|
39
39
|
|