britekit 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of britekit might be problematic. Click here for more details.

Files changed (42)
  1. britekit/__about__.py +1 -1
  2. britekit/cli.py +6 -1
  3. britekit/commands/__init__.py +2 -1
  4. britekit/commands/_analyze.py +9 -9
  5. britekit/commands/_audioset.py +8 -8
  6. britekit/commands/_calibrate.py +8 -8
  7. britekit/commands/_ckpt_ops.py +6 -6
  8. britekit/commands/_db_add.py +12 -12
  9. britekit/commands/_db_delete.py +15 -15
  10. britekit/commands/_embed.py +4 -4
  11. britekit/commands/_ensemble.py +7 -7
  12. britekit/commands/_extract.py +158 -19
  13. britekit/commands/_find_dup.py +5 -5
  14. britekit/commands/_inat.py +4 -4
  15. britekit/commands/_init.py +1 -1
  16. britekit/commands/_pickle.py +7 -7
  17. britekit/commands/_plot.py +26 -26
  18. britekit/commands/_reextract.py +6 -6
  19. britekit/commands/_reports.py +22 -22
  20. britekit/commands/_search.py +12 -12
  21. britekit/commands/_train.py +6 -6
  22. britekit/commands/_tune.py +12 -12
  23. britekit/commands/_wav2mp3.py +2 -2
  24. britekit/commands/_xeno.py +7 -7
  25. britekit/commands/_youtube.py +3 -3
  26. britekit/core/analyzer.py +8 -8
  27. britekit/core/audio.py +14 -14
  28. britekit/core/data_module.py +2 -2
  29. britekit/core/plot.py +8 -8
  30. britekit/core/predictor.py +21 -21
  31. britekit/core/reextractor.py +6 -6
  32. britekit/core/util.py +8 -8
  33. britekit/occurrence_db/occurrence_data_provider.py +13 -13
  34. britekit/training_db/extractor.py +65 -30
  35. britekit/training_db/training_data_provider.py +1 -1
  36. britekit/training_db/training_db.py +97 -100
  37. britekit-0.1.4.dist-info/METADATA +299 -0
  38. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/RECORD +41 -41
  39. britekit-0.1.3.dist-info/METADATA +0 -290
  40. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/WHEEL +0 -0
  41. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/entry_points.txt +0 -0
  42. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/licenses/LICENSE.txt +0 -0
britekit/__about__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # SPDX-FileCopyrightText: 2025-present Jan Huus <jhuus1@gmail.com>
2
2
  #
3
3
  # SPDX-License-Identifier: MIT
4
- __version__ = "0.1.3"
4
+ __version__ = "0.1.4"
britekit/cli.py CHANGED
@@ -31,7 +31,11 @@ from .commands._db_delete import (
31
31
  )
32
32
  from .commands._embed import _embed_cmd
33
33
  from .commands._ensemble import _ensemble_cmd
34
- from .commands._extract import _extract_all_cmd, _extract_by_image_cmd
34
+ from .commands._extract import (
35
+ _extract_all_cmd,
36
+ _extract_by_csv_cmd,
37
+ _extract_by_image_cmd,
38
+ )
35
39
  from .commands._find_dup import _find_dup_cmd
36
40
  from .commands._inat import _inat_cmd
37
41
  from .commands._init import _init_cmd
@@ -83,6 +87,7 @@ cli.add_command(_del_stype_cmd)
83
87
  cli.add_command(_embed_cmd)
84
88
  cli.add_command(_ensemble_cmd)
85
89
  cli.add_command(_extract_all_cmd)
90
+ cli.add_command(_extract_by_csv_cmd)
86
91
  cli.add_command(_extract_by_image_cmd)
87
92
 
88
93
  cli.add_command(_find_dup_cmd)
britekit/commands/__init__.py CHANGED
@@ -14,7 +14,7 @@ from ._db_delete import (
14
14
  )
15
15
  from ._embed import embed
16
16
  from ._ensemble import ensemble
17
- from ._extract import extract_all, extract_by_image
17
+ from ._extract import extract_all, extract_by_csv, extract_by_image
18
18
  from ._find_dup import find_dup
19
19
  from ._inat import inat
20
20
  from ._init import init
@@ -57,6 +57,7 @@ __all__ = [
57
57
  "embed",
58
58
  "ensemble",
59
59
  "extract_all",
60
+ "extract_by_csv",
60
61
  "extract_by_image",
61
62
  "find_dup",
62
63
  "find_lr",
britekit/commands/_analyze.py CHANGED
@@ -30,15 +30,15 @@ def analyze(
30
30
  CSV files, or both.
31
31
 
32
32
  Args:
33
- cfg_path (str): Path to YAML configuration file defining model and inference settings.
34
- input_path (str): Path to input audio file or directory containing audio files.
35
- output_path (str): Path to output directory where results will be saved.
36
- rtype (str): Output format type. Options are "audacity", "csv", or "both".
37
- min_score (float, optional): Confidence threshold. Predictions below this value are excluded.
38
- num_threads (int, optional): Number of threads to use for processing. Default is 3.
39
- overlap (float, optional): Spectrogram overlap in seconds for sliding window analysis.
40
- segment_len (float, optional): Fixed segment length in seconds. If specified, labels are
41
- fixed-length; otherwise they are variable-length.
33
+ - cfg_path (str): Path to YAML configuration file defining model and inference settings.
34
+ - input_path (str): Path to input audio file or directory containing audio files.
35
+ - output_path (str): Path to output directory where results will be saved.
36
+ - rtype (str): Output format type. Options are "audacity", "csv", or "both".
37
+ - min_score (float, optional): Confidence threshold. Predictions below this value are excluded.
38
+ - num_threads (int, optional): Number of threads to use for processing. Default is 3.
39
+ - overlap (float, optional): Spectrogram overlap in seconds for sliding window analysis.
40
+ - segment_len (float, optional): Fixed segment length in seconds. If specified, labels are
41
+ fixed-length; otherwise they are variable-length.
42
42
  """
43
43
 
44
44
  # defer slow imports to improve --help performance
britekit/commands/_audioset.py CHANGED
@@ -201,14 +201,14 @@ def audioset(
201
201
  shows which other classes commonly co-occur with the specified class.
202
202
 
203
203
  Args:
204
- class_name (str): Name of the audio class to download (e.g., "train", "speech", "music").
205
- curated_csv_path (str): Path to CSV file containing a curated list of clips to download.
206
- output_dir (str): Directory where downloaded recordings will be saved.
207
- max_downloads (int): Maximum number of recordings to download. Default is 500.
208
- sampling_rate (float): Output sampling rate in Hz. Default is 32000.
209
- num_to_skip (int): Number of initial recordings to skip. Default is 0.
210
- do_report (bool): If True, generate a report on associated secondary classes instead of downloading.
211
- root_dir (str): Directory that contains the data directory. Default is working directory.
204
+ - class_name (str): Name of the audio class to download (e.g., "train", "speech", "music").
205
+ - curated_csv_path (str): Path to CSV file containing a curated list of clips to download.
206
+ - output_dir (str): Directory where downloaded recordings will be saved.
207
+ - max_downloads (int): Maximum number of recordings to download. Default is 500.
208
+ - sampling_rate (float): Output sampling rate in Hz. Default is 32000.
209
+ - num_to_skip (int): Number of initial recordings to skip. Default is 0.
210
+ - do_report (bool): If True, generate a report on associated secondary classes instead of downloading.
211
+ - root_dir (str): Directory that contains the data directory. Default is working directory.
212
212
  """
213
213
 
214
214
  if class_name is None and curated_csv_path is None:
britekit/commands/_calibrate.py CHANGED
@@ -34,14 +34,14 @@ def calibrate(
34
34
  prediction scores to better reflect true probabilities.
35
35
 
36
36
  Args:
37
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
38
- annotations_path (str): Path to CSV file containing ground truth annotations.
39
- label_dir (str): Directory containing model prediction labels (Audacity format).
40
- output_path (str): Directory where calibration reports will be saved.
41
- recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
42
- cutoff (float): Ignore predictions below this threshold during calibration. Default is 0.4.
43
- coef (float, optional): Use this coefficient for the calibration plot.
44
- inter (float, optional): Use this intercept for the calibration plot.
37
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
38
+ - annotations_path (str): Path to CSV file containing ground truth annotations.
39
+ - label_dir (str): Directory containing model prediction labels (Audacity format).
40
+ - output_path (str): Directory where calibration reports will be saved.
41
+ - recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
42
+ - cutoff (float): Ignore predictions below this threshold during calibration. Default is 0.4.
43
+ - coef (float, optional): Use this coefficient for the calibration plot.
44
+ - inter (float, optional): Use this intercept for the calibration plot.
45
45
  """
46
46
  from britekit.testing.per_segment_tester import PerSegmentTester
47
47
 
britekit/commands/_ckpt_ops.py CHANGED
@@ -19,9 +19,9 @@ def ckpt_avg(input_path: str="", output_path: Optional[str]=None):
19
19
  with averaged weights.
20
20
 
21
21
  Args:
22
- input_path (str): Directory containing checkpoint files (*.ckpt) to average.
23
- output_path (str, optional): Path for the output averaged checkpoint.
24
- Defaults to "average.ckpt" in the input directory.
22
+ - input_path (str): Directory containing checkpoint files (*.ckpt) to average.
23
+ - output_path (str, optional): Path for the output averaged checkpoint.
24
+ Defaults to "average.ckpt" in the input directory.
25
25
  """
26
26
  import torch
27
27
 
@@ -88,7 +88,7 @@ def ckpt_freeze(input_path: str=""):
88
88
  and inference rather than continued training.
89
89
 
90
90
  Args:
91
- input_path (str): Path to the checkpoint file to freeze.
91
+ - input_path (str): Path to the checkpoint file to freeze.
92
92
  """
93
93
  import pytorch_lightning as pl
94
94
  from britekit.models.model_loader import load_from_checkpoint
@@ -136,8 +136,8 @@ def ckpt_onnx(
136
136
  checkpoint.
137
137
 
138
138
  Args:
139
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
140
- input_path (str): Path to the PyTorch checkpoint file to convert.
139
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
140
+ - input_path (str): Path to the PyTorch checkpoint file to convert.
141
141
  """
142
142
  import torch
143
143
  from britekit.models.model_loader import load_from_checkpoint
britekit/commands/_db_add.py CHANGED
@@ -18,8 +18,8 @@ def add_cat(db_path: Optional[str]=None, name: str="") -> None:
18
18
  that contain multiple related species classes.
19
19
 
20
20
  Args:
21
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
22
- name (str): Name of the category to add (e.g., "Birds", "Mammals").
21
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
22
+ - name (str): Name of the category to add (e.g., "Birds", "Mammals").
23
23
  """
24
24
  from britekit.training_db.training_db import TrainingDatabase
25
25
 
@@ -58,8 +58,8 @@ def add_stype(db_path: Optional[str]=None, name: str="") -> None:
58
58
  or sounds produced by the same species.
59
59
 
60
60
  Args:
61
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
62
- name (str): Name of the sound type to add (e.g., "Song", "Call", "Alarm").
61
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
62
+ - name (str): Name of the sound type to add (e.g., "Song", "Call", "Alarm").
63
63
  """
64
64
  from britekit.training_db.training_db import TrainingDatabase
65
65
 
@@ -98,8 +98,8 @@ def add_src(db_path: Optional[str]=None, name: str="") -> None:
98
98
  maintain provenance and can be useful for data quality analysis.
99
99
 
100
100
  Args:
101
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
102
- name (str): Name of the source to add (e.g., "Xeno-Canto", "Macaulay Library").
101
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
102
+ - name (str): Name of the source to add (e.g., "Xeno-Canto", "Macaulay Library").
103
103
  """
104
104
  from britekit.training_db.training_db import TrainingDatabase
105
105
 
@@ -145,12 +145,12 @@ def add_class(
145
145
  This is typically used to add new species or sound types to the training database.
146
146
 
147
147
  Args:
148
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
149
- category (str): Name of the category this class belongs to. Defaults to "default".
150
- name (str): Primary name of the class (e.g., "Common Yellowthroat").
151
- code (str): Primary code for the class (e.g., "COYE").
152
- alt_name (str, optional): Alternate name for the class (e.g., scientific name).
153
- alt_code (str, optional): Alternate code for the class (e.g., scientific code).
148
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
149
+ - category (str): Name of the category this class belongs to. Defaults to "default".
150
+ - name (str): Primary name of the class (e.g., "Common Yellowthroat").
151
+ - code (str): Primary code for the class (e.g., "COYE").
152
+ - alt_name (str, optional): Alternate name for the class (e.g., scientific name).
153
+ - alt_code (str, optional): Alternate code for the class (e.g., scientific code).
154
154
  """
155
155
  from britekit.training_db.training_db import TrainingDatabase
156
156
 
britekit/commands/_db_delete.py CHANGED
@@ -20,8 +20,8 @@ def del_cat(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
20
20
  This is a destructive operation that cannot be undone.
21
21
 
22
22
  Args:
23
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
24
- name (str): Name of the category to delete (e.g., "Birds", "Mammals").
23
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
24
+ - name (str): Name of the category to delete (e.g., "Birds", "Mammals").
25
25
  """
26
26
  from britekit.training_db.training_db import TrainingDatabase
27
27
 
@@ -73,8 +73,8 @@ def del_class(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
73
73
  be undone and will affect any training data associated with this class.
74
74
 
75
75
  Args:
76
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
77
- name (str): Name of the class to delete (e.g., "Common Yellowthroat").
76
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
77
+ - name (str): Name of the class to delete (e.g., "Common Yellowthroat").
78
78
  """
79
79
  from britekit.training_db.training_db import TrainingDatabase
80
80
 
@@ -123,8 +123,8 @@ def del_rec(db_path: Optional[str]=None, file_name: Optional[str]=None) -> None:
123
123
  extracted from it.
124
124
 
125
125
  Args:
126
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
127
- file_name (str): Name of the recording file to delete (e.g., "XC123456.mp3").
126
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
127
+ - file_name (str): Name of the recording file to delete (e.g., "XC123456.mp3").
128
128
  """
129
129
  from britekit.training_db.training_db import TrainingDatabase
130
130
 
@@ -167,8 +167,8 @@ def del_sgroup(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
167
167
  This command removes the entire group and all spectrograms within it.
168
168
 
169
169
  Args:
170
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
171
- name (str): Name of the spectrogram group to delete (e.g., "default", "augmented").
170
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
171
+ - name (str): Name of the spectrogram group to delete (e.g., "default", "augmented").
172
172
  """
173
173
  from britekit.training_db.training_db import TrainingDatabase
174
174
 
@@ -212,8 +212,8 @@ def del_stype(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
212
212
  to null, effectively removing the sound type classification while keeping the audio data.
213
213
 
214
214
  Args:
215
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
216
- name (str): Name of the sound type to delete (e.g., "Song", "Call", "Alarm").
215
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
216
+ - name (str): Name of the sound type to delete (e.g., "Song", "Call", "Alarm").
217
217
  """
218
218
  from britekit.training_db.training_db import TrainingDatabase
219
219
 
@@ -257,8 +257,8 @@ def del_src(db_path: Optional[str]=None, name: Optional[str]=None) -> None:
257
257
  removing entire datasets from a specific source (e.g., removing all Xeno-Canto data).
258
258
 
259
259
  Args:
260
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
261
- name (str): Name of the source to delete (e.g., "Xeno-Canto", "Macaulay Library").
260
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
261
+ - name (str): Name of the source to delete (e.g., "Xeno-Canto", "Macaulay Library").
262
262
  """
263
263
  from britekit.training_db.training_db import TrainingDatabase
264
264
 
@@ -305,9 +305,9 @@ def del_seg(db_path: Optional[str]=None, class_name: Optional[str]=None, dir_pat
305
305
  allowing you to remove low-quality or incorrectly labeled segments.
306
306
 
307
307
  Args:
308
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
309
- class_name (str): Name of the class whose segments should be considered for deletion.
310
- dir_path (str): Path to directory containing spectrogram image files.
308
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
309
+ - class_name (str): Name of the class whose segments should be considered for deletion.
310
+ - dir_path (str): Path to directory containing spectrogram image files.
311
311
  """
312
312
  from britekit.training_db.training_db import TrainingDatabase
313
313
 
britekit/commands/_embed.py CHANGED
@@ -23,10 +23,10 @@ def embed(
23
23
  downstream tasks. The embeddings are compressed and stored in the database.
24
24
 
25
25
  Args:
26
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
27
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
28
- class_name (str, optional): Name of a specific class to process. If omitted, processes all classes.
29
- spec_group (str): Spectrogram group name to process. Defaults to 'default'.
26
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
27
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
28
+ - class_name (str, optional): Name of a specific class to process. If omitted, processes all classes.
29
+ - spec_group (str): Spectrogram group name to process. Defaults to 'default'.
30
30
  """
31
31
 
32
32
  def embed_block(
britekit/commands/_ensemble.py CHANGED
@@ -65,13 +65,13 @@ def ensemble(
65
65
  ensembles of the given size and test each one to identify the best ensemble.
66
66
 
67
67
  Args:
68
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
69
- ckpt_path (str): Path to directory containing checkpoints.
70
- ensemble_size (int): Number of checkpoints in ensemble (default=3).
71
- num_tries (int): Maximum number of ensembles to try (default=100).
72
- metric (str): Metric to use to compare ensembles (default=micro_roc).
73
- annotations_path (str): Path to CSV file containing ground truth annotations.
74
- recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
68
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
69
+ - ckpt_path (str): Path to directory containing checkpoints.
70
+ - ensemble_size (int): Number of checkpoints in ensemble (default=3).
71
+ - num_tries (int): Maximum number of ensembles to try (default=100).
72
+ - metric (str): Metric to use to compare ensembles (default=micro_roc).
73
+ - annotations_path (str): Path to CSV file containing ground truth annotations.
74
+ - recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
75
75
  """
76
76
  import glob
77
77
  import itertools
britekit/commands/_extract.py CHANGED
@@ -29,15 +29,15 @@ def extract_all(
29
29
  it will be automatically created.
30
30
 
31
31
  Args:
32
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
33
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
34
- cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
35
- class_code (str, optional): Class code for new class creation (e.g., "COYE").
36
- class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
37
- dir_path (str): Path to directory containing audio recordings to process.
38
- overlap (float, optional): Spectrogram overlap in seconds. Defaults to config value.
39
- src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
40
- spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
32
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
33
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
34
+ - cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
35
+ - class_code (str, optional): Class code for new class creation (e.g., "COYE").
36
+ - class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
37
+ - dir_path (str): Path to directory containing audio recordings to process.
38
+ - overlap (float, optional): Spectrogram overlap in seconds. Defaults to config value.
39
+ - src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
40
+ - spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
41
41
  """
42
42
  from britekit.training_db.extractor import Extractor
43
43
  from britekit.training_db.training_db import TrainingDatabase
@@ -134,6 +134,145 @@ def _extract_all_cmd(
134
134
  )
135
135
 
136
136
 
137
+ def extract_by_csv(
138
+ cfg_path: Optional[str]=None,
139
+ db_path: Optional[str]=None,
140
+ cat_name: Optional[str]=None,
141
+ class_code: Optional[str]=None,
142
+ class_name: str="",
143
+ rec_dir: str="",
144
+ csv_path: str="",
145
+ dest_dir: Optional[str]=None,
146
+ src_name: Optional[str]=None,
147
+ spec_group: Optional[str]=None,
148
+ ) -> None:
149
+ """
150
+ Extract spectrograms that correspond to rows in a CSV file.
151
+
152
+ This command parses a CSV file to identify the corresponding audio
153
+ segments and extracts those spectrograms from the original recordings.
154
+ This is useful when you have pre-selected spectrograms (e.g., from manual review
155
+ or search results) and want to extract only those specific segments. The CSV file
156
+ needs two columns: recording and start_time, where recording is the stem of the
157
+ recording file name (e.g. XC12345) and start_time is the offset in seconds from the
158
+ start of the recording.
159
+
160
+ Args:
161
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
162
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
163
+ - cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
164
+ - class_code (str, optional): Class code for new class creation (e.g., "COYE").
165
+ - class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
166
+ - rec_dir (str): Path to directory containing the original audio recordings.
167
+ - csv_path (str): Path to CSV file containing two columns (recording and start_time) to identify segments to extract.
168
+ - dest_dir (str, optional): If specified, copy used recordings to this directory.
169
+ - src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
170
+ - spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
171
+ """
172
+ from britekit.training_db.extractor import Extractor
173
+ from britekit.training_db.training_db import TrainingDatabase
174
+
175
+ cfg = get_config(cfg_path)
176
+ if db_path is not None:
177
+ cfg.train.train_db = db_path
178
+
179
+ with TrainingDatabase(cfg.train.train_db) as db:
180
+ extractor = Extractor(
181
+ db, class_name, class_code, cat_name, src_name, spec_group=spec_group
182
+ )
183
+ count = extractor.extract_by_csv(rec_dir, csv_path, dest_dir)
184
+ logging.info(f"Inserted {count} spectrograms")
185
+
186
+
187
+ @click.command(
188
+ name="extract-by-csv",
189
+ short_help="Insert spectrograms that correspond to rows in a CSV file.",
190
+ help=util.cli_help_from_doc(extract_by_csv.__doc__),
191
+ )
192
+ @click.option(
193
+ "-c",
194
+ "--cfg",
195
+ "cfg_path",
196
+ type=click.Path(exists=True),
197
+ required=False,
198
+ help="Path to YAML file defining config overrides.",
199
+ )
200
+ @click.option(
201
+ "-d", "--db", "db_path", required=False, help="Path to the training database."
202
+ )
203
+ @click.option(
204
+ "--cat",
205
+ "cat_name",
206
+ required=False,
207
+ help="Category name, e.g. 'bird' for when new class is added. Defaults to 'default'.",
208
+ )
209
+ @click.option(
210
+ "--code",
211
+ "class_code",
212
+ required=False,
213
+ help="Class code for when new class is added.",
214
+ )
215
+ @click.option("--name", "class_name", required=True, help="Class name.")
216
+ @click.option(
217
+ "--rec-dir",
218
+ "rec_dir",
219
+ type=click.Path(exists=True, file_okay=False, dir_okay=True),
220
+ required=True,
221
+ help="Path to directory containing recordings.",
222
+ )
223
+ @click.option(
224
+ "--csv-path",
225
+ "csv_path",
226
+ type=click.Path(exists=True, file_okay=True, dir_okay=False),
227
+ required=True,
228
+ help="Path to CSV file containing two columns (recording and offset) to identify segments to extract.",
229
+ )
230
+ @click.option(
231
+ "--dest-dir",
232
+ "dest_dir",
233
+ type=click.Path(exists=True, file_okay=False, dir_okay=True),
234
+ required=False,
235
+ help="Copy used recordings to this directory if specified.",
236
+ )
237
+ @click.option(
238
+ "--src",
239
+ "src_name",
240
+ required=False,
241
+ help="Source name for inserted recordings. Defaults to 'default'.",
242
+ )
243
+ @click.option(
244
+ "--sgroup",
245
+ "spec_group",
246
+ required=False,
247
+ help="Spectrogram group name. Defaults to 'default'.",
248
+ )
249
+ def _extract_by_csv_cmd(
250
+ cfg_path: Optional[str],
251
+ db_path: Optional[str],
252
+ cat_name: Optional[str],
253
+ class_code: Optional[str],
254
+ class_name: str,
255
+ rec_dir: str,
256
+ csv_path: str,
257
+ dest_dir: Optional[str],
258
+ src_name: Optional[str],
259
+ spec_group: Optional[str],
260
+ ) -> None:
261
+ util.set_logging()
262
+ extract_by_csv(
263
+ cfg_path,
264
+ db_path,
265
+ cat_name,
266
+ class_code,
267
+ class_name,
268
+ rec_dir,
269
+ csv_path,
270
+ dest_dir,
271
+ src_name,
272
+ spec_group,
273
+ )
274
+
275
+
137
276
  def extract_by_image(
138
277
  cfg_path: Optional[str]=None,
139
278
  db_path: Optional[str]=None,
@@ -158,16 +297,16 @@ def extract_by_image(
158
297
  that allows the command to locate and extract the corresponding audio segments.
159
298
 
160
299
  Args:
161
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
162
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
163
- cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
164
- class_code (str, optional): Class code for new class creation (e.g., "COYE").
165
- class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
166
- rec_dir (str): Path to directory containing the original audio recordings.
167
- spec_dir (str): Path to directory containing spectrogram image files.
168
- dest_dir (str, optional): If specified, copy used recordings to this directory.
169
- src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
170
- spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
300
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
301
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
302
+ - cat_name (str, optional): Category name for new class creation (e.g., "bird"). Defaults to "default".
303
+ - class_code (str, optional): Class code for new class creation (e.g., "COYE").
304
+ - class_name (str): Name of the class for the recordings (e.g., "Common Yellowthroat").
305
+ - rec_dir (str): Path to directory containing the original audio recordings.
306
+ - spec_dir (str): Path to directory containing spectrogram image files.
307
+ - dest_dir (str, optional): If specified, copy used recordings to this directory.
308
+ - src_name (str, optional): Source name for the recordings (e.g., "Xeno-Canto"). Defaults to "default".
309
+ - spec_group (str, optional): Spectrogram group name for organizing extractions. Defaults to "default".
171
310
  """
172
311
  from britekit.training_db.extractor import Extractor
173
312
  from britekit.training_db.training_db import TrainingDatabase
britekit/commands/_find_dup.py CHANGED
@@ -31,11 +31,11 @@ def find_dup(
31
31
  using cosine distance.
32
32
 
33
33
  Args:
34
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
35
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
36
- class_name (str): Name of the class to scan for duplicates (e.g., "Common Yellowthroat").
37
- delete (bool): If True, remove duplicate recordings from the database. If False, only report them.
38
- spec_group (str): Spectrogram group name to use for embedding comparison. Defaults to "default".
34
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
35
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
36
+ - class_name (str): Name of the class to scan for duplicates (e.g., "Common Yellowthroat").
37
+ - delete (bool): If True, remove duplicate recordings from the database. If False, only report them.
38
+ - spec_group (str): Spectrogram group name to use for embedding comparison. Defaults to "default".
39
39
  """
40
40
 
41
41
  class Recording:
britekit/commands/_inat.py CHANGED
@@ -54,10 +54,10 @@ def inat(
54
54
  The command respects the maximum download limit and can optionally add filename prefixes.
55
55
 
56
56
  Args:
57
- output_dir (str): Directory where downloaded recordings will be saved.
58
- max_downloads (int): Maximum number of recordings to download. Default is 500.
59
- name (str): Species name to search for (e.g., "Common Yellowthroat", "Geothlypis trichas").
60
- no_prefix (bool): If True, skip adding "N" prefix to filenames. Default adds prefix.
57
+ - output_dir (str): Directory where downloaded recordings will be saved.
58
+ - max_downloads (int): Maximum number of recordings to download. Default is 500.
59
+ - name (str): Species name to search for (e.g., "Common Yellowthroat", "Geothlypis trichas").
60
+ - no_prefix (bool): If True, skip adding "N" prefix to filenames. Default adds prefix.
61
61
  """
62
62
  import pyinaturalist
63
63
 
britekit/commands/_init.py CHANGED
@@ -32,7 +32,7 @@ def init(dest: Optional[Path]=None) -> None:
32
32
  a default directory structure.
33
33
 
34
34
  Args:
35
- dest (Path): Directory to copy files into. Subdirectories are created as needed.
35
+ - dest (Path): Directory to copy files into. Subdirectories are created as needed.
36
36
 
37
37
  Examples:
38
38
  britekit init --dest .
britekit/commands/_pickle.py CHANGED
@@ -27,13 +27,13 @@ def pickle(
27
27
  or specific classes specified by a CSV file.
28
28
 
29
29
  Args:
30
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
31
- classes_path (str, optional): Path to CSV file containing class names to include.
32
- If omitted, includes all classes in the database.
33
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
34
- output_path (str, optional): Output pickle file path. Defaults to "data/training.pkl".
35
- max_per_class (int, optional): Maximum number of spectrograms to include per class.
36
- spec_group (str): Spectrogram group name to extract from. Defaults to 'default'.
30
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
31
+ - classes_path (str, optional): Path to CSV file containing class names to include.
32
+ If omitted, includes all classes in the database.
33
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
34
+ - output_path (str, optional): Output pickle file path. Defaults to "data/training.pkl".
35
+ - max_per_class (int, optional): Maximum number of spectrograms to include per class.
36
+ - spec_group (str): Spectrogram group name to extract from. Defaults to 'default'.
37
37
  """
38
38
  from britekit.core.pickler import Pickler
39
39