britekit-0.1.3-py3-none-any.whl → britekit-0.1.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of britekit might be problematic. Click here for more details.

Files changed (42)
  1. britekit/__about__.py +1 -1
  2. britekit/cli.py +6 -1
  3. britekit/commands/__init__.py +2 -1
  4. britekit/commands/_analyze.py +9 -9
  5. britekit/commands/_audioset.py +8 -8
  6. britekit/commands/_calibrate.py +8 -8
  7. britekit/commands/_ckpt_ops.py +6 -6
  8. britekit/commands/_db_add.py +12 -12
  9. britekit/commands/_db_delete.py +15 -15
  10. britekit/commands/_embed.py +4 -4
  11. britekit/commands/_ensemble.py +7 -7
  12. britekit/commands/_extract.py +158 -19
  13. britekit/commands/_find_dup.py +5 -5
  14. britekit/commands/_inat.py +4 -4
  15. britekit/commands/_init.py +1 -1
  16. britekit/commands/_pickle.py +7 -7
  17. britekit/commands/_plot.py +26 -26
  18. britekit/commands/_reextract.py +6 -6
  19. britekit/commands/_reports.py +22 -22
  20. britekit/commands/_search.py +12 -12
  21. britekit/commands/_train.py +6 -6
  22. britekit/commands/_tune.py +12 -12
  23. britekit/commands/_wav2mp3.py +2 -2
  24. britekit/commands/_xeno.py +7 -7
  25. britekit/commands/_youtube.py +3 -3
  26. britekit/core/analyzer.py +8 -8
  27. britekit/core/audio.py +14 -14
  28. britekit/core/data_module.py +2 -2
  29. britekit/core/plot.py +8 -8
  30. britekit/core/predictor.py +21 -21
  31. britekit/core/reextractor.py +6 -6
  32. britekit/core/util.py +8 -8
  33. britekit/occurrence_db/occurrence_data_provider.py +13 -13
  34. britekit/training_db/extractor.py +65 -30
  35. britekit/training_db/training_data_provider.py +1 -1
  36. britekit/training_db/training_db.py +97 -100
  37. britekit-0.1.4.dist-info/METADATA +299 -0
  38. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/RECORD +41 -41
  39. britekit-0.1.3.dist-info/METADATA +0 -290
  40. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/WHEEL +0 -0
  41. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/entry_points.txt +0 -0
  42. {britekit-0.1.3.dist-info → britekit-0.1.4.dist-info}/licenses/LICENSE.txt +0 -0
britekit/commands/_plot.py CHANGED
@@ -76,15 +76,15 @@ def plot_db(
76
76
  number of spectrograms plotted.
77
77
 
78
78
  Args:
79
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
80
- class_name (str): Name of the class to plot spectrograms for (e.g., "Common Yellowthroat").
81
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
82
- ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
83
- max_count (int, optional): Maximum number of spectrograms to plot. If omitted, plots all available.
84
- output_path (str): Directory where spectrogram images will be saved.
85
- prefix (str, optional): Only include recordings that start with this filename prefix.
86
- power (float, optional): Raise spectrograms to this power for visualization. Lower values show more detail.
87
- spec_group (str, optional): Spectrogram group name to plot from. Defaults to "default".
79
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
80
+ - class_name (str): Name of the class to plot spectrograms for (e.g., "Common Yellowthroat").
81
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
82
+ - ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
83
+ - max_count (int, optional): Maximum number of spectrograms to plot. If omitted, plots all available.
84
+ - output_path (str): Directory where spectrogram images will be saved.
85
+ - prefix (str, optional): Only include recordings that start with this filename prefix.
86
+ - power (float, optional): Raise spectrograms to this power for visualization. Lower values show more detail.
87
+ - spec_group (str, optional): Spectrogram group name to plot from. Defaults to "default".
88
88
  """
89
89
  from britekit.core.plot import plot_spec
90
90
  from britekit.training_db.training_db import TrainingDatabase
@@ -158,7 +158,7 @@ def plot_db(
158
158
  "--ndims",
159
159
  "ndims",
160
160
  is_flag=True,
161
- help="If specified, do not show time and frequency dimensions on the spectrogram plots.",
161
+ help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
162
162
  )
163
163
  @click.option(
164
164
  "--max",
@@ -237,13 +237,13 @@ def plot_dir(
237
237
  overlapping segments.
238
238
 
239
239
  Args:
240
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
241
- ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
242
- input_path (str): Directory containing audio recordings to process.
243
- output_path (str): Directory where spectrogram images will be saved.
244
- all (bool): If True, plot each recording as one spectrogram. If False, break into segments.
245
- overlap (float): Spectrogram overlap in seconds when breaking recordings into segments. Default is 0.
246
- power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
240
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
241
+ - ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
242
+ - input_path (str): Directory containing audio recordings to process.
243
+ - output_path (str): Directory where spectrogram images will be saved.
244
+ - all (bool): If True, plot each recording as one spectrogram. If False, break into segments.
245
+ - overlap (float): Spectrogram overlap in seconds when breaking recordings into segments. Default is 0.
246
+ - power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
247
247
  """
248
248
  from britekit.core.audio import Audio
249
249
 
@@ -284,7 +284,7 @@ def plot_dir(
284
284
  "--ndims",
285
285
  "ndims",
286
286
  is_flag=True,
287
- help="If specified, show seconds on x-axis and frequencies on y-axis.",
287
+ help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
288
288
  )
289
289
  @click.option(
290
290
  "-i",
@@ -353,13 +353,13 @@ def plot_rec(
353
353
  overlapping segments.
354
354
 
355
355
  Args:
356
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
357
- ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
358
- input_path (str): Path to the audio recording file to process.
359
- output_path (str): Directory where spectrogram images will be saved.
360
- all (bool): If True, plot the entire recording as one spectrogram. If False, break into segments.
361
- overlap (float): Spectrogram overlap in seconds when breaking the recording into segments. Default is 0.
362
- power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
356
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
357
+ - ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
358
+ - input_path (str): Path to the audio recording file to process.
359
+ - output_path (str): Directory where spectrogram images will be saved.
360
+ - all (bool): If True, plot the entire recording as one spectrogram. If False, break into segments.
361
+ - overlap (float): Spectrogram overlap in seconds when breaking the recording into segments. Default is 0.
362
+ - power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
363
363
  """
364
364
  from britekit.core.audio import Audio
365
365
 
@@ -394,7 +394,7 @@ def plot_rec(
394
394
  "--ndims",
395
395
  "ndims",
396
396
  is_flag=True,
397
- help="If specified, show seconds on x-axis and frequencies on y-axis.",
397
+ help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
398
398
  )
399
399
  @click.option(
400
400
  "-i",
britekit/commands/_reextract.py CHANGED
@@ -30,12 +30,12 @@ def reextract(
30
30
  updating the database.
31
31
 
32
32
  Args:
33
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
34
- db_path (str, optional): Path to the training database. Defaults to cfg.train.training_db.
35
- class_name (str, optional): Name of a specific class to reextract. If omitted, processes all classes.
36
- classes_path (str, optional): Path to CSV file listing classes to reextract. Alternative to class_name.
37
- check (bool): If True, only check that all recording paths are accessible without updating database.
38
- spec_group (str): Spectrogram group name for storing the extracted spectrograms. Defaults to 'default'.
33
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
34
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.training_db.
35
+ - class_name (str, optional): Name of a specific class to reextract. If omitted, processes all classes.
36
+ - classes_path (str, optional): Path to CSV file listing classes to reextract. Alternative to class_name.
37
+ - check (bool): If True, only check that all recording paths are accessible without updating database.
38
+ - spec_group (str): Spectrogram group name for storing the extracted spectrograms. Defaults to 'default'.
39
39
  """
40
40
  from britekit.core.reextractor import Reextractor
41
41
  cfg = get_config(cfg_path)
britekit/commands/_reports.py CHANGED
@@ -27,8 +27,8 @@ def rpt_ann(
27
27
  breakdowns.
28
28
 
29
29
  Args:
30
- annotations_path (str): Path to CSV file containing per-segment annotations.
31
- output_path (str): Directory where summary reports will be saved.
30
+ - annotations_path (str): Path to CSV file containing per-segment annotations.
31
+ - output_path (str): Directory where summary reports will be saved.
32
32
  """
33
33
  import pandas as pd
34
34
 
@@ -136,9 +136,9 @@ def rpt_db(cfg_path: Optional[str] = None,
136
136
  and can be used for data management and quality control.
137
137
 
138
138
  Args:
139
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
140
- db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
141
- output_path (str): Directory where database reports will be saved.
139
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
140
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
141
+ - output_path (str): Directory where database reports will be saved.
142
142
  """
143
143
  from britekit.training_db.training_db import TrainingDatabase
144
144
  from britekit.training_db.training_data_provider import TrainingDataProvider
@@ -202,10 +202,10 @@ def rpt_epochs(
202
202
  This is useful to determine the number of training epochs needed.
203
203
 
204
204
  Args:
205
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
206
- input_path (str): Checkpoint directory generated by training.
207
- annotations_path (str): Path to CSV file containing ground truth annotations.
208
- output_path (str): Directory where the graph image will be saved.
205
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
206
+ - input_path (str): Checkpoint directory generated by training.
207
+ - annotations_path (str): Path to CSV file containing ground truth annotations.
208
+ - output_path (str): Directory where the graph image will be saved.
209
209
  """
210
210
  import matplotlib.pyplot as plt
211
211
  from matplotlib.ticker import MaxNLocator
@@ -397,9 +397,9 @@ def rpt_labels(
397
397
  across different recordings and classes.
398
398
 
399
399
  Args:
400
- label_dir (str): Directory containing inference output (CSV or Audacity labels).
401
- output_path (str): Directory where summary reports will be saved.
402
- min_score (float, optional): Ignore detections below this confidence threshold.
400
+ - label_dir (str): Directory containing inference output (CSV or Audacity labels).
401
+ - output_path (str): Directory where summary reports will be saved.
402
+ - min_score (float, optional): Ignore detections below this confidence threshold.
403
403
  """
404
404
  import pandas as pd
405
405
 
@@ -544,21 +544,21 @@ def rpt_test(
544
544
  F1 scores, and various visualization plots to help understand model behavior.
545
545
 
546
546
  Args:
547
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
548
- granularity (str): Evaluation granularity ("recording", "block", or "segment"). Default is "segment".
549
- annotations_path (str): Path to CSV file containing ground truth annotations.
550
- label_dir (str): Directory containing model prediction labels (Audacity format).
551
- output_path (str): Directory where test reports will be saved.
552
- recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
553
- min_score (float, optional): Provide detailed reports for this confidence threshold.
554
- block_size (int, optional): block_size in seconds (default=60).
555
- precision (float): For recording granularity, report true positive seconds at this precision. Default is 0.95.
547
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
548
+ - granularity (str): Evaluation granularity ("recording", "block", or "segment"). Default is "segment".
549
+ - annotations_path (str): Path to CSV file containing ground truth annotations.
550
+ - label_dir (str): Directory containing model prediction labels (Audacity format).
551
+ - output_path (str): Directory where test reports will be saved.
552
+ - recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
553
+ - min_score (float, optional): Provide detailed reports for this confidence threshold.
554
+ - block_size (int, optional): block_size in seconds (default=60).
555
+ - precision (float): For recording granularity, report true positive seconds at this precision. Default is 0.95.
556
556
  """
557
557
  from britekit.testing.per_block_tester import PerBlockTester
558
558
  from britekit.testing.per_recording_tester import PerRecordingTester
559
559
  from britekit.testing.per_segment_tester import PerSegmentTester
560
560
 
561
- cfg = get_config()
561
+ cfg = get_config(cfg_path)
562
562
  try:
563
563
  if not recordings_path:
564
564
  recordings_path = str(Path(annotations_path).parent)
britekit/commands/_search.py CHANGED
@@ -33,18 +33,18 @@ def search(
33
33
  based on embedding similarity. Results are plotted and saved to the output directory.
34
34
 
35
35
  Args:
36
- cfg_path (str): Path to YAML configuration file defining model settings.
37
- db_path (str): Path to the training database containing spectrograms to search.
38
- class_name (str): Name of the class/species to search within the database.
39
- max_dist (float): Maximum distance threshold. Results with distance greater than this are excluded.
40
- exp (float): Exponent to raise spectrograms to for visualization (shows background sounds).
41
- num_to_plot (int): Maximum number of similar spectrograms to plot and save.
42
- output_path (str): Directory where search results and plots will be saved.
43
- input_path (str): Path to the audio file containing the target spectrogram.
44
- offset (float): Time offset in seconds where the target spectrogram is extracted.
45
- exclude_db (str, optional): Path to an exclusion database. Spectrograms in this database are excluded from results.
46
- class_name2 (str, optional): Class name in the exclusion database. Defaults to the search class name.
47
- spec_group (str): Spectrogram group name in the database. Defaults to 'default'.
36
+ - cfg_path (str): Path to YAML configuration file defining model settings.
37
+ - db_path (str): Path to the training database containing spectrograms to search.
38
+ - class_name (str): Name of the class/species to search within the database.
39
+ - max_dist (float): Maximum distance threshold. Results with distance greater than this are excluded.
40
+ - exp (float): Exponent to raise spectrograms to for visualization (shows background sounds).
41
+ - num_to_plot (int): Maximum number of similar spectrograms to plot and save.
42
+ - output_path (str): Directory where search results and plots will be saved.
43
+ - input_path (str): Path to the audio file containing the target spectrogram.
44
+ - offset (float): Time offset in seconds where the target spectrogram is extracted.
45
+ - exclude_db (str, optional): Path to an exclusion database. Spectrograms in this database are excluded from results.
46
+ - class_name2 (str, optional): Class name in the exclusion database. Defaults to the search class name.
47
+ - spec_group (str): Spectrogram group name in the database. Defaults to 'default'.
48
48
  """
49
49
 
50
50
  class SpecInfo:
britekit/commands/_train.py CHANGED
@@ -26,8 +26,8 @@ def train(
26
26
  automatically. The final trained model can be used for inference and evaluation.
27
27
 
28
28
  Args:
29
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
30
- If not specified, uses default configuration.
29
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
30
+ If not specified, uses default configuration.
31
31
  """
32
32
  from britekit.core.trainer import Trainer
33
33
 
@@ -82,10 +82,10 @@ def find_lr(cfg_path: str, num_batches: int):
82
82
  avoiding rates that are too high (causing instability) or too low (slow convergence).
83
83
 
84
84
  Args:
85
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
86
- If not specified, uses default configuration.
87
- num_batches (int): Number of training batches to analyze for learning rate finding.
88
- Default is 100. Higher values provide more accurate results but take longer.
85
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
86
+ If not specified, uses default configuration.
87
+ - num_batches (int): Number of training batches to analyze for learning rate finding.
88
+ Default is 100. Higher values provide more accurate results but take longer.
89
89
  """
90
90
  from britekit.core.trainer import Trainer
91
91
 
britekit/commands/_tune.py CHANGED
@@ -41,18 +41,18 @@ def tune(
41
41
  The param_path specifies a YAML file that defines the parameters to be tuned, as described in the README.
42
42
 
43
43
  Args:
44
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
45
- param_path (str, optional): Path to YAML file defining hyperparameters to tune and their search space.
46
- output_path (str): Directory where reports will be saved.
47
- annotations_path (str): Path to CSV file containing ground truth annotations.
48
- metric (str): Metric used to compare runs. Options include various MAP and ROC metrics.
49
- recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
50
- train_log_path (str, optional): Training log directory. Defaults to "logs/fold-0".
51
- num_trials (int): Number of random trials to run. If 0, performs exhaustive search.
52
- num_runs (int): Number of runs to average for each parameter combination. Default is 1.
53
- extract (bool): Extract new spectrograms before training, to tune spectrogram parameters.
54
- skip_training (bool): Iterate on inference only, using checkpoints from the last training run.
55
- classes_path (str, optional): Path to CSV containing class names for extract option. Default is all classes.
44
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
45
+ - param_path (str, optional): Path to YAML file defining hyperparameters to tune and their search space.
46
+ - output_path (str): Directory where reports will be saved.
47
+ - annotations_path (str): Path to CSV file containing ground truth annotations.
48
+ - metric (str): Metric used to compare runs. Options include various MAP and ROC metrics.
49
+ - recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
50
+ - train_log_path (str, optional): Training log directory. Defaults to "logs/fold-0".
51
+ - num_trials (int): Number of random trials to run. If 0, performs exhaustive search.
52
+ - num_runs (int): Number of runs to average for each parameter combination. Default is 1.
53
+ - extract (bool): Extract new spectrograms before training, to tune spectrogram parameters.
54
+ - skip_training (bool): Iterate on inference only, using checkpoints from the last training run.
55
+ - classes_path (str, optional): Path to CSV containing class names for extract option. Default is all classes.
56
56
  """
57
57
  import yaml
58
58
  from britekit.core.tuner import Tuner
britekit/commands/_wav2mp3.py CHANGED
@@ -24,8 +24,8 @@ def wav2mp3(
24
24
  requirements for large audio datasets.
25
25
 
26
26
  Args:
27
- dir (str): Path to directory containing audio files to convert.
28
- sampling_rate (int): Output sampling rate in Hz. Default is 32000 Hz.
27
+ - dir (str): Path to directory containing audio files to convert.
28
+ - sampling_rate (int): Output sampling rate in Hz. Default is 32000 Hz.
29
29
  """
30
30
  CONVERT_TYPES = {
31
31
  ".flac",
britekit/commands/_xeno.py CHANGED
@@ -75,13 +75,13 @@ def xeno(
75
75
  Then specify the key in the --key argument, or set the environment variable XCKEY=<key>.
76
76
 
77
77
  Args:
78
- key (str): Xeno-Canto API key for authentication. Can also be set via XCKEY environment variable.
79
- output_dir (str): Directory where downloaded recordings will be saved.
80
- max_downloads (int): Maximum number of recordings to download. Default is 500.
81
- name (str): Species name to search for (common name or scientific name).
82
- ignore_licence (bool): If True, ignore license restrictions. By default, excludes BY-NC-ND licensed recordings.
83
- scientific_name (bool): If True, treat the name as a scientific name rather than common name.
84
- seen_only (bool): If True, only download recordings where the animal was seen (animal-seen=yes).
78
+ - key (str): Xeno-Canto API key for authentication. Can also be set via XCKEY environment variable.
79
+ - output_dir (str): Directory where downloaded recordings will be saved.
80
+ - max_downloads (int): Maximum number of recordings to download. Default is 500.
81
+ - name (str): Species name to search for (common name or scientific name).
82
+ - ignore_licence (bool): If True, ignore license restrictions. By default, excludes BY-NC-ND licensed recordings.
83
+ - scientific_name (bool): If True, treat the name as a scientific name rather than common name.
84
+ - seen_only (bool): If True, only download recordings where the animal was seen (animal-seen=yes).
85
85
  """
86
86
  import requests
87
87
 
britekit/commands/_youtube.py CHANGED
@@ -17,9 +17,9 @@ def youtube(
17
17
  Download an audio recording from Youtube, given a Youtube ID.
18
18
 
19
19
  Args:
20
- id (str): ID of the clip to download.
21
- output_dir (str): Directory where downloaded recordings will be saved.
22
- sampling_rate (float): Output sampling rate in Hz. Default is 32000.
20
+ - id (str): ID of the clip to download.
21
+ - output_dir (str): Directory where downloaded recordings will be saved.
22
+ - sampling_rate (float): Output sampling rate in Hz. Default is 32000.
23
23
  """
24
24
  import librosa
25
25
  import numpy as np
britekit/core/analyzer.py CHANGED
@@ -61,9 +61,9 @@ class Analyzer:
61
61
  This runs on its own thread and processes all recordings in the given list.
62
62
 
63
63
  Args:
64
- recording_paths (list): Individual recording paths.
65
- output_path (str): Where to write the output.
66
- rtype (str): Output format: "audacity", "csv" or "both".
64
+ - recording_paths (list): Individual recording paths.
65
+ - output_path (str): Where to write the output.
66
+ - rtype (str): Output format: "audacity", "csv" or "both".
67
67
  """
68
68
  from britekit.core.predictor import Predictor
69
69
 
@@ -91,8 +91,8 @@ class Analyzer:
91
91
  Split the input list into `n` lists based on index modulo `n`.
92
92
 
93
93
  Args:
94
- input_list (list): The input list to split.
95
- n (int): Number of resulting groups.
94
+ - input_list (list): The input list to split.
95
+ - n (int): Number of resulting groups.
96
96
 
97
97
  Returns:
98
98
  List[List]: A list of `n` lists, where each sublist contains elements
@@ -108,9 +108,9 @@ class Analyzer:
108
108
  Run inference.
109
109
 
110
110
  Args:
111
- input_path (str): Recording or directory containing recordings.
112
- output_path (str): Output directory.
113
- rtype (str): Output format: "audacity", "csv" or "both".
111
+ - input_path (str): Recording or directory containing recordings.
112
+ - output_path (str): Output directory.
113
+ - rtype (str): Output format: "audacity", "csv" or "both".
114
114
  """
115
115
  import pandas as pd
116
116
 
britekit/core/audio.py CHANGED
@@ -46,7 +46,7 @@ class Audio:
46
46
  so we downsample rather than upsampling.
47
47
 
48
48
  Args:
49
- cfg (Optional[BaseConfig]): Configuration object. If None, uses default config.
49
+ - cfg (Optional[BaseConfig]): Configuration object. If None, uses default config.
50
50
  """
51
51
  import torchaudio as ta
52
52
 
@@ -113,7 +113,7 @@ class Audio:
113
113
  if choose_channel is enabled in the configuration.
114
114
 
115
115
  Args:
116
- path (str): Path to the audio recording file.
116
+ - path (str): Path to the audio recording file.
117
117
 
118
118
  Returns:
119
119
  tuple: (signal, sampling_rate) where:
@@ -168,18 +168,18 @@ class Audio:
168
168
  Returns both normalized (0-1 range) and unnormalized versions of the spectrograms.
169
169
 
170
170
  Args:
171
- start_times (list[float]): List of start times in seconds from the beginning
172
- of the recording for each spectrogram.
173
- spec_duration (Optional[float]): Length of each spectrogram in seconds.
174
- Defaults to cfg.audio.spec_duration.
175
- freq_scale (Optional[str]): Frequency scale to use ('linear', 'log', 'mel').
176
- Defaults to cfg.audio.freq_scale.
177
- decibels (Optional[float]): Whether to convert to decibels.
178
- Defaults to cfg.audio.decibels.
179
- top_db (Optional[int]): Maximum decibel value for normalization.
180
- Defaults to cfg.audio.top_db.
181
- db_power (Optional[int]): Power to apply after decibel conversion.
182
- Defaults to cfg.audio.db_power.
171
+ - start_times (list[float]): List of start times in seconds from the beginning
172
+ of the recording for each spectrogram.
173
+ - spec_duration (Optional[float]): Length of each spectrogram in seconds.
174
+ Defaults to cfg.audio.spec_duration.
175
+ - freq_scale (Optional[str]): Frequency scale to use ('linear', 'log', 'mel').
176
+ Defaults to cfg.audio.freq_scale.
177
+ - decibels (Optional[float]): Whether to convert to decibels.
178
+ Defaults to cfg.audio.decibels.
179
+ - top_db (Optional[int]): Maximum decibel value for normalization.
180
+ Defaults to cfg.audio.top_db.
181
+ - db_power (Optional[int]): Power to apply after decibel conversion.
182
+ Defaults to cfg.audio.db_power.
183
183
 
184
184
  Returns:
185
185
  tuple: (normalized_specs, unnormalized_specs) where:
britekit/core/data_module.py CHANGED
@@ -124,7 +124,7 @@ class DataModule(LightningDataModule):
124
124
  Load data from a pickle file with error handling.
125
125
 
126
126
  Args:
127
- path (str): Path to the pickle file
127
+ - path (str): Path to the pickle file
128
128
 
129
129
  Returns:
130
130
  Tuple containing (class_names, class_codes, alt_names, alt_codes, specs, labels)
@@ -175,7 +175,7 @@ class DataModule(LightningDataModule):
175
175
  Prepare train/validation split for a specific fold.
176
176
 
177
177
  Args:
178
- fold_index (int): Index of the fold to prepare
178
+ - fold_index (int): Index of the fold to prepare
179
179
 
180
180
  Raises:
181
181
  ValueError: If fold_index is invalid or val_portion is invalid
britekit/core/plot.py CHANGED
@@ -16,14 +16,14 @@ def plot_spec(
16
16
  Plot and save a spectrogram image.
17
17
 
18
18
  Args:
19
- spec (np.ndarray): Spectrogram of shape (height, width)
20
- output_path (str): Path to save the image (e.g., "output.png")
21
- show_dims (bool): Whether to show frequency and time scales
22
- spec_duration (float, optional): Number of seconds represented.
23
- height (int, optional): Output image height in pixels. If not specified,
24
- the existing square behavior is preserved.
25
- width (int, optional): Output image width in pixels. If not specified,
26
- the existing square behavior is preserved.
19
+ - spec (np.ndarray): Spectrogram of shape (height, width)
20
+ - output_path (str): Path to save the image (e.g., "output.png")
21
+ - show_dims (bool): Whether to show frequency and time scales
22
+ - spec_duration (float, optional): Number of seconds represented.
23
+ - height (int, optional): Output image height in pixels. If not specified,
24
+ the existing square behavior is preserved.
25
+ - width (int, optional): Output image width in pixels. If not specified,
26
+ the existing square behavior is preserved.
27
27
  """
28
28
  import matplotlib.pyplot as plt
29
29
  import numpy as np
britekit/core/predictor.py CHANGED
@@ -30,10 +30,10 @@ class Predictor:
30
30
  Initialize the Predictor with a model or ensemble of models.
31
31
 
32
32
  Args:
33
- model_path (str): Path to a checkpoint (.ckpt) or ONNX (.onnx) file,
34
- or a directory containing multiple checkpoint/ONNX files for an ensemble.
35
- device (str, optional): Device to use for inference ('cuda', 'cpu', or 'mps').
36
- If None, automatically selects the best available device.
33
+ - model_path (str): Path to a checkpoint (.ckpt) or ONNX (.onnx) file,
34
+ or a directory containing multiple checkpoint/ONNX files for an ensemble.
35
+ - device (str, optional): Device to use for inference ('cuda', 'cpu', or 'mps').
36
+ If None, automatically selects the best available device.
37
37
  """
38
38
  from britekit.core.audio import Audio
39
39
 
@@ -67,7 +67,7 @@ class Predictor:
67
67
  Get scores in array format from the loaded models for the given recording.
68
68
 
69
69
  Args:
70
- recording_path (str): Path to the audio recording file.
70
+ - recording_path (str): Path to the audio recording file.
71
71
 
72
72
  Returns:
73
73
  tuple: A tuple containing:
@@ -139,8 +139,8 @@ class Predictor:
139
139
  Given an array of raw segment-level scores, return dict of labels.
140
140
 
141
141
  Args:
142
- scores (np.ndarray): Array of scores of shape (num_spectrograms, num_species).
143
- start_times (list[float]): Start time in seconds for each spectrogram.
142
+ - scores (np.ndarray): Array of scores of shape (num_spectrograms, num_species).
143
+ - start_times (list[float]): Start time in seconds for each spectrogram.
144
144
 
145
145
  Returns:
146
146
  dict[str, list]: Dictionary mapping species names to lists of Label objects.
@@ -187,7 +187,7 @@ class Predictor:
187
187
  Given a frame map, return dict of labels.
188
188
 
189
189
  Args:
190
- frame_map (np.ndarray): Array of scores of shape (num_frames, num_species).
190
+ - frame_map (np.ndarray): Array of scores of shape (num_frames, num_species).
191
191
 
192
192
  Returns:
193
193
  dict[str, list]: Dictionary mapping species names to lists of Label objects.
@@ -283,11 +283,11 @@ class Predictor:
283
283
  Given an array of raw scores, return as a pandas dataframe.
284
284
 
285
285
  Args:
286
- score_array (np.ndarray): Array of scores of shape (num_spectrograms, num_species).
287
- frame_map (np.ndarray, optional): Frame-level scores of shape (num_frames, num_species).
288
- If provided, uses frame-level labels; otherwise uses segment-level labels.
289
- start_times (list[float]): Start time in seconds for each spectrogram.
290
- recording_name (str): Name of the recording for the dataframe.
286
+ - score_array (np.ndarray): Array of scores of shape (num_spectrograms, num_species).
287
+ - frame_map (np.ndarray, optional): Frame-level scores of shape (num_frames, num_species).
288
+ If provided, uses frame-level labels; otherwise uses segment-level labels.
289
+ - start_times (list[float]): Start time in seconds for each spectrogram.
290
+ - recording_name (str): Name of the recording for the dataframe.
291
291
 
292
292
  Returns:
293
293
  pd.DataFrame: DataFrame with columns ['recording', 'name', 'start_time', 'end_time', 'score']
@@ -332,11 +332,11 @@ class Predictor:
332
332
  Given an array of raw scores, convert to Audacity labels and save in the given file.
333
333
 
334
334
  Args:
335
- scores (np.ndarray): Segment-level scores of shape (num_spectrograms, num_species).
336
- frame_map (np.ndarray, optional): Frame-level scores of shape (num_frames, num_species).
337
- If provided, uses frame-level labels; otherwise uses segment-level labels.
338
- start_times (list[float]): Start time in seconds for each spectrogram.
339
- file_path (str): Output path for the Audacity label file.
335
+ - scores (np.ndarray): Segment-level scores of shape (num_spectrograms, num_species).
336
+ - frame_map (np.ndarray, optional): Frame-level scores of shape (num_frames, num_species).
337
+ If provided, uses frame-level labels; otherwise uses segment-level labels.
338
+ - start_times (list[float]): Start time in seconds for each spectrogram.
339
+ - file_path (str): Output path for the Audacity label file.
340
340
 
341
341
  Returns:
342
342
  None: Writes the labels directly to the specified file.
@@ -369,9 +369,9 @@ class Predictor:
369
369
  Use mean rather than max or weighted values.
370
370
 
371
371
  Args:
372
- frame_scores: (num_specs, num_classes, T_spec) scores in [0, 1].
373
- offsets_sec: start time (s) for each spectrogram within the recording.
374
- recording_duration_sec: total recording length in seconds.
372
+ - frame_scores: (num_specs, num_classes, T_spec) scores in [0, 1].
373
+ - offsets_sec: start time (s) for each spectrogram within the recording.
374
+ - recording_duration_sec: total recording length in seconds.
375
375
 
376
376
  Returns:
377
377
  global_frames: (num_classes, T_global) tensor of scores in [0, 1].
@@ -22,12 +22,12 @@ class Reextractor:
22
22
  updating the database.
23
23
 
24
24
  Args:
25
- cfg_path (str, optional): Path to YAML file defining configuration overrides.
26
- db_path (str, optional): Path to the training database. Defaults to cfg.train.training_db.
27
- class_name (str, optional): Name of a specific class to reextract. If omitted, processes all classes.
28
- classes_path (str, optional): Path to CSV file listing classes to reextract. Alternative to class_name.
29
- check (bool): If True, only check that all recording paths are accessible without updating database.
30
- spec_group (str): Spectrogram group name for storing the extracted spectrograms. Defaults to 'default'.
25
+ - cfg_path (str, optional): Path to YAML file defining configuration overrides.
26
+ - db_path (str, optional): Path to the training database. Defaults to cfg.train.training_db.
27
+ - class_name (str, optional): Name of a specific class to reextract. If omitted, processes all classes.
28
+ - classes_path (str, optional): Path to CSV file listing classes to reextract. Alternative to class_name.
29
+ - check (bool): If True, only check that all recording paths are accessible without updating database.
30
+ - spec_group (str): Spectrogram group name for storing the extracted spectrograms. Defaults to 'default'.
31
31
  """
32
32
 
33
33
  def __init__(
britekit/core/util.py CHANGED
@@ -166,7 +166,7 @@ def cfg_to_pure(obj: Any) -> JSONValue:
166
166
  str, int, float, bool) that can be safely serialized.
167
167
 
168
168
  Args:
169
- obj: Any object to convert to JSON-serializable format
169
+ - obj: Any object to convert to JSON-serializable format
170
170
 
171
171
  Returns:
172
172
  JSON-serializable representation of the input object
@@ -284,8 +284,8 @@ def get_audio_files(path: str, short_names: bool = False) -> List[str]:
284
284
  Return list of audio files in the given directory.
285
285
 
286
286
  Args:
287
- path (str): Directory path
288
- short_names (bool): If true, return file names, else return full paths
287
+ - path (str): Directory path
288
+ - short_names (bool): If true, return file names, else return full paths
289
289
 
290
290
  Returns:
291
291
  List of audio files in the given directory
@@ -325,8 +325,8 @@ def get_file_lines(path: str, encoding: str = "utf-8") -> List[str]:
325
325
  and lines that start with #.
326
326
 
327
327
  Args:
328
- path: Path to text file
329
- encoding: File encoding (default: utf-8)
328
+ - path: Path to text file
329
+ - encoding: File encoding (default: utf-8)
330
330
 
331
331
  Returns:
332
332
  List of lines
@@ -354,7 +354,7 @@ def get_source_name(filename: str) -> str:
354
354
  Return a source name given a recording file name.
355
355
 
356
356
  Args:
357
- filename: Recording file name
357
+ - filename: Recording file name
358
358
 
359
359
  Returns:
360
360
  Source name
@@ -390,7 +390,7 @@ def compress_spectrogram(spec) -> bytes:
390
390
  Compress a spectrogram in preparation for inserting into database.
391
391
 
392
392
  Args:
393
- spec: Uncompressed spectrogram
393
+ - spec: Uncompressed spectrogram
394
394
 
395
395
  Returns:
396
396
  Compressed spectrogram
@@ -421,7 +421,7 @@ def expand_spectrogram(spec: bytes):
421
421
  Decompress a spectrogram, then convert from bytes to floats and reshape it.
422
422
 
423
423
  Args:
424
- spec: Compressed spectrogram
424
+ - spec: Compressed spectrogram
425
425
 
426
426
  Returns:
427
427
  Uncompressed spectrogram