britekit 0.1.2__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (47)
  1. britekit/__about__.py +1 -1
  2. britekit/__init__.py +6 -2
  3. britekit/cli.py +6 -1
  4. britekit/commands/__init__.py +2 -1
  5. britekit/commands/_analyze.py +40 -12
  6. britekit/commands/_audioset.py +8 -8
  7. britekit/commands/_calibrate.py +8 -8
  8. britekit/commands/_ckpt_ops.py +6 -6
  9. britekit/commands/_db_add.py +12 -12
  10. britekit/commands/_db_delete.py +15 -15
  11. britekit/commands/_embed.py +4 -4
  12. britekit/commands/_ensemble.py +7 -7
  13. britekit/commands/_extract.py +158 -19
  14. britekit/commands/_find_dup.py +5 -5
  15. britekit/commands/_inat.py +4 -4
  16. britekit/commands/_init.py +1 -1
  17. britekit/commands/_pickle.py +13 -7
  18. britekit/commands/_plot.py +26 -26
  19. britekit/commands/_reextract.py +6 -6
  20. britekit/commands/_reports.py +41 -27
  21. britekit/commands/_search.py +12 -12
  22. britekit/commands/_train.py +6 -6
  23. britekit/commands/_tune.py +13 -13
  24. britekit/commands/_wav2mp3.py +2 -2
  25. britekit/commands/_xeno.py +7 -7
  26. britekit/commands/_youtube.py +3 -3
  27. britekit/core/analyzer.py +43 -13
  28. britekit/core/audio.py +14 -14
  29. britekit/core/augmentation.py +24 -0
  30. britekit/core/data_module.py +2 -2
  31. britekit/core/dataset.py +1 -4
  32. britekit/core/plot.py +8 -8
  33. britekit/core/predictor.py +51 -23
  34. britekit/core/reextractor.py +6 -6
  35. britekit/core/util.py +44 -8
  36. britekit/occurrence_db/occurrence_data_provider.py +13 -13
  37. britekit/testing/{per_minute_tester.py → per_block_tester.py} +39 -36
  38. britekit/testing/per_recording_tester.py +2 -2
  39. britekit/training_db/extractor.py +65 -30
  40. britekit/training_db/training_data_provider.py +1 -1
  41. britekit/training_db/training_db.py +97 -100
  42. britekit-0.1.5.dist-info/METADATA +299 -0
  43. {britekit-0.1.2.dist-info → britekit-0.1.5.dist-info}/RECORD +46 -46
  44. britekit-0.1.2.dist-info/METADATA +0 -290
  45. {britekit-0.1.2.dist-info → britekit-0.1.5.dist-info}/WHEEL +0 -0
  46. {britekit-0.1.2.dist-info → britekit-0.1.5.dist-info}/entry_points.txt +0 -0
  47. {britekit-0.1.2.dist-info → britekit-0.1.5.dist-info}/licenses/LICENSE.txt +0 -0
@@ -31,11 +31,11 @@ def find_dup(
     using cosine distance.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        class_name (str): Name of the class to scan for duplicates (e.g., "Common Yellowthroat").
-        delete (bool): If True, remove duplicate recordings from the database. If False, only report them.
-        spec_group (str): Spectrogram group name to use for embedding comparison. Defaults to "default".
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+        - class_name (str): Name of the class to scan for duplicates (e.g., "Common Yellowthroat").
+        - delete (bool): If True, remove duplicate recordings from the database. If False, only report them.
+        - spec_group (str): Spectrogram group name to use for embedding comparison. Defaults to "default".
     """

     class Recording:
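
The find_dup docstring above describes duplicate detection by cosine distance between spectrogram embeddings. As a rough illustration of that idea only (not britekit's actual implementation; the embedding input, function names, and the 0.05 threshold here are assumptions), a pairwise check might look like:

    import numpy as np

    def cosine_distance(a: np.ndarray, b: np.ndarray) -> float:
        # 1 - cosine similarity: 0.0 for identical direction, up to 2.0 for opposite.
        return 1.0 - float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

    def find_near_duplicates(embeddings, max_dist=0.05):
        # embeddings: dict mapping recording name -> 1-D embedding vector (hypothetical input).
        names = list(embeddings)
        for i, n1 in enumerate(names):
            for n2 in names[i + 1:]:
                if cosine_distance(embeddings[n1], embeddings[n2]) <= max_dist:
                    yield n1, n2
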
@@ -54,10 +54,10 @@ def inat(
     The command respects the maximum download limit and can optionally add filename prefixes.

     Args:
-        output_dir (str): Directory where downloaded recordings will be saved.
-        max_downloads (int): Maximum number of recordings to download. Default is 500.
-        name (str): Species name to search for (e.g., "Common Yellowthroat", "Geothlypis trichas").
-        no_prefix (bool): If True, skip adding "N" prefix to filenames. Default adds prefix.
+        - output_dir (str): Directory where downloaded recordings will be saved.
+        - max_downloads (int): Maximum number of recordings to download. Default is 500.
+        - name (str): Species name to search for (e.g., "Common Yellowthroat", "Geothlypis trichas").
+        - no_prefix (bool): If True, skip adding "N" prefix to filenames. Default adds prefix.
     """
     import pyinaturalist

@@ -32,7 +32,7 @@ def init(dest: Optional[Path]=None) -> None:
     a default directory structure.

     Args:
-        dest (Path): Directory to copy files into. Subdirectories are created as needed.
+        - dest (Path): Directory to copy files into. Subdirectories are created as needed.

     Examples:
         britekit init --dest .
@@ -1,6 +1,7 @@
 # File name starts with _ to keep it out of typeahead for API users.
 # Defer some imports to improve --help performance.
 import logging
+import os
 from pathlib import Path
 from typing import Optional

@@ -27,13 +28,13 @@ def pickle(
     or specific classes specified by a CSV file.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        classes_path (str, optional): Path to CSV file containing class names to include.
-            If omitted, includes all classes in the database.
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        output_path (str, optional): Output pickle file path. Defaults to "data/training.pkl".
-        max_per_class (int, optional): Maximum number of spectrograms to include per class.
-        spec_group (str): Spectrogram group name to extract from. Defaults to 'default'.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - classes_path (str, optional): Path to CSV file containing class names to include.
+            If omitted, includes all classes in the database.
+        - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+        - output_path (str, optional): Output pickle file path. Defaults to "data/training.pkl".
+        - max_per_class (int, optional): Maximum number of spectrograms to include per class.
+        - spec_group (str): Spectrogram group name to extract from. Defaults to 'default'.
     """
     from britekit.core.pickler import Pickler

@@ -41,6 +42,11 @@ def pickle(
     if db_path is None:
         db_path = cfg.train.train_db

+    if classes_path is not None:
+        if not os.path.exists(classes_path):
+            logging.error(f"Error: file {classes_path} not found.")
+            return
+
     if output_path is None:
         output_path = str(Path(root_dir) / "data" / "training.pkl")

@@ -76,15 +76,15 @@ def plot_db(
     number of spectrograms plotted.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        class_name (str): Name of the class to plot spectrograms for (e.g., "Common Yellowthroat").
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
-        max_count (int, optional): Maximum number of spectrograms to plot. If omitted, plots all available.
-        output_path (str): Directory where spectrogram images will be saved.
-        prefix (str, optional): Only include recordings that start with this filename prefix.
-        power (float, optional): Raise spectrograms to this power for visualization. Lower values show more detail.
-        spec_group (str, optional): Spectrogram group name to plot from. Defaults to "default".
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - class_name (str): Name of the class to plot spectrograms for (e.g., "Common Yellowthroat").
+        - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+        - ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
+        - max_count (int, optional): Maximum number of spectrograms to plot. If omitted, plots all available.
+        - output_path (str): Directory where spectrogram images will be saved.
+        - prefix (str, optional): Only include recordings that start with this filename prefix.
+        - power (float, optional): Raise spectrograms to this power for visualization. Lower values show more detail.
+        - spec_group (str, optional): Spectrogram group name to plot from. Defaults to "default".
     """
     from britekit.core.plot import plot_spec
     from britekit.training_db.training_db import TrainingDatabase
@@ -158,7 +158,7 @@ def plot_db(
     "--ndims",
     "ndims",
     is_flag=True,
-    help="If specified, do not show time and frequency dimensions on the spectrogram plots.",
+    help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
 )
 @click.option(
     "--max",
@@ -237,13 +237,13 @@ def plot_dir(
     overlapping segments.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
-        input_path (str): Directory containing audio recordings to process.
-        output_path (str): Directory where spectrogram images will be saved.
-        all (bool): If True, plot each recording as one spectrogram. If False, break into segments.
-        overlap (float): Spectrogram overlap in seconds when breaking recordings into segments. Default is 0.
-        power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
+        - input_path (str): Directory containing audio recordings to process.
+        - output_path (str): Directory where spectrogram images will be saved.
+        - all (bool): If True, plot each recording as one spectrogram. If False, break into segments.
+        - overlap (float): Spectrogram overlap in seconds when breaking recordings into segments. Default is 0.
+        - power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
     """
     from britekit.core.audio import Audio

@@ -284,7 +284,7 @@ def plot_dir(
     "--ndims",
     "ndims",
     is_flag=True,
-    help="If specified, show seconds on x-axis and frequencies on y-axis.",
+    help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
 )
 @click.option(
     "-i",
@@ -353,13 +353,13 @@ def plot_rec(
     overlapping segments.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
-        input_path (str): Path to the audio recording file to process.
-        output_path (str): Directory where spectrogram images will be saved.
-        all (bool): If True, plot the entire recording as one spectrogram. If False, break into segments.
-        overlap (float): Spectrogram overlap in seconds when breaking the recording into segments. Default is 0.
-        power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - ndims (bool): If True, do not show time and frequency dimensions on the spectrogram plots.
+        - input_path (str): Path to the audio recording file to process.
+        - output_path (str): Directory where spectrogram images will be saved.
+        - all (bool): If True, plot the entire recording as one spectrogram. If False, break into segments.
+        - overlap (float): Spectrogram overlap in seconds when breaking the recording into segments. Default is 0.
+        - power (float): Raise spectrograms to this power for visualization. Lower values show more detail. Default is 1.0.
     """
     from britekit.core.audio import Audio

@@ -394,7 +394,7 @@ def plot_rec(
     "--ndims",
     "ndims",
     is_flag=True,
-    help="If specified, show seconds on x-axis and frequencies on y-axis.",
+    help="If specified, do not show seconds on x-axis and frequencies on y-axis.",
 )
 @click.option(
     "-i",
@@ -30,12 +30,12 @@ def reextract(
     updating the database.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.training_db.
-        class_name (str, optional): Name of a specific class to reextract. If omitted, processes all classes.
-        classes_path (str, optional): Path to CSV file listing classes to reextract. Alternative to class_name.
-        check (bool): If True, only check that all recording paths are accessible without updating database.
-        spec_group (str): Spectrogram group name for storing the extracted spectrograms. Defaults to 'default'.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - db_path (str, optional): Path to the training database. Defaults to cfg.train.training_db.
+        - class_name (str, optional): Name of a specific class to reextract. If omitted, processes all classes.
+        - classes_path (str, optional): Path to CSV file listing classes to reextract. Alternative to class_name.
+        - check (bool): If True, only check that all recording paths are accessible without updating database.
+        - spec_group (str): Spectrogram group name for storing the extracted spectrograms. Defaults to 'default'.
     """
     from britekit.core.reextractor import Reextractor
     cfg = get_config(cfg_path)
@@ -27,8 +27,8 @@ def rpt_ann(
     breakdowns.

     Args:
-        annotations_path (str): Path to CSV file containing per-segment annotations.
-        output_path (str): Directory where summary reports will be saved.
+        - annotations_path (str): Path to CSV file containing per-segment annotations.
+        - output_path (str): Directory where summary reports will be saved.
     """
     import pandas as pd

@@ -136,9 +136,9 @@ def rpt_db(cfg_path: Optional[str] = None,
     and can be used for data management and quality control.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
-        output_path (str): Directory where database reports will be saved.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - db_path (str, optional): Path to the training database. Defaults to cfg.train.train_db.
+        - output_path (str): Directory where database reports will be saved.
     """
     from britekit.training_db.training_db import TrainingDatabase
     from britekit.training_db.training_data_provider import TrainingDataProvider
@@ -202,10 +202,10 @@ def rpt_epochs(
     This is useful to determine the number of training epochs needed.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        input_path (str): Checkpoint directory generated by training.
-        annotations_path (str): Path to CSV file containing ground truth annotations.
-        output_path (str): Directory where the graph image will be saved.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - input_path (str): Checkpoint directory generated by training.
+        - annotations_path (str): Path to CSV file containing ground truth annotations.
+        - output_path (str): Directory where the graph image will be saved.
     """
     import matplotlib.pyplot as plt
     from matplotlib.ticker import MaxNLocator
@@ -397,9 +397,9 @@ def rpt_labels(
     across different recordings and classes.

     Args:
-        label_dir (str): Directory containing inference output (CSV or Audacity labels).
-        output_path (str): Directory where summary reports will be saved.
-        min_score (float, optional): Ignore detections below this confidence threshold.
+        - label_dir (str): Directory containing inference output (CSV or Audacity labels).
+        - output_path (str): Directory where summary reports will be saved.
+        - min_score (float, optional): Ignore detections below this confidence threshold.
     """
     import pandas as pd

@@ -528,6 +528,7 @@ def rpt_test(
     output_path: str = "",
     recordings_path: Optional[str] = None,
     min_score: Optional[float] = None,
+    block_size: int = 60,
     precision: float = 0.95,
 ):
     """
@@ -536,27 +537,28 @@ def rpt_test(
     This command evaluates model performance by comparing inference results against
     ground truth annotations. It supports three granularity levels:
     - "recording": Evaluate at the recording level (presence/absence)
-    - "minute": Evaluate at the minute level (presence/absence per minute)
+    - "block": Evaluate at the block level (presence/absence per block)
     - "segment": Evaluate at the segment level (detailed temporal alignment)

     The command generates detailed performance metrics including precision, recall,
     F1 scores, and various visualization plots to help understand model behavior.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        granularity (str): Evaluation granularity ("recording", "minute", or "segment"). Default is "segment".
-        annotations_path (str): Path to CSV file containing ground truth annotations.
-        label_dir (str): Directory containing model prediction labels (Audacity format).
-        output_path (str): Directory where test reports will be saved.
-        recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
-        min_score (float, optional): Provide detailed reports for this confidence threshold.
-        precision (float): For recording granularity, report true positive seconds at this precision. Default is 0.95.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - granularity (str): Evaluation granularity ("recording", "block", or "segment"). Default is "segment".
+        - annotations_path (str): Path to CSV file containing ground truth annotations.
+        - label_dir (str): Directory containing model prediction labels (Audacity format).
+        - output_path (str): Directory where test reports will be saved.
+        - recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
+        - min_score (float, optional): Provide detailed reports for this confidence threshold.
+        - block_size (int, optional): block_size in seconds (default=60).
+        - precision (float): For recording granularity, report true positive seconds at this precision. Default is 0.95.
     """
-    from britekit.testing.per_minute_tester import PerMinuteTester
+    from britekit.testing.per_block_tester import PerBlockTester
     from britekit.testing.per_recording_tester import PerRecordingTester
     from britekit.testing.per_segment_tester import PerSegmentTester

-    cfg = get_config()
+    cfg = get_config(cfg_path)
     try:
         if not recordings_path:
             recordings_path = str(Path(annotations_path).parent)
@@ -582,13 +584,14 @@ def rpt_test(
                 min_score,
                 precision,
             ).run()
-        elif granularity.startswith("min"):
-            PerMinuteTester(
+        elif granularity.startswith("bl"):
+            PerBlockTester(
                 annotations_path,
                 recordings_path,
                 labels_path,
                 output_path,
                 min_score,
+                block_size,
             ).run()
         elif granularity.startswith("seg"):
             PerSegmentTester(
@@ -600,7 +603,7 @@ def rpt_test(
             ).run()
         else:
             logging.error(
-                'Invalid granularity (expected "recording", "minute" or "segment").'
+                'Invalid granularity (expected "recording", "block" or "segment").'
             )

     except InputError as e:
@@ -626,7 +629,7 @@ def rpt_test(
     "granularity",
     type=str,
     default="segment",
-    help='Test annotation and reporting granularity ("recording", "minute" or "segment"). Default = "segment".',
+    help='Test annotation and reporting granularity ("recording", "block" or "segment"). Default = "segment".',
 )
 @click.option(
     "-a",
@@ -668,6 +671,15 @@ def rpt_test(
     required=False,
     help="Provide detailed reports for this threshold.",
 )
+@click.option(
+    "-b",
+    "--block",
+    "block_size",
+    type=int,
+    required=False,
+    default=60,
+    help="Block size in seconds, when granularity=block (default=60).",
+)
 @click.option(
     "--precision",
     required=False,
@@ -683,6 +695,7 @@ def _rpt_test_cmd(
     output_path: str,
     recordings_path: Optional[str],
     min_score: Optional[float],
+    block_size: int,
     precision: float,
 ):
     util.set_logging()
@@ -694,5 +707,6 @@ def _rpt_test_cmd(
         output_path,
         recordings_path,
         min_score,
+        block_size,
         precision,
     )
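
This release generalizes the per-minute tester into a per-block tester with a configurable block size. Based on the call site shown in the diff above, direct use of the new class might look like the following sketch (the paths, the 0.8 threshold, and the 30-second block size are placeholders, not values from the package):

    from britekit.testing.per_block_tester import PerBlockTester

    # Positional arguments follow the call shown in the diff above;
    # all values here are illustrative.
    PerBlockTester(
        "annotations.csv",   # ground truth annotations (CSV)
        "recordings/",       # directory of audio recordings
        "labels/",           # model prediction labels
        "reports/",          # where test reports are written
        0.8,                 # min_score: detailed reports at this threshold
        30,                  # block_size in seconds (default is 60)
    ).run()
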
@@ -33,18 +33,18 @@ def search(
     based on embedding similarity. Results are plotted and saved to the output directory.

     Args:
-        cfg_path (str): Path to YAML configuration file defining model settings.
-        db_path (str): Path to the training database containing spectrograms to search.
-        class_name (str): Name of the class/species to search within the database.
-        max_dist (float): Maximum distance threshold. Results with distance greater than this are excluded.
-        exp (float): Exponent to raise spectrograms to for visualization (shows background sounds).
-        num_to_plot (int): Maximum number of similar spectrograms to plot and save.
-        output_path (str): Directory where search results and plots will be saved.
-        input_path (str): Path to the audio file containing the target spectrogram.
-        offset (float): Time offset in seconds where the target spectrogram is extracted.
-        exclude_db (str, optional): Path to an exclusion database. Spectrograms in this database are excluded from results.
-        class_name2 (str, optional): Class name in the exclusion database. Defaults to the search class name.
-        spec_group (str): Spectrogram group name in the database. Defaults to 'default'.
+        - cfg_path (str): Path to YAML configuration file defining model settings.
+        - db_path (str): Path to the training database containing spectrograms to search.
+        - class_name (str): Name of the class/species to search within the database.
+        - max_dist (float): Maximum distance threshold. Results with distance greater than this are excluded.
+        - exp (float): Exponent to raise spectrograms to for visualization (shows background sounds).
+        - num_to_plot (int): Maximum number of similar spectrograms to plot and save.
+        - output_path (str): Directory where search results and plots will be saved.
+        - input_path (str): Path to the audio file containing the target spectrogram.
+        - offset (float): Time offset in seconds where the target spectrogram is extracted.
+        - exclude_db (str, optional): Path to an exclusion database. Spectrograms in this database are excluded from results.
+        - class_name2 (str, optional): Class name in the exclusion database. Defaults to the search class name.
+        - spec_group (str): Spectrogram group name in the database. Defaults to 'default'.
     """

     class SpecInfo:
@@ -26,8 +26,8 @@ def train(
     automatically. The final trained model can be used for inference and evaluation.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-            If not specified, uses default configuration.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+            If not specified, uses default configuration.
     """
     from britekit.core.trainer import Trainer

@@ -82,10 +82,10 @@ def find_lr(cfg_path: str, num_batches: int):
     avoiding rates that are too high (causing instability) or too low (slow convergence).

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-            If not specified, uses default configuration.
-        num_batches (int): Number of training batches to analyze for learning rate finding.
-            Default is 100. Higher values provide more accurate results but take longer.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+            If not specified, uses default configuration.
+        - num_batches (int): Number of training batches to analyze for learning rate finding.
+            Default is 100. Higher values provide more accurate results but take longer.
     """
     from britekit.core.trainer import Trainer

@@ -41,18 +41,18 @@ def tune(
     The param_path specifies a YAML file that defines the parameters to be tuned, as described in the README.

     Args:
-        cfg_path (str, optional): Path to YAML file defining configuration overrides.
-        param_path (str, optional): Path to YAML file defining hyperparameters to tune and their search space.
-        output_path (str): Directory where reports will be saved.
-        annotations_path (str): Path to CSV file containing ground truth annotations.
-        metric (str): Metric used to compare runs. Options include various MAP and ROC metrics.
-        recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
-        train_log_path (str, optional): Training log directory. Defaults to "logs/fold-0".
-        num_trials (int): Number of random trials to run. If 0, performs exhaustive search.
-        num_runs (int): Number of runs to average for each parameter combination. Default is 1.
-        extract (bool): Extract new spectrograms before training, to tune spectrogram parameters.
-        skip_training (bool): Iterate on inference only, using checkpoints from the last training run.
-        classes_path (str, optional): Path to CSV containing class names for extract option. Default is all classes.
+        - cfg_path (str, optional): Path to YAML file defining configuration overrides.
+        - param_path (str, optional): Path to YAML file defining hyperparameters to tune and their search space.
+        - output_path (str): Directory where reports will be saved.
+        - annotations_path (str): Path to CSV file containing ground truth annotations.
+        - metric (str): Metric used to compare runs. Options include various MAP and ROC metrics.
+        - recordings_path (str, optional): Directory containing audio recordings. Defaults to annotations directory.
+        - train_log_path (str, optional): Training log directory. Defaults to "logs".
+        - num_trials (int): Number of random trials to run. If 0, performs exhaustive search.
+        - num_runs (int): Number of runs to average for each parameter combination. Default is 1.
+        - extract (bool): Extract new spectrograms before training, to tune spectrogram parameters.
+        - skip_training (bool): Iterate on inference only, using checkpoints from the last training run.
+        - classes_path (str, optional): Path to CSV containing class names for extract option. Default is all classes.
     """
     import yaml
     from britekit.core.tuner import Tuner
@@ -72,7 +72,7 @@ def tune(
         recordings_path = str(Path(annotations_path).parent)

     if not train_log_path:
-        train_log_path = str(Path("logs") / "fold-0")
+        train_log_path = "logs"

     if param_path is not None:
         with open(param_path) as input_file:
@@ -24,8 +24,8 @@ def wav2mp3(
     requirements for large audio datasets.

     Args:
-        dir (str): Path to directory containing audio files to convert.
-        sampling_rate (int): Output sampling rate in Hz. Default is 32000 Hz.
+        - dir (str): Path to directory containing audio files to convert.
+        - sampling_rate (int): Output sampling rate in Hz. Default is 32000 Hz.
     """
     CONVERT_TYPES = {
         ".flac",
@@ -75,13 +75,13 @@ def xeno(
     Then specify the key in the --key argument, or set the environment variable XCKEY=<key>.

     Args:
-        key (str): Xeno-Canto API key for authentication. Can also be set via XCKEY environment variable.
-        output_dir (str): Directory where downloaded recordings will be saved.
-        max_downloads (int): Maximum number of recordings to download. Default is 500.
-        name (str): Species name to search for (common name or scientific name).
-        ignore_licence (bool): If True, ignore license restrictions. By default, excludes BY-NC-ND licensed recordings.
-        scientific_name (bool): If True, treat the name as a scientific name rather than common name.
-        seen_only (bool): If True, only download recordings where the animal was seen (animal-seen=yes).
+        - key (str): Xeno-Canto API key for authentication. Can also be set via XCKEY environment variable.
+        - output_dir (str): Directory where downloaded recordings will be saved.
+        - max_downloads (int): Maximum number of recordings to download. Default is 500.
+        - name (str): Species name to search for (common name or scientific name).
+        - ignore_licence (bool): If True, ignore license restrictions. By default, excludes BY-NC-ND licensed recordings.
+        - scientific_name (bool): If True, treat the name as a scientific name rather than common name.
+        - seen_only (bool): If True, only download recordings where the animal was seen (animal-seen=yes).
     """
     import requests

@@ -17,9 +17,9 @@ def youtube(
     Download an audio recording from Youtube, given a Youtube ID.

     Args:
-        id (str): ID of the clip to download.
-        output_dir (str): Directory where downloaded recordings will be saved.
-        sampling_rate (float): Output sampling rate in Hz. Default is 32000.
+        - id (str): ID of the clip to download.
+        - output_dir (str): Directory where downloaded recordings will be saved.
+        - sampling_rate (float): Output sampling rate in Hz. Default is 32000.
     """
     import librosa
     import numpy as np
britekit/core/analyzer.py CHANGED
@@ -56,21 +56,29 @@ class Analyzer:
         with open(Path(output_path) / "manifest.yaml", "w") as out_file:
             out_file.write(info_str)

-    def _process_recordings(self, recording_paths, output_path, rtype, thread_num):
+    def _process_recordings(
+        self, recording_paths, output_path, rtype, start_seconds, thread_num, debug_mode=False
+    ):
         """
         This runs on its own thread and processes all recordings in the given list.

         Args:
-            recording_paths (list): Individual recording paths.
-            output_path (str): Where to write the output.
-            rtype (str): Output format: "audacity", "csv" or "both".
+            - recording_paths (list): Individual recording paths.
+            - output_path (str): Where to write the output.
+            - rtype (str): Output format: "audacity", "csv" or "both".
+            - start_seconds (float): Where to start processing each recording, in seconds from start.
         """
         from britekit.core.predictor import Predictor

         predictor = Predictor(self.cfg.misc.ckpt_folder)
         for recording_path in recording_paths:
             logging.info(f"[Thread {thread_num}] Processing {recording_path}")
-            scores, frame_map, offsets = predictor.get_raw_scores(recording_path)
+            scores, frame_map, offsets = predictor.get_raw_scores(
+                recording_path, start_seconds
+            )
+            if debug_mode:
+                predictor.log_scores(scores)  # log the scores for debugging
+
             recording_name = Path(recording_path).stem
             if rtype in {"audacity", "both"}:
                 file_path = str(Path(output_path) / f"{recording_name}_scores.txt")
@@ -82,6 +90,9 @@ class Analyzer:
             )
             self.dataframes.append(dataframe)

+            if debug_mode:
+                break
+
         if thread_num == 1:
             self._save_manifest(output_path, predictor)

@@ -91,8 +102,8 @@ class Analyzer:
         Split the input list into `n` lists based on index modulo `n`.

         Args:
-            input_list (list): The input list to split.
-            n (int): Number of resulting groups.
+            - input_list (list): The input list to split.
+            - n (int): Number of resulting groups.

         Returns:
             List[List]: A list of `n` lists, where each sublist contains elements
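
As a quick illustration of the index-modulo-`n` split described in the `_split_list` docstring above (a standalone sketch mirroring the behavior shown in the next hunk, not britekit code itself):

    def split_list(input_list, n):
        # Element i goes to group i % n, matching the documented behavior.
        result = [[] for _ in range(n)]
        for i, item in enumerate(input_list):
            result[i % n].append(item)
        return result

    # split_list(["a", "b", "c", "d", "e"], 2) -> [["a", "c", "e"], ["b", "d"]]
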
@@ -103,14 +114,24 @@ class Analyzer:
             result[i % n].append(item)
         return result

-    def run(self, input_path: str, output_path: str, rtype: str = "audacity"):
+    def run(
+        self,
+        input_path: str,
+        output_path: str,
+        rtype: str = "audacity",
+        start_seconds: float = 0,
+        debug_mode: bool = False
+    ):
         """
         Run inference.

         Args:
-            input_path (str): Recording or directory containing recordings.
-            output_path (str): Output directory.
-            rtype (str): Output format: "audacity", "csv" or "both".
+            - input_path (str): Recording or directory containing recordings.
+            - output_path (str): Output directory.
+            - rtype (str): Output format: "audacity", "csv" or "both".
+            - start_seconds (float): Where to start processing each recording, in seconds.
+            - debug_mode (bool): If true, log scores for the first spectrogram, then stop.
+            For example, '71' and '1:11' have the same meaning, and cause the first 71 seconds to be ignored. Default = 0.
         """
         import pandas as pd

@@ -127,14 +148,23 @@ class Analyzer:
         self.dataframes = []
         num_threads = min(self.cfg.infer.num_threads, len(recording_paths))
         if num_threads == 1:
-            self._process_recordings(recording_paths, output_path, rtype, 1)
+            self._process_recordings(
+                recording_paths, output_path, rtype, start_seconds, 1, debug_mode,
+            )
         else:
             recordings_per_thread = self._split_list(recording_paths, num_threads)
             threads = []
             for i in range(num_threads):
                 thread = threading.Thread(
                     target=self._process_recordings,
-                    args=(recordings_per_thread[i], output_path, rtype, i + 1),
+                    args=(
+                        recordings_per_thread[i],
+                        output_path,
+                        rtype,
+                        start_seconds,
+                        i + 1,
+                        debug_mode,
+                    ),
                 )
                 thread.start()
                 threads.append(thread)
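
Putting the new parameters together, a call to the updated run() might look like the sketch below. The Analyzer constructor is not shown in this diff, so a bare Analyzer() is assumed here, and the paths and values are placeholders.

    from britekit.core.analyzer import Analyzer

    # Assumes Analyzer can be constructed without arguments (constructor not shown in this diff).
    analyzer = Analyzer()
    analyzer.run(
        input_path="recordings/",   # a recording file or a directory of recordings
        output_path="labels/",      # where Audacity/CSV output is written
        rtype="both",               # "audacity", "csv" or "both"
        start_seconds=30,           # skip the first 30 seconds of each recording
        debug_mode=False,           # True logs scores for the first spectrogram, then stops
    )
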