bacpipe 1.3.0.dev0__tar.gz → 1.3.0.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/PKG-INFO +1 -1
  2. bacpipe-1.3.0.dev2/bacpipe/core/__init__.py +0 -0
  3. bacpipe-1.3.0.dev2/bacpipe/core/audio_processor.py +177 -0
  4. bacpipe-1.3.0.dev2/bacpipe/core/constants.py +62 -0
  5. bacpipe-1.3.0.dev2/bacpipe/core/experiment_manager.py +722 -0
  6. bacpipe-1.3.0.dev2/bacpipe/core/workflows.py +699 -0
  7. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/__init__.py +0 -0
  8. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/classification/__init__.py +0 -0
  9. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/classification/classify.py +213 -0
  10. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/classification/evaluate_classifier.py +141 -0
  11. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/classification/train_classifier.py +247 -0
  12. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/clustering/__init__.py +0 -0
  13. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/clustering/cluster.py +213 -0
  14. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/label_embeddings.py +1236 -0
  15. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/__init__.py +0 -0
  16. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/dashboard.py +756 -0
  17. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/dashboard_utils.py +321 -0
  18. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/visualize.py +362 -0
  19. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/visualize_embeddings.py +824 -0
  20. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/visualize_predictions.py +661 -0
  21. bacpipe-1.3.0.dev2/bacpipe/embedding_evaluation/visualization/visualize_spectrograms.py +144 -0
  22. bacpipe-1.3.0.dev2/bacpipe/imgs/__init__.py +0 -0
  23. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/__init__.py +0 -0
  24. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/dimensionality_reduction/pca.py +18 -0
  25. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/dimensionality_reduction/sparse_pca.py +18 -0
  26. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/dimensionality_reduction/t_sne.py +22 -0
  27. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/dimensionality_reduction/umap.py +25 -0
  28. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/__init__.py +0 -0
  29. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/audiomae.py +159 -0
  30. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/audioprotopnet.py +68 -0
  31. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/aves_especies.py +67 -0
  32. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/avesecho_passt.py +152 -0
  33. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/bat.py +73 -0
  34. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/beats.py +82 -0
  35. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/biolingual.py +39 -0
  36. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/birdaves_especies.py +6 -0
  37. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/birdmae.py +38 -0
  38. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/birdnet.py +133 -0
  39. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/convnext_birdset.py +51 -0
  40. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/google_whale.py +49 -0
  41. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/hbdet.py +19 -0
  42. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/insect459.py +50 -0
  43. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/insect66.py +94 -0
  44. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/mix2.py +33 -0
  45. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/naturebeats.py +45 -0
  46. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/perch_bird.py +15 -0
  47. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/perch_v2.py +81 -0
  48. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/protoclr.py +68 -0
  49. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/rcl_fs_bsed.py +50 -0
  50. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/surfperch.py +15 -0
  51. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/feature_extractors/vggish.py +26 -0
  52. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/audiomae/dataset.py +247 -0
  53. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/audiomae/models_vit.py +251 -0
  54. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/bat/module.py +155 -0
  55. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/bat/prepare_data.py +67 -0
  56. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/convnext_birdset/preprocess.py +92 -0
  57. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/mix2/mobile_net_v3.py +659 -0
  58. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/naturebeats/BEATs.py +207 -0
  59. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/naturebeats/backbone.py +813 -0
  60. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/naturebeats/modules.py +221 -0
  61. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/__init__.py +0 -0
  62. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/taxonomy/namespace.py +228 -0
  63. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/hub.py +89 -0
  64. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/model_configs.py +278 -0
  65. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/models_tf.py +546 -0
  66. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/taxonomy_model_tf.py +322 -0
  67. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/perch_v2/perch_hoplite/zoo/zoo_interface.py +355 -0
  68. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/protoclr/config/__init__.py +4 -0
  69. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/protoclr/config/comm.py +132 -0
  70. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/protoclr/cvt.py +695 -0
  71. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_specific_utils/rcl_fs_bsed/resnet.py +106 -0
  72. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/model_utils.py +152 -0
  73. bacpipe-1.3.0.dev2/bacpipe/model_pipelines/runner.py +728 -0
  74. bacpipe-1.3.0.dev2/bacpipe/tests/__init__.py +0 -0
  75. bacpipe-1.3.0.dev2/bacpipe/tests/conftest.py +54 -0
  76. bacpipe-1.3.0.dev2/bacpipe/tests/test_embedding_creation.py +114 -0
  77. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe.egg-info/PKG-INFO +1 -1
  78. bacpipe-1.3.0.dev2/bacpipe.egg-info/SOURCES.txt +87 -0
  79. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/pyproject.toml +3 -5
  80. bacpipe-1.3.0.dev0/bacpipe.egg-info/SOURCES.txt +0 -12
  81. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/LICENSE +0 -0
  82. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/README.md +0 -0
  83. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe/__init__.py +0 -0
  84. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe/config.yaml +0 -0
  85. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe/settings.yaml +0 -0
  86. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe/testing.py +0 -0
  87. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe.egg-info/dependency_links.txt +0 -0
  88. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe.egg-info/requires.txt +0 -0
  89. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/bacpipe.egg-info/top_level.txt +0 -0
  90. {bacpipe-1.3.0.dev0 → bacpipe-1.3.0.dev2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bacpipe
3
- Version: 1.3.0.dev0
3
+ Version: 1.3.0.dev2
4
4
  Summary: Use bacpipe to streamline the process of generating embeddings and analysing your PAM datasets.
5
5
  Author-email: "Vincent S. Kather" <vkather@gmail.com>
6
6
  Requires-Python: >=3.11
File without changes
@@ -0,0 +1,177 @@
1
+
2
+ import torch
3
+ import logging
4
+ import numpy as np
5
+ import librosa as lb
6
+ import torchaudio as ta
7
+ from pathlib import Path
8
+
9
+ logger = logging.getLogger("bacpipe")
10
+
11
+
12
class AudioHandler:
    """Load, window, pad and preprocess audio files for model batch inference."""

    def __init__(self, model, padding, audio_dir,
                 bool_slowdown=False, slowdown_rate=None,
                 **kwargs):
        """
        Helper class for all methods related to loading and padding audio.

        Parameters
        ----------
        model : Model object
            has attributes for all the model characteristics like
            sample rate, segment length etc. as well as the methods
            to run the model
        padding : str
            padding function to use for where padding is necessary
        audio_dir : pathlib.Path object
            path to audio dir
        bool_slowdown : bool, optional
            if True, load audio at its native rate and resample as if the
            recording had been slowed down by ``slowdown_rate``
        slowdown_rate : float or None, optional
            factor applied to the native sample rate when
            ``bool_slowdown`` is True
        """
        self.model = model
        self.padding = padding
        self.audio_dir = audio_dir
        self.bool_slowdown = bool_slowdown
        self.slowdown_rate = slowdown_rate
        self.kwargs = kwargs
        # FIX: prepare_audio stores per-file lengths here; this dict was
        # never initialized, so the first call raised AttributeError.
        self.file_length = {}
        self.preprocessed_shape = None

    def prepare_audio(self, sample):
        """
        Use bacpipe pipeline to load audio file, window it according to
        model specific window length and preprocess the data, ready for
        batch inference computation. Also log file length and shape for
        metadata files.

        Parameters
        ----------
        sample : pathlib.Path or str
            path to audio file

        Returns
        -------
        torch.Tensor
            audio frames preprocessed with model specific preprocessing
        """
        audio = self._load_and_resample(sample)
        if self.model.only_embed_annotations:
            frames = self._only_load_annotated_segments(sample, audio, **self.kwargs)
        else:
            frames = self._window_audio(audio)
        preprocessed_frames = self.model.preprocess(frames)
        self.file_length[sample.stem] = len(audio[0]) / self.model.sr
        self.preprocessed_shape = tuple(preprocessed_frames.shape)
        if self.model.device == 'cuda':
            # free intermediate buffers between files to keep GPU memory flat
            del audio, frames
            torch.cuda.empty_cache()
        return preprocessed_frames

    def _load_and_resample(self, path):
        """Load ``path`` at the model sample rate as a (1, n_samples) tensor.

        Raises
        ------
        ValueError
            if the loaded audio contains no samples
        """
        try:
            if not self.bool_slowdown:
                audio, sr = lb.load(
                    str(path), sr=self.model.sr, mono=True
                )
            else:
                # TODO: need to ensure that input length gets prolonged accordingly
                audio, sr = lb.load(
                    str(path), sr=None, mono=True
                )
                # pretend the source was recorded slowdown_rate times faster
                audio = lb.resample(
                    audio,
                    orig_sr=int(sr * self.slowdown_rate),
                    target_sr=self.model.sr,
                )
        except Exception as e:
            logger.exception(
                f"\nError loading audio. Skipping {str(path)}."
                f"Error: {e}"
            )
            raise e
        # check emptiness while audio is still 1-D (after the (1, -1)
        # reshape len(audio) is always 1 and the check would be dead code)
        if len(audio) == 0:
            error = f"Audio file {path} is empty. " f"Skipping {path}."
            logger.exception(error)
            raise ValueError(error)
        # FIX: downstream code indexes audio[0]; both load branches must
        # therefore yield a (1, n) array, not just the slowdown branch.
        audio = audio.reshape(1, -1)
        return torch.tensor(audio)

    def _only_load_annotated_segments(
        self, file_path, audio, annotations_filename='annotations.csv', **_
    ):
        """Extract only the annotated time spans of ``audio``.

        Reads ``annotations_filename`` (csv with columns ``audiofilename``,
        ``start``, ``end`` in seconds) from the audio dir, cuts out each
        annotated span and pads it to the model segment length.
        """
        import pandas as pd

        annots = pd.read_csv(Path(self.audio_dir) / annotations_filename)
        # the annotation file may reference the audio file as a relative
        # Path object, a bare filename, or a relative-path string
        rel_path = file_path.relative_to(self.audio_dir)
        file_annots = annots[annots.audiofilename == rel_path]
        if len(file_annots) == 0:
            file_annots = annots[annots.audiofilename == file_path.name]
        if len(file_annots) == 0:
            file_annots = annots[annots.audiofilename == str(rel_path)]

        # annotation times are in seconds -> convert to sample indices
        starts = np.array(file_annots.start, dtype=np.float32) * self.model.sr
        ends = np.array(file_annots.end, dtype=np.float32) * self.model.sr
        return self._extract_padded_segments(audio, starts, ends, file_path)

    def _extract_padded_segments(self, audio, starts, ends, file_path):
        """Cut [start, end] sample windows out of ``audio``, pad each to
        ``model.segment_length`` and stack them into a 2-D batch tensor.

        Shared implementation for annotation-based and grid-based loading.

        Raises
        ------
        ValueError
            if no window lies inside the audio range
        """
        audio = audio.cpu().squeeze()
        segments = []
        for s, e in zip(starts, ends):
            s, e = int(s), int(e)
            if s > len(audio):
                logger.warning(
                    f"Annotation with start {s} and end {e} is outside of "
                    f"range of {file_path}. Skipping annotation."
                )
                continue
            segments.append(
                lb.util.fix_length(
                    audio[s:e + 1],
                    size=self.model.segment_length,
                    mode=self.padding,
                )
            )
        if not segments:
            # FIX: previously fell through to an undefined variable (NameError)
            raise ValueError(
                f"No usable annotated segments found for {file_path}."
            )
        # FIX: always produce a (num_segments, segment_length) batch; the old
        # vstack loop returned a 1-D array when only one segment existed.
        batch = torch.Tensor(np.stack(segments))
        # FIX: was self.device, which does not exist on AudioHandler
        return batch.to(self.model.device)

    def _load_audio_based_on_fixed_segment_length(self, audio, segment_length, **_):
        """Build a regular grid of start/end sample indices covering ``audio``.

        NOTE(review): ``nr_segments`` divides the raw sample count by
        ``segment_length`` while starts/ends scale by ``model.sr`` — this is
        only consistent if ``segment_length`` is in seconds and ``len(audio)``
        is a duration in seconds; confirm against callers.
        """
        nr_segments = len(audio) // segment_length + 1
        starts = np.arange(nr_segments) * segment_length * self.model.sr
        ends = np.arange(1, nr_segments + 1) * segment_length * self.model.sr
        return starts, ends

    def _load_and_pad_audio_based_on_grid(self, audio, starts, ends, file_path):
        """Pad-and-stack audio windows on a precomputed start/end grid."""
        # identical segment extraction as for annotations; kept as its own
        # method to preserve the public-ish interface
        return self._extract_padded_segments(audio, starts, ends, file_path)

    def _window_audio(self, audio):
        """Split (1, n) ``audio`` into consecutive model-length frames,
        padding the final frame with ``self.padding``.

        Returns
        -------
        torch.Tensor
            tensor of shape (num_frames, segment_length)
        """
        num_frames = int(np.ceil(len(audio[0]) / self.model.segment_length))
        if isinstance(audio, torch.Tensor):
            audio = audio.cpu()
        padded_audio = lb.util.fix_length(
            audio,
            size=int(num_frames * self.model.segment_length),
            mode=self.padding,
        )
        logger.debug(f"{self.padding} was used on an audio segment.")
        frames = padded_audio.reshape([num_frames, self.model.segment_length])
        if not isinstance(frames, torch.Tensor):
            frames = torch.tensor(frames)
        return frames
@@ -0,0 +1,62 @@
1
+
2
+
3
# Models served through TensorFlow; all other supported models run on PyTorch.
TF_MODELS = [
    "birdnet",
    "perch_v2",
    "perch_bird",
    "google_whale",
    "surfperch",
    "vggish",
    "hbdet",
]

# Dimensionality of the embedding vector each feature extractor produces.
EMBEDDING_DIMENSIONS = {
    "audiomae": 768,
    "audioprotopnet": 1024,
    "avesecho_passt": 768,
    "aves_especies": 768,
    "bat": 64,
    "beats": 768,
    "birdaves_especies": 1024,
    "biolingual": 512,
    "birdnet": 1024,
    "birdmae": 1280,
    "convnext_birdset": 1024,
    "hbdet": 2048,
    "insect66": 1280,
    "insect459": 1280,
    "mix2": 960,
    "naturebeats": 768,
    "perch_bird": 1280,
    "perch_v2": 1536,
    "protoclr": 384,
    "rcl_fs_bsed": 2048,
    "surfperch": 1280,
    "google_whale": 1280,
    "vggish": 128,
}

# Models whose checkpoint weights must be downloaded before first use.
NEEDS_CHECKPOINT = [
    "audiomae",
    "avesecho_passt",
    "aves_especies",
    "bat",
    "beats",
    "birdaves_especies",
    "birdnet",
    "hbdet",
    "insect66",
    "insect459",
    "mix2",
    "naturebeats",
    "protoclr",
    "rcl_fs_bsed",
]


supported_models = list(EMBEDDING_DIMENSIONS.keys())
"""list[str]: Supported embedding models available in bacpipe."""

# FIX: copy rather than alias NEEDS_CHECKPOINT, so mutating one list
# cannot silently change the other.
models_needing_checkpoint = list(NEEDS_CHECKPOINT)
"""list[str]: Models that require a checkpoint to be downloaded before use."""