birdnet-analyzer 2.0.1__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. birdnet_analyzer/__init__.py +9 -9
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -3
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +268 -241
  6. birdnet_analyzer/analyze/utils.py +700 -692
  7. birdnet_analyzer/audio.py +368 -368
  8. birdnet_analyzer/cli.py +732 -709
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -3
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -12
  14. birdnet_analyzer/embeddings/core.py +70 -69
  15. birdnet_analyzer/embeddings/utils.py +173 -179
  16. birdnet_analyzer/evaluation/__init__.py +189 -196
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/metrics.py +388 -388
  19. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -409
  20. birdnet_analyzer/evaluation/assessment/plotting.py +378 -379
  21. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -631
  22. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -98
  23. birdnet_analyzer/gui/__init__.py +19 -19
  24. birdnet_analyzer/gui/__main__.py +3 -3
  25. birdnet_analyzer/gui/analysis.py +179 -175
  26. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  27. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  28. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  30. birdnet_analyzer/gui/assets/gui.css +36 -28
  31. birdnet_analyzer/gui/assets/gui.js +89 -93
  32. birdnet_analyzer/gui/embeddings.py +638 -619
  33. birdnet_analyzer/gui/evaluation.py +801 -795
  34. birdnet_analyzer/gui/localization.py +75 -75
  35. birdnet_analyzer/gui/multi_file.py +265 -245
  36. birdnet_analyzer/gui/review.py +472 -519
  37. birdnet_analyzer/gui/segments.py +191 -191
  38. birdnet_analyzer/gui/settings.py +149 -128
  39. birdnet_analyzer/gui/single_file.py +264 -267
  40. birdnet_analyzer/gui/species.py +95 -95
  41. birdnet_analyzer/gui/train.py +687 -696
  42. birdnet_analyzer/gui/utils.py +803 -810
  43. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  44. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  80. birdnet_analyzer/lang/de.json +342 -334
  81. birdnet_analyzer/lang/en.json +342 -334
  82. birdnet_analyzer/lang/fi.json +342 -334
  83. birdnet_analyzer/lang/fr.json +342 -334
  84. birdnet_analyzer/lang/id.json +342 -334
  85. birdnet_analyzer/lang/pt-br.json +342 -334
  86. birdnet_analyzer/lang/ru.json +342 -334
  87. birdnet_analyzer/lang/se.json +342 -334
  88. birdnet_analyzer/lang/tlh.json +342 -334
  89. birdnet_analyzer/lang/zh_TW.json +342 -334
  90. birdnet_analyzer/model.py +1213 -1212
  91. birdnet_analyzer/search/__init__.py +3 -3
  92. birdnet_analyzer/search/__main__.py +3 -3
  93. birdnet_analyzer/search/cli.py +11 -11
  94. birdnet_analyzer/search/core.py +78 -78
  95. birdnet_analyzer/search/utils.py +104 -107
  96. birdnet_analyzer/segments/__init__.py +3 -3
  97. birdnet_analyzer/segments/__main__.py +3 -3
  98. birdnet_analyzer/segments/cli.py +13 -13
  99. birdnet_analyzer/segments/core.py +81 -81
  100. birdnet_analyzer/segments/utils.py +383 -383
  101. birdnet_analyzer/species/__init__.py +3 -3
  102. birdnet_analyzer/species/__main__.py +3 -3
  103. birdnet_analyzer/species/cli.py +13 -13
  104. birdnet_analyzer/species/core.py +35 -35
  105. birdnet_analyzer/species/utils.py +73 -74
  106. birdnet_analyzer/train/__init__.py +3 -3
  107. birdnet_analyzer/train/__main__.py +3 -3
  108. birdnet_analyzer/train/cli.py +13 -13
  109. birdnet_analyzer/train/core.py +113 -113
  110. birdnet_analyzer/train/utils.py +878 -877
  111. birdnet_analyzer/translate.py +132 -133
  112. birdnet_analyzer/utils.py +425 -426
  113. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/METADATA +147 -137
  114. birdnet_analyzer-2.1.1.dist-info/RECORD +124 -0
  115. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/WHEEL +1 -1
  116. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/licenses/LICENSE +18 -18
  117. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  118. birdnet_analyzer/playground.py +0 -5
  119. birdnet_analyzer-2.0.1.dist-info/RECORD +0 -125
  120. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/entry_points.txt +0 -0
  121. {birdnet_analyzer-2.0.1.dist-info → birdnet_analyzer-2.1.1.dist-info}/top_level.txt +0 -0
birdnet_analyzer/model.py CHANGED
@@ -1,1212 +1,1213 @@
1
- # ruff: noqa: PLW0603
2
- """Contains functions to use the BirdNET models."""
3
-
4
- import os
5
- import sys
6
- import warnings
7
-
8
- import numpy as np
9
-
10
- import birdnet_analyzer.config as cfg
11
- from birdnet_analyzer import utils
12
-
13
- SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
14
-
15
-
16
- os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
17
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
18
-
19
- warnings.filterwarnings("ignore")
20
-
21
- # Import TFLite from runtime or Tensorflow;
22
- # import Keras if protobuf model;
23
- # NOTE: we have to use TFLite if we want to use
24
- # the metadata model or want to extract embeddings
25
- try:
26
- import tflite_runtime.interpreter as tflite # type: ignore
27
- except ModuleNotFoundError:
28
- from tensorflow import lite as tflite
29
- if not cfg.MODEL_PATH.endswith(".tflite"):
30
- from tensorflow import keras
31
-
32
- INTERPRETER: tflite.Interpreter = None
33
- C_INTERPRETER: tflite.Interpreter = None
34
- M_INTERPRETER: tflite.Interpreter = None
35
- PBMODEL = None
36
- C_PBMODEL = None
37
- EMPTY_CLASS_EXCEPTION_REF = None
38
-
39
-
40
- def get_empty_class_exception():
41
- import keras_tuner.errors
42
-
43
- global EMPTY_CLASS_EXCEPTION_REF
44
-
45
- if EMPTY_CLASS_EXCEPTION_REF:
46
- return EMPTY_CLASS_EXCEPTION_REF
47
-
48
- class EmptyClassException(keras_tuner.errors.FatalError):
49
- """
50
- Exception raised when a class is found to be empty.
51
-
52
- Attributes:
53
- index (int): The index of the empty class.
54
- message (str): The error message indicating which class is empty.
55
- """
56
-
57
- def __init__(self, *args, index=None):
58
- super().__init__(*args)
59
- self.index = index
60
- self.message = f"Class {index} is empty."
61
-
62
- EMPTY_CLASS_EXCEPTION_REF = EmptyClassException
63
- return EMPTY_CLASS_EXCEPTION_REF
64
-
65
-
66
- def label_smoothing(y: np.ndarray, alpha=0.1):
67
- """
68
- Applies label smoothing to the given labels.
69
- Label smoothing is a technique used to prevent the model from becoming overconfident by adjusting the target labels.
70
- It subtracts a small value (alpha) from the correct label and distributes it among the other labels.
71
- Args:
72
- y (numpy.ndarray): Array of labels to be smoothed. The array should be of shape (num_labels,).
73
- alpha (float, optional): Smoothing parameter. Default is 0.1.
74
- Returns:
75
- numpy.ndarray: The smoothed labels.
76
- """
77
- # Subtract alpha from correct label when it is >0
78
- y[y > 0] -= alpha
79
-
80
- # Assigned alpha to all other labels
81
- y[y == 0] = alpha / y.shape[0]
82
-
83
- return y
84
-
85
-
86
- def mixup(x, y, augmentation_ratio=0.25, alpha=0.2):
87
- """Apply mixup to the given data.
88
-
89
- Mixup is a data augmentation technique that generates new samples by
90
- mixing two samples and their labels.
91
-
92
- Args:
93
- x: Samples.
94
- y: One-hot labels.
95
- augmentation_ratio: The ratio of augmented samples.
96
- alpha: The beta distribution parameter.
97
-
98
- Returns:
99
- Augmented data.
100
- """
101
- rng = np.random.default_rng(cfg.RANDOM_SEED)
102
-
103
- # Get indices of all positive samples
104
- positive_indices = np.unique(np.where(y[:, :] == 1)[0])
105
-
106
- # Calculate the number of samples to augment based on the ratio
107
- num_samples_to_augment = int(len(positive_indices) * augmentation_ratio)
108
-
109
- # Indices of samples, that are already mixed up
110
- mixed_up_indices = []
111
-
112
- for _ in range(num_samples_to_augment):
113
- # Randomly choose one instance from the positive samples
114
- index = rng.choice(positive_indices)
115
-
116
- # Choose another one, when the chosen one was already mixed up
117
- while index in mixed_up_indices:
118
- index = rng.choice(positive_indices)
119
-
120
- x1, y1 = x[index], y[index]
121
-
122
- # Randomly choose a different instance from the dataset
123
- second_index = rng.choice(positive_indices)
124
-
125
- # Choose again, when the same or an already mixed up sample was selected
126
- while second_index == index or second_index in mixed_up_indices:
127
- second_index = rng.choice(positive_indices)
128
- x2, y2 = x[second_index], y[second_index]
129
-
130
- # Generate a random mixing coefficient (lambda)
131
- lambda_ = rng.beta(alpha, alpha)
132
-
133
- # Mix the embeddings and labels
134
- mixed_x = lambda_ * x1 + (1 - lambda_) * x2
135
- mixed_y = lambda_ * y1 + (1 - lambda_) * y2
136
-
137
- # Replace one of the original samples and labels with the augmented sample and labels
138
- x[index] = mixed_x
139
- y[index] = mixed_y
140
-
141
- # Mark the sample as already mixed up
142
- mixed_up_indices.append(index)
143
-
144
- del mixed_x
145
- del mixed_y
146
-
147
- return x, y
148
-
149
-
150
- def random_split(x, y, val_ratio=0.2):
151
- """Splits the data into training and validation data.
152
-
153
- Makes sure that each class is represented in both sets.
154
-
155
- Args:
156
- x: Samples.
157
- y: One-hot labels.
158
- val_ratio: The ratio of validation data.
159
-
160
- Returns:
161
- A tuple of (x_train, y_train, x_val, y_val).
162
- """
163
- rng = np.random.default_rng(cfg.RANDOM_SEED)
164
-
165
- # Get number of classes
166
- num_classes = y.shape[1]
167
-
168
- # Initialize training and validation data
169
- x_train, y_train, x_val, y_val = [], [], [], []
170
-
171
- # Split data
172
- for i in range(num_classes):
173
- # Get indices of positive samples of current class
174
- positive_indices = np.where(y[:, i] == 1)[0]
175
-
176
- # Get indices of negative samples of current class
177
- negative_indices = np.where(y[:, i] == -1)[0]
178
-
179
- # Get number of samples for each set
180
- num_samples = len(positive_indices)
181
- num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
182
- num_samples_val = max(0, num_samples - num_samples_train)
183
-
184
- # Randomly choose samples for training and validation
185
- rng.shuffle(positive_indices)
186
- train_indices = positive_indices[:num_samples_train]
187
- val_indices = positive_indices[num_samples_train : num_samples_train + num_samples_val]
188
-
189
- # Append samples to training and validation data
190
- x_train.append(x[train_indices])
191
- y_train.append(y[train_indices])
192
- x_val.append(x[val_indices])
193
- y_val.append(y[val_indices])
194
-
195
- # Append negative samples to training data
196
- x_train.append(x[negative_indices])
197
- y_train.append(y[negative_indices])
198
-
199
- # Add samples for non-event classes to training and validation data
200
- non_event_indices = np.where(np.sum(y[:, :], axis=1) == 0)[0]
201
- num_samples = len(non_event_indices)
202
- num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
203
- num_samples_val = max(0, num_samples - num_samples_train)
204
- rng.shuffle(non_event_indices)
205
- train_indices = non_event_indices[:num_samples_train]
206
- val_indices = non_event_indices[num_samples_train : num_samples_train + num_samples_val]
207
- x_train.append(x[train_indices])
208
- y_train.append(y[train_indices])
209
- x_val.append(x[val_indices])
210
- y_val.append(y[val_indices])
211
-
212
- # Concatenate data
213
- x_train = np.concatenate(x_train)
214
- y_train = np.concatenate(y_train)
215
- x_val = np.concatenate(x_val)
216
- y_val = np.concatenate(y_val)
217
-
218
- # Shuffle data
219
- indices = np.arange(len(x_train))
220
- rng.shuffle(indices)
221
- x_train = x_train[indices]
222
- y_train = y_train[indices]
223
-
224
- indices = np.arange(len(x_val))
225
- rng.shuffle(indices)
226
- x_val = x_val[indices]
227
- y_val = y_val[indices]
228
-
229
- return x_train, y_train, x_val, y_val
230
-
231
-
232
- def random_multilabel_split(x, y, val_ratio=0.2):
233
- """Splits the data into training and validation data.
234
-
235
- Makes sure that each combination of classes is represented in both sets.
236
-
237
- Args:
238
- x: Samples.
239
- y: One-hot labels.
240
- val_ratio: The ratio of validation data.
241
-
242
- Returns:
243
- A tuple of (x_train, y_train, x_val, y_val).
244
-
245
- """
246
- rng = np.random.default_rng(cfg.RANDOM_SEED)
247
-
248
- # Find all combinations of labels
249
- class_combinations = np.unique(y, axis=0)
250
-
251
- # Initialize training and validation data
252
- x_train, y_train, x_val, y_val = [], [], [], []
253
-
254
- # Split the data for each combination of labels
255
- for class_combination in class_combinations:
256
- # find all indices
257
- indices = np.where((y == class_combination).all(axis=1))[0]
258
-
259
- # When negative sample use only for training
260
- if -1 in class_combination:
261
- x_train.append(x[indices])
262
- y_train.append(y[indices])
263
- # Otherwise split according to the validation split
264
- else:
265
- # Get number of samples for each set
266
- num_samples = len(indices)
267
- num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
268
- num_samples_val = max(0, num_samples - num_samples_train)
269
- # Randomly choose samples for training and validation
270
- rng.shuffle(indices)
271
- train_indices = indices[:num_samples_train]
272
- val_indices = indices[num_samples_train : num_samples_train + num_samples_val]
273
- # Append samples to training and validation data
274
- x_train.append(x[train_indices])
275
- y_train.append(y[train_indices])
276
- x_val.append(x[val_indices])
277
- y_val.append(y[val_indices])
278
-
279
- # Concatenate data
280
- x_train = np.concatenate(x_train)
281
- y_train = np.concatenate(y_train)
282
- x_val = np.concatenate(x_val)
283
- y_val = np.concatenate(y_val)
284
-
285
- # Shuffle data
286
- indices = np.arange(len(x_train))
287
- rng.shuffle(indices)
288
- x_train = x_train[indices]
289
- y_train = y_train[indices]
290
-
291
- indices = np.arange(len(x_val))
292
- rng.shuffle(indices)
293
- x_val = x_val[indices]
294
- y_val = y_val[indices]
295
-
296
- return x_train, y_train, x_val, y_val
297
-
298
-
299
- def upsample_core(x: np.ndarray, y: np.ndarray, min_samples: int, apply: callable, size=2):
300
- """
301
- Upsamples the minority class in the dataset using the specified apply function.
302
- Parameters:
303
- x (np.ndarray): The feature matrix.
304
- y (np.ndarray): The target labels.
305
- min_samples (int): The minimum number of samples required for the minority class.
306
- apply (callable): A function that applies the SMOTE or any other algorithm to the data.
307
- size (int, optional): The number of samples to generate in each iteration. Default is 2.
308
- Returns:
309
- tuple: A tuple containing the upsampled feature matrix and target labels.
310
- """
311
- rng = np.random.default_rng(cfg.RANDOM_SEED)
312
- y_temp = []
313
- x_temp = []
314
-
315
- if cfg.BINARY_CLASSIFICATION:
316
- # Determine if 1 or 0 is the minority class
317
- minority_label = 1 if y.sum(axis=0) < len(y) - y.sum(axis=0) else 0
318
-
319
- while np.where(y == minority_label)[0].shape[0] + len(y_temp) < min_samples:
320
- # Randomly choose a sample from the minority class
321
- random_index = rng.choice(np.where(y == minority_label)[0], size=size)
322
-
323
- # Apply SMOTE
324
- x_app, y_app = apply(x, y, random_index)
325
- y_temp.append(y_app)
326
- x_temp.append(x_app)
327
- else:
328
- for i in range(y.shape[1]):
329
- while y[:, i].sum() + len(y_temp) < min_samples:
330
- try:
331
- # Randomly choose a sample from the minority class
332
- random_index = rng.choice(np.where(y[:, i] == 1)[0], size=size)
333
- except ValueError as e:
334
- raise get_empty_class_exception()(index=i) from e
335
-
336
- # Apply SMOTE
337
- x_app, y_app = apply(x, y, random_index)
338
- y_temp.append(y_app)
339
- x_temp.append(x_app)
340
-
341
- return x_temp, y_temp
342
-
343
-
344
- def upsampling(x: np.ndarray, y: np.ndarray, ratio=0.5, mode="repeat"):
345
- """Balance data through upsampling.
346
-
347
- We upsample minority classes to have at least 10% (ratio=0.1) of the samples of the majority class.
348
-
349
- Args:
350
- x: Samples.
351
- y: One-hot labels.
352
- ratio: The minimum ratio of minority to majority samples.
353
- mode: The upsampling mode. Either 'repeat', 'mean', 'linear' or 'smote'.
354
-
355
- Returns:
356
- Upsampled data.
357
- """
358
-
359
- # Set numpy random seed
360
- rng = np.random.default_rng(cfg.RANDOM_SEED)
361
-
362
- # Determine min number of samples
363
- min_samples = (
364
- int(max(y.sum(axis=0), len(y) - y.sum(axis=0)) * ratio)
365
- if cfg.BINARY_CLASSIFICATION
366
- else int(np.max(y.sum(axis=0)) * ratio)
367
- )
368
-
369
- x_temp = []
370
- y_temp = []
371
-
372
- if mode == "repeat":
373
-
374
- def applyRepeat(x, y, random_index):
375
- return x[random_index[0]], y[random_index[0]]
376
-
377
- x_temp, y_temp = upsample_core(x, y, min_samples, applyRepeat, size=1)
378
-
379
- elif mode == "mean":
380
- # For each class with less than min_samples
381
- # select two random samples and calculate the mean
382
- def applyMean(x, y, random_indices):
383
- # Calculate the mean of the two samples
384
- mean = np.mean(x[random_indices], axis=0)
385
-
386
- # Append the mean and label to a temp list
387
- return mean, y[random_indices[0]]
388
-
389
- x_temp, y_temp = upsample_core(x, y, min_samples, applyMean)
390
-
391
- elif mode == "linear":
392
- # For each class with less than min_samples
393
- # select two random samples and calculate the linear combination
394
- def applyLinearCombination(x, y, random_indices):
395
- # Calculate the linear combination of the two samples
396
- alpha = rng.uniform(0, 1)
397
- new_sample = alpha * x[random_indices[0]] + (1 - alpha) * x[random_indices[1]]
398
-
399
- # Append the new sample and label to a temp list
400
- return new_sample, y[random_indices[0]]
401
-
402
- x_temp, y_temp = upsample_core(x, y, min_samples, applyLinearCombination)
403
-
404
- elif mode == "smote":
405
- # For each class with less than min_samples apply SMOTE
406
- def applySmote(x, y, random_index, k=5):
407
- # Get the k nearest neighbors
408
- distances = np.sqrt(np.sum((x - x[random_index[0]]) ** 2, axis=1))
409
- indices = np.argsort(distances)[1 : k + 1]
410
-
411
- # Randomly choose one of the neighbors
412
- random_neighbor = rng.choice(indices)
413
-
414
- # Calculate the difference vector
415
- diff = x[random_neighbor] - x[random_index[0]]
416
-
417
- # Randomly choose a weight between 0 and 1
418
- weight = rng.uniform(0, 1)
419
-
420
- # Calculate the new sample
421
- new_sample = x[random_index[0]] + weight * diff
422
-
423
- # Append the new sample and label to a temp list
424
- return new_sample, y[random_index[0]]
425
-
426
- x_temp, y_temp = upsample_core(x, y, min_samples, applySmote, size=1)
427
-
428
- # Append the temp list to the original data
429
- if len(x_temp) > 0:
430
- x = np.vstack((x, np.array(x_temp)))
431
- y = np.vstack((y, np.array(y_temp)))
432
-
433
- # Shuffle data
434
- indices = np.arange(len(x))
435
- rng.shuffle(indices)
436
- x = x[indices]
437
- y = y[indices]
438
-
439
- del x_temp
440
- del y_temp
441
-
442
- return x, y
443
-
444
-
445
- def save_model_params(path):
446
- """Saves the model parameters to a file.
447
-
448
- Args:
449
- path: Path to the file.
450
- """
451
- utils.save_params(
452
- path,
453
- (
454
- "Hidden units",
455
- "Dropout",
456
- "Batchsize",
457
- "Learning rate",
458
- "Crop mode",
459
- "Crop overlap",
460
- "Audio speed",
461
- "Upsamling mode",
462
- "Upsamling ratio",
463
- "use mixup",
464
- "use label smoothing",
465
- "BirdNET Model version",
466
- ),
467
- (
468
- cfg.TRAIN_HIDDEN_UNITS,
469
- cfg.TRAIN_DROPOUT,
470
- cfg.TRAIN_BATCH_SIZE,
471
- cfg.TRAIN_LEARNING_RATE,
472
- cfg.SAMPLE_CROP_MODE,
473
- cfg.SIG_OVERLAP,
474
- cfg.AUDIO_SPEED,
475
- cfg.UPSAMPLING_MODE,
476
- cfg.UPSAMPLING_RATIO,
477
- cfg.TRAIN_WITH_MIXUP,
478
- cfg.TRAIN_WITH_LABEL_SMOOTHING,
479
- cfg.MODEL_VERSION,
480
- ),
481
- )
482
-
483
-
484
- def reset_custom_classifier():
485
- """
486
- Resets the custom classifier by setting the global variables C_INTERPRETER and C_PBMODEL to None.
487
- This function is used to clear any existing custom classifier models and interpreters, effectively
488
- resetting the state of the custom classifier.
489
- """
490
- global C_INTERPRETER
491
- global C_PBMODEL
492
-
493
- C_INTERPRETER = None
494
- C_PBMODEL = None
495
-
496
-
497
- def load_model(class_output=True):
498
- """
499
- Loads the machine learning model based on the configuration provided.
500
- This function loads either a TensorFlow Lite (TFLite) model or a protobuf model
501
- depending on the file extension of the model path specified in the configuration.
502
- It sets up the global variables for the model interpreter and input/output layer indices.
503
-
504
- Args:
505
- class_output (bool): If True, sets the output layer index to the classification output.
506
- If False, sets the output layer index to the feature embeddings.
507
- """
508
- global PBMODEL
509
- global INTERPRETER
510
- global INPUT_LAYER_INDEX
511
- global OUTPUT_LAYER_INDEX
512
-
513
- # Do we have to load the tflite or protobuf model?
514
- if cfg.MODEL_PATH.endswith(".tflite"):
515
- # Load TFLite model and allocate tensors.
516
- INTERPRETER = tflite.Interpreter(
517
- model_path=os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), num_threads=cfg.TFLITE_THREADS
518
- )
519
- INTERPRETER.allocate_tensors()
520
-
521
- # Get input and output tensors.
522
- input_details = INTERPRETER.get_input_details()
523
- output_details = INTERPRETER.get_output_details()
524
-
525
- # Get input tensor index
526
- INPUT_LAYER_INDEX = input_details[0]["index"]
527
-
528
- # Get classification output or feature embeddings
529
- OUTPUT_LAYER_INDEX = output_details[0]["index"] if class_output else output_details[0]["index"] - 1
530
-
531
- else:
532
- # Load protobuf model
533
- # Note: This will throw a bunch of warnings about custom gradients
534
- # which we will ignore until TF lets us block them
535
- PBMODEL = keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), compile=False)
536
-
537
-
538
- def load_custom_classifier():
539
- """
540
- Loads a custom classifier model based on the file extension of the provided model path.
541
- If the model file ends with ".tflite", it loads a TensorFlow Lite model and sets up the interpreter,
542
- input layer index, output layer index, and input size.
543
- If the model file does not end with ".tflite", it loads a TensorFlow SavedModel.
544
- """
545
- global C_INTERPRETER
546
- global C_INPUT_LAYER_INDEX
547
- global C_OUTPUT_LAYER_INDEX
548
- global C_INPUT_SIZE
549
- global C_PBMODEL
550
-
551
- if cfg.CUSTOM_CLASSIFIER.endswith(".tflite"):
552
- # Load TFLite model and allocate tensors.
553
- C_INTERPRETER = tflite.Interpreter(model_path=cfg.CUSTOM_CLASSIFIER, num_threads=cfg.TFLITE_THREADS)
554
- C_INTERPRETER.allocate_tensors()
555
-
556
- # Get input and output tensors.
557
- input_details = C_INTERPRETER.get_input_details()
558
- output_details = C_INTERPRETER.get_output_details()
559
-
560
- # Get input tensor index
561
- C_INPUT_LAYER_INDEX = input_details[0]["index"]
562
-
563
- C_INPUT_SIZE = input_details[0]["shape"][-1]
564
-
565
- # Get classification output
566
- C_OUTPUT_LAYER_INDEX = output_details[0]["index"]
567
- else:
568
- import tensorflow as tf
569
-
570
- tf.get_logger().setLevel("ERROR")
571
-
572
- C_PBMODEL = tf.saved_model.load(cfg.CUSTOM_CLASSIFIER)
573
-
574
-
575
- def load_meta_model():
576
- """Loads the model for species prediction.
577
-
578
- Initializes the model used to predict species list, based on coordinates and week of year.
579
- """
580
- global M_INTERPRETER
581
- global M_INPUT_LAYER_INDEX
582
- global M_OUTPUT_LAYER_INDEX
583
-
584
- # Load TFLite model and allocate tensors.
585
- M_INTERPRETER = tflite.Interpreter(
586
- model_path=os.path.join(SCRIPT_DIR, cfg.MDATA_MODEL_PATH), num_threads=cfg.TFLITE_THREADS
587
- )
588
- M_INTERPRETER.allocate_tensors()
589
-
590
- # Get input and output tensors.
591
- input_details = M_INTERPRETER.get_input_details()
592
- output_details = M_INTERPRETER.get_output_details()
593
-
594
- # Get input tensor index
595
- M_INPUT_LAYER_INDEX = input_details[0]["index"]
596
- M_OUTPUT_LAYER_INDEX = output_details[0]["index"]
597
-
598
-
599
- def build_linear_classifier(num_labels, input_size, hidden_units=0, dropout=0.0):
600
- """Builds a classifier.
601
-
602
- Args:
603
- num_labels: Output size.
604
- input_size: Size of the input.
605
- hidden_units: If > 0, creates another hidden layer with the given number of units.
606
- dropout: Dropout rate.
607
-
608
- Returns:
609
- A new classifier.
610
- """
611
- # import keras
612
- from tensorflow import keras
613
-
614
- # Build a simple one- or two-layer linear classifier
615
- model = keras.Sequential()
616
-
617
- # Input layer
618
- model.add(keras.layers.InputLayer(input_shape=(input_size,)))
619
-
620
- # Batch normalization on input to standardize embeddings
621
- model.add(keras.layers.BatchNormalization())
622
-
623
- # Optional L2 regularization for all dense layers
624
- regularizer = keras.regularizers.l2(1e-5)
625
-
626
- # Hidden layer with improved architecture
627
- if hidden_units > 0:
628
- # Dropout layer before hidden layer
629
- if dropout > 0:
630
- model.add(keras.layers.Dropout(dropout))
631
-
632
- # Add a hidden layer with L2 regularization
633
- model.add(
634
- keras.layers.Dense(
635
- hidden_units, activation="relu", kernel_regularizer=regularizer, kernel_initializer="he_normal"
636
- )
637
- )
638
-
639
- # Add another batch normalization after the hidden layer
640
- model.add(keras.layers.BatchNormalization())
641
-
642
- # Dropout layer before output
643
- if dropout > 0:
644
- model.add(keras.layers.Dropout(dropout))
645
-
646
- # Classification layer with L2 regularization
647
- model.add(keras.layers.Dense(num_labels, kernel_regularizer=regularizer, kernel_initializer="glorot_uniform"))
648
-
649
- # Activation layer
650
- model.add(keras.layers.Activation("sigmoid"))
651
-
652
- return model
653
-
654
-
655
- def train_linear_classifier(
656
- classifier,
657
- x_train,
658
- y_train,
659
- x_test,
660
- y_test,
661
- epochs,
662
- batch_size,
663
- learning_rate,
664
- val_split,
665
- upsampling_ratio,
666
- upsampling_mode,
667
- train_with_mixup,
668
- train_with_label_smoothing,
669
- train_with_focal_loss=False,
670
- focal_loss_gamma=2.0,
671
- focal_loss_alpha=0.25,
672
- on_epoch_end=None,
673
- ):
674
- """Trains a custom classifier.
675
-
676
- Trains a new classifier for BirdNET based on the given data.
677
-
678
- Args:
679
- classifier: The classifier to be trained.
680
- x_train: Samples.
681
- y_train: Labels.
682
- x_test: Validation samples.
683
- y_test: Validation labels.
684
- epochs: Number of epochs to train.
685
- batch_size: Batch size.
686
- learning_rate: The learning rate during training.
687
- val_split: Validation split ratio (is 0 when using test data).
688
- upsampling_ratio: Upsampling ratio.
689
- upsampling_mode: Upsampling mode.
690
- train_with_mixup: If True, applies mixup to the training data.
691
- train_with_label_smoothing: If True, applies label smoothing to the training data.
692
- train_with_focal_loss: If True, uses focal loss instead of binary cross-entropy loss.
693
- focal_loss_gamma: Focal loss gamma parameter.
694
- focal_loss_alpha: Focal loss alpha parameter.
695
- on_epoch_end: Optional callback `function(epoch, logs)`.
696
-
697
- Returns:
698
- (classifier, history)
699
- """
700
- # import keras
701
- from tensorflow import keras
702
-
703
- class FunctionCallback(keras.callbacks.Callback):
704
- def __init__(self, on_epoch_end=None) -> None:
705
- super().__init__()
706
- self.on_epoch_end_fn = on_epoch_end
707
-
708
- def on_epoch_end(self, epoch, logs=None):
709
- if self.on_epoch_end_fn:
710
- self.on_epoch_end_fn(epoch, logs)
711
-
712
- # Set random seed
713
- rng = np.random.default_rng(cfg.RANDOM_SEED)
714
-
715
- # Shuffle data
716
- idx = np.arange(x_train.shape[0])
717
- rng.shuffle(idx)
718
- x_train = x_train[idx]
719
- y_train = y_train[idx]
720
-
721
- # Random val split
722
- if val_split > 0:
723
- if not cfg.MULTI_LABEL:
724
- x_train, y_train, x_val, y_val = random_split(x_train, y_train, val_split)
725
- else:
726
- x_train, y_train, x_val, y_val = random_multilabel_split(x_train, y_train, val_split)
727
- else:
728
- x_val = x_test
729
- y_val = y_test
730
-
731
- print(
732
- f"Training on {x_train.shape[0]} samples, validating on {x_val.shape[0]} samples.",
733
- flush=True,
734
- )
735
-
736
- # Upsample training data
737
- if upsampling_ratio > 0:
738
- x_train, y_train = upsampling(x_train, y_train, upsampling_ratio, upsampling_mode)
739
- print(f"Upsampled training data to {x_train.shape[0]} samples.", flush=True)
740
-
741
- # Apply mixup to training data
742
- if train_with_mixup and not cfg.BINARY_CLASSIFICATION:
743
- x_train, y_train = mixup(x_train, y_train)
744
-
745
- # Apply label smoothing
746
- if train_with_label_smoothing and not cfg.BINARY_CLASSIFICATION:
747
- y_train = label_smoothing(y_train)
748
-
749
- # Early stopping with patience depending on dataset size
750
- patience = min(10, max(5, int(epochs / 10)))
751
- min_delta = 0.001
752
-
753
- callbacks = [
754
- # EarlyStopping with restore_best_weights
755
- keras.callbacks.EarlyStopping(
756
- monitor="val_AUPRC",
757
- mode="max",
758
- patience=patience,
759
- verbose=1,
760
- min_delta=min_delta,
761
- restore_best_weights=True,
762
- ),
763
- # Function callback for progress tracking
764
- FunctionCallback(on_epoch_end=on_epoch_end),
765
- ]
766
-
767
- # Learning rate schedule - use cosine decay with warmup
768
- warmup_epochs = min(5, int(epochs * 0.1))
769
-
770
- def lr_schedule(epoch, lr):
771
- if epoch < warmup_epochs:
772
- # Linear warmup
773
- return learning_rate * (epoch + 1) / warmup_epochs
774
-
775
- # Cosine decay
776
- progress = (epoch - warmup_epochs) / (epochs - warmup_epochs)
777
- return learning_rate * (0.1 + 0.9 * (1 + np.cos(np.pi * progress)) / 2)
778
-
779
- # Add LR scheduler callback
780
- callbacks.append(keras.callbacks.LearningRateScheduler(lr_schedule))
781
-
782
- optimizer_cls = keras.optimizers.legacy.Adam if sys.platform == "darwin" else keras.optimizers.Adam
783
-
784
- def _focal_loss(y_true, y_pred):
785
- return focal_loss(y_true, y_pred, gamma=cfg.FOCAL_LOSS_GAMMA, alpha=cfg.FOCAL_LOSS_ALPHA)
786
-
787
- # Choose the loss function based on config
788
- loss_function = _focal_loss if train_with_focal_loss else custom_loss
789
-
790
- # Compile model with appropriate metrics for classification task
791
- classifier.compile(
792
- optimizer=optimizer_cls(learning_rate=learning_rate),
793
- loss=loss_function,
794
- metrics=[
795
- keras.metrics.AUC(
796
- curve="PR",
797
- multi_label=cfg.MULTI_LABEL,
798
- name="AUPRC",
799
- num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
800
- from_logits=True,
801
- ),
802
- keras.metrics.AUC(
803
- curve="ROC",
804
- multi_label=cfg.MULTI_LABEL,
805
- name="AUROC",
806
- num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
807
- from_logits=True,
808
- ),
809
- ],
810
- )
811
-
812
- # Train model
813
- history = classifier.fit(
814
- x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, y_val), callbacks=callbacks
815
- )
816
-
817
- return classifier, history
818
-
819
-
820
- def save_linear_classifier(classifier, model_path: str, labels: list[str], mode="replace"):
821
- """Saves the classifier as a tflite model, as well as the used labels in a .txt.
822
-
823
- Args:
824
- classifier: The custom classifier.
825
- model_path: Path the model will be saved at.
826
- labels: List of labels used for the classifier.
827
- """
828
- import tensorflow as tf
829
-
830
- global PBMODEL
831
-
832
- tf.get_logger().setLevel("ERROR")
833
-
834
- if PBMODEL is None:
835
- PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
836
-
837
- saved_model = PBMODEL
838
-
839
- # Remove activation layer
840
- classifier.pop()
841
-
842
- if mode == "replace":
843
- combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
844
- elif mode == "append":
845
- intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
846
-
847
- output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
848
-
849
- combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
850
- else:
851
- raise ValueError("Model save mode must be either 'replace' or 'append'")
852
-
853
- # Append .tflite if necessary
854
- if not model_path.endswith(".tflite"):
855
- model_path += ".tflite"
856
-
857
- # Make folders
858
- os.makedirs(os.path.dirname(model_path), exist_ok=True)
859
-
860
- # Save model as tflite
861
- converter = tf.lite.TFLiteConverter.from_keras_model(combined_model)
862
- tflite_model = converter.convert()
863
-
864
- with open(model_path, "wb") as f:
865
- f.write(tflite_model)
866
-
867
- if mode == "append":
868
- labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
869
-
870
- # Save labels
871
- with open(model_path.replace(".tflite", "_Labels.txt"), "w", encoding="utf-8") as f:
872
- for label in labels:
873
- f.write(label + "\n")
874
-
875
- save_model_params(model_path.replace(".tflite", "_Params.csv"))
876
-
877
-
878
- def save_raven_model(classifier, model_path: str, labels: list[str], mode="replace"):
879
- """
880
- Save a TensorFlow model with a custom classifier and associated metadata for use with BirdNET.
881
-
882
- Args:
883
- classifier (tf.keras.Model): The custom classifier model to be saved.
884
- model_path (str): The path where the model will be saved.
885
- labels (list[str]): A list of labels associated with the classifier.
886
- mode (str, optional): The mode for saving the model. Can be either "replace" or "append".
887
- Defaults to "replace".
888
-
889
- Raises:
890
- ValueError: If the mode is not "replace" or "append".
891
-
892
- Returns:
893
- None
894
- """
895
- import csv
896
- import json
897
-
898
- import tensorflow as tf
899
-
900
- global PBMODEL
901
-
902
- tf.get_logger().setLevel("ERROR")
903
-
904
- if PBMODEL is None:
905
- PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
906
-
907
- saved_model = PBMODEL
908
-
909
- if mode == "replace":
910
- combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
911
- elif mode == "append":
912
- # Remove activation layer
913
- classifier.pop()
914
- intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
915
-
916
- output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
917
-
918
- combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
919
- else:
920
- raise ValueError("Model save mode must be either 'replace' or 'append'")
921
-
922
- # Make signatures
923
- class SignatureModule(tf.Module):
924
- def __init__(self, keras_model):
925
- super().__init__()
926
- self.model = keras_model
927
-
928
- @tf.function(input_signature=[tf.TensorSpec(shape=[None, 144000], dtype=tf.float32)])
929
- def basic(self, inputs):
930
- return {"scores": self.model(inputs)}
931
-
932
- smodel = SignatureModule(combined_model)
933
- signatures = {
934
- "basic": smodel.basic,
935
- }
936
-
937
- # Save signature model
938
- os.makedirs(os.path.dirname(model_path), exist_ok=True)
939
- model_path = model_path.removesuffix(".tflite")
940
- tf.saved_model.save(smodel, model_path, signatures=signatures)
941
-
942
- if mode == "append":
943
- labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
944
-
945
- # Save label file
946
- labelIds = [label[:4].replace(" ", "") + str(i) for i, label in enumerate(labels, 1)]
947
- labels_dir = os.path.join(model_path, "labels")
948
-
949
- os.makedirs(labels_dir, exist_ok=True)
950
-
951
- with open(os.path.join(labels_dir, "label_names.csv"), "w", newline="") as labelsfile:
952
- labelwriter = csv.writer(labelsfile)
953
- labelwriter.writerows(zip(labelIds, labels, strict=True))
954
-
955
- # Save class names file
956
- classes_dir = os.path.join(model_path, "classes")
957
-
958
- os.makedirs(classes_dir, exist_ok=True)
959
-
960
- with open(os.path.join(classes_dir, "classes.csv"), "w", newline="") as classesfile:
961
- classeswriter = csv.writer(classesfile)
962
- for labelId in labelIds:
963
- classeswriter.writerow((labelId, 0.25, cfg.SIG_FMIN, cfg.SIG_FMAX, False))
964
-
965
- # Save model config
966
- model_config = os.path.join(model_path, "model_config.json")
967
-
968
- with open(model_config, "w") as modelconfigfile:
969
- modelconfig = {
970
- "specVersion": 1,
971
- "modelDescription": "Custom classifier trained with BirdNET "
972
- + cfg.MODEL_VERSION
973
- + " embeddings.\n"
974
- + "BirdNET was developed by the K. Lisa Yang Center for Conservation Bioacoustics"
975
- + "at the Cornell Lab of Ornithology in collaboration with Chemnitz University of Technology.\n\n"
976
- + "https://birdnet.cornell.edu",
977
- "modelTypeConfig": {"modelType": "RECOGNITION"},
978
- "signatures": [
979
- {
980
- "signatureName": "basic",
981
- "modelInputs": [
982
- {
983
- "inputName": "inputs",
984
- "sampleRate": 48000.0,
985
- "inputConfig": ["batch", "samples"],
986
- }
987
- ],
988
- "modelOutputs": [{"outputName": "scores", "outputType": "SCORES"}],
989
- }
990
- ],
991
- "globalSemanticKeys": labelIds,
992
- }
993
- json.dump(modelconfig, modelconfigfile, indent=2)
994
-
995
- model_params = os.path.join(model_path, "model_params.csv")
996
-
997
- save_model_params(model_params)
998
-
999
-
1000
- def predict_filter(lat, lon, week):
1001
- """Predicts the probability for each species.
1002
-
1003
- Args:
1004
- lat: The latitude.
1005
- lon: The longitude.
1006
- week: The week of the year [1-48]. Use -1 for yearlong.
1007
-
1008
- Returns:
1009
- A list of probabilities for all species.
1010
- """
1011
- # Does interpreter exist?
1012
- if M_INTERPRETER is None:
1013
- load_meta_model()
1014
-
1015
- # Prepare mdata as sample
1016
- sample = np.expand_dims(np.array([lat, lon, week], dtype="float32"), 0)
1017
-
1018
- # Run inference
1019
- M_INTERPRETER.set_tensor(M_INPUT_LAYER_INDEX, sample)
1020
- M_INTERPRETER.invoke()
1021
-
1022
- return M_INTERPRETER.get_tensor(M_OUTPUT_LAYER_INDEX)[0]
1023
-
1024
-
1025
- def explore(lat: float, lon: float, week: int):
1026
- """Predicts the species list.
1027
-
1028
- Predicts the species list based on the coordinates and week of year.
1029
-
1030
- Args:
1031
- lat: The latitude.
1032
- lon: The longitude.
1033
- week: The week of the year [1-48]. Use -1 for yearlong.
1034
-
1035
- Returns:
1036
- A sorted list of tuples with the score and the species.
1037
- """
1038
- # Make filter prediction
1039
- l_filter = predict_filter(lat, lon, week)
1040
-
1041
- # Apply threshold
1042
- l_filter = np.where(l_filter >= cfg.LOCATION_FILTER_THRESHOLD, l_filter, 0)
1043
-
1044
- # Zip with labels
1045
- l_filter = list(zip(l_filter, cfg.LABELS, strict=True))
1046
-
1047
- # Sort by filter value
1048
- return sorted(l_filter, key=lambda x: x[0], reverse=True)
1049
-
1050
-
1051
- def focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25, epsilon=1e-7):
1052
- """
1053
- Focal loss for better handling of class imbalance.
1054
-
1055
- This loss function gives more weight to hard examples and down-weights easy examples.
1056
- Particularly helpful for imbalanced datasets where some classes have few samples.
1057
-
1058
- Args:
1059
- y_true: Ground truth labels.
1060
- y_pred: Predicted probabilities.
1061
- gamma: Focusing parameter. Higher values mean more focus on hard examples.
1062
- alpha: Balance parameter. Controls weight of positive vs negative examples.
1063
- epsilon: Small constant to prevent log(0).
1064
-
1065
- Returns:
1066
- Focal loss value.
1067
- """
1068
- import tensorflow.keras.backend as K
1069
-
1070
- # Apply sigmoid if not already applied
1071
- y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
1072
-
1073
- # Calculate cross entropy
1074
- cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
1075
-
1076
- # Calculate focal weight
1077
- p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
1078
- focal_weight = K.pow(1 - p_t, gamma)
1079
-
1080
- # Apply alpha balancing
1081
- alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
1082
-
1083
- # Calculate focal loss
1084
- focal_loss = alpha_factor * focal_weight * cross_entropy
1085
-
1086
- # Sum over all classes
1087
- return K.sum(focal_loss, axis=-1)
1088
-
1089
-
1090
- def custom_loss(y_true, y_pred, epsilon=1e-7):
1091
- import tensorflow.keras.backend as K
1092
-
1093
- # Calculate loss for positive labels with epsilon
1094
- positive_loss = -K.sum(y_true * K.log(K.clip(y_pred, epsilon, 1.0 - epsilon)), axis=-1)
1095
-
1096
- # Calculate loss for negative labels with epsilon
1097
- negative_loss = -K.sum((1 - y_true) * K.log(K.clip(1 - y_pred, epsilon, 1.0 - epsilon)), axis=-1)
1098
-
1099
- # Combine both loss terms
1100
- return positive_loss + negative_loss
1101
-
1102
-
1103
- def flat_sigmoid(x, sensitivity=-1, bias=1.0):
1104
- """
1105
- Applies a flat sigmoid function to the input array with a bias shift.
1106
-
1107
- The flat sigmoid function is defined as:
1108
- f(x) = 1 / (1 + exp(sensitivity * clip(x + bias, -20, 20)))
1109
-
1110
- We transform the bias parameter to a range of [-100, 100] with the formula:
1111
- transformed_bias = (bias - 1.0) * 10.0
1112
-
1113
- Thus, higher bias values will shift the sigmoid function to the right on the x-axis, making it more "sensitive".
1114
-
1115
- Note: Not sure why we are clipping, must be for numerical stability somewhere else in the code.
1116
-
1117
- Args:
1118
- x (array-like): Input data.
1119
- sensitivity (float, optional): Sensitivity parameter for the sigmoid function. Default is -1.
1120
- bias (float, optional): Bias parameter to shift the sigmoid function on the x-axis. Must be in the range [0.01, 1.99]. Default is 1.0.
1121
-
1122
- Returns:
1123
- numpy.ndarray: Transformed data after applying the flat sigmoid function.
1124
- """
1125
-
1126
- transformed_bias = (bias - 1.0) * 10.0
1127
-
1128
- return 1 / (1.0 + np.exp(sensitivity * np.clip(x + transformed_bias, -20, 20)))
1129
-
1130
-
1131
- def predict(sample):
1132
- """Uses the main net to predict a sample.
1133
-
1134
- Args:
1135
- sample: Audio sample.
1136
-
1137
- Returns:
1138
- The prediction scores for the sample.
1139
- """
1140
- # Has custom classifier?
1141
- if cfg.CUSTOM_CLASSIFIER is not None:
1142
- return predict_with_custom_classifier(sample)
1143
-
1144
- # Does interpreter or keras model exist?
1145
- if INTERPRETER is None and PBMODEL is None:
1146
- load_model()
1147
-
1148
- if PBMODEL is None:
1149
- # Reshape input tensor
1150
- INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
1151
- INTERPRETER.allocate_tensors()
1152
-
1153
- # Make a prediction (Audio only for now)
1154
- INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
1155
- INTERPRETER.invoke()
1156
- return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
1157
-
1158
- # Make a prediction (Audio only for now)
1159
- return PBMODEL.basic(sample)["scores"]
1160
-
1161
-
1162
- def predict_with_custom_classifier(sample):
1163
- """Uses the custom classifier to make a prediction.
1164
-
1165
- Args:
1166
- sample: Audio sample.
1167
-
1168
- Returns:
1169
- The prediction scores for the sample.
1170
- """
1171
- # Does interpreter exist?
1172
- if C_INTERPRETER is None and C_PBMODEL is None:
1173
- load_custom_classifier()
1174
-
1175
- if C_PBMODEL is None:
1176
- vector = embeddings(sample) if C_INPUT_SIZE != 144000 else sample
1177
-
1178
- # Reshape input tensor
1179
- C_INTERPRETER.resize_tensor_input(C_INPUT_LAYER_INDEX, [len(vector), *vector[0].shape])
1180
- C_INTERPRETER.allocate_tensors()
1181
-
1182
- # Make a prediction
1183
- C_INTERPRETER.set_tensor(C_INPUT_LAYER_INDEX, np.array(vector, dtype="float32"))
1184
- C_INTERPRETER.invoke()
1185
-
1186
- return C_INTERPRETER.get_tensor(C_OUTPUT_LAYER_INDEX)
1187
-
1188
- return C_PBMODEL.basic(sample)["scores"]
1189
-
1190
-
1191
- def embeddings(sample):
1192
- """Extracts the embeddings for a sample.
1193
-
1194
- Args:
1195
- sample: Audio samples.
1196
-
1197
- Returns:
1198
- The embeddings.
1199
- """
1200
- # Does interpreter exist?
1201
- if INTERPRETER is None:
1202
- load_model(False)
1203
-
1204
- # Reshape input tensor
1205
- INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
1206
- INTERPRETER.allocate_tensors()
1207
-
1208
- # Extract feature embeddings
1209
- INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
1210
- INTERPRETER.invoke()
1211
-
1212
- return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
1
+ # ruff: noqa: PLW0603
2
+ """Contains functions to use the BirdNET models."""
3
+
4
+ import os
5
+ import sys
6
+ import warnings
7
+
8
+ import numpy as np
9
+
10
+ import birdnet_analyzer.config as cfg
11
+ from birdnet_analyzer import utils
12
+
13
+ SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
14
+
15
+
16
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
17
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
18
+
19
+ warnings.filterwarnings("ignore")
20
+
21
+ # Import TFLite from runtime or Tensorflow;
22
+ # import Keras if protobuf model;
23
+ # NOTE: we have to use TFLite if we want to use
24
+ # the metadata model or want to extract embeddings
25
+ try:
26
+ import tflite_runtime.interpreter as tflite # type: ignore
27
+ except ModuleNotFoundError:
28
+ from tensorflow import lite as tflite
29
+ if not cfg.MODEL_PATH.endswith(".tflite"):
30
+ from tensorflow import keras
31
+
32
+ INTERPRETER: tflite.Interpreter = None
33
+ C_INTERPRETER: tflite.Interpreter = None
34
+ M_INTERPRETER: tflite.Interpreter = None
35
+ OUTPUT_DETAILS = None
36
+ PBMODEL = None
37
+ C_PBMODEL = None
38
+ EMPTY_CLASS_EXCEPTION_REF = None
39
+
40
+
41
+ def get_empty_class_exception():
42
+ import keras_tuner.errors
43
+
44
+ global EMPTY_CLASS_EXCEPTION_REF
45
+
46
+ if EMPTY_CLASS_EXCEPTION_REF:
47
+ return EMPTY_CLASS_EXCEPTION_REF
48
+
49
+ class EmptyClassException(keras_tuner.errors.FatalError):
50
+ """
51
+ Exception raised when a class is found to be empty.
52
+
53
+ Attributes:
54
+ index (int): The index of the empty class.
55
+ message (str): The error message indicating which class is empty.
56
+ """
57
+
58
+ def __init__(self, *args, index=None):
59
+ super().__init__(*args)
60
+ self.index = index
61
+ self.message = f"Class {index} is empty."
62
+
63
+ EMPTY_CLASS_EXCEPTION_REF = EmptyClassException
64
+ return EMPTY_CLASS_EXCEPTION_REF
65
+
66
+
67
+ def label_smoothing(y: np.ndarray, alpha=0.1):
68
+ """
69
+ Applies label smoothing to the given labels.
70
+ Label smoothing is a technique used to prevent the model from becoming overconfident by adjusting the target labels.
71
+ It subtracts a small value (alpha) from the correct label and distributes it among the other labels.
72
+ Args:
73
+ y (numpy.ndarray): Array of labels to be smoothed. The array should be of shape (num_labels,).
74
+ alpha (float, optional): Smoothing parameter. Default is 0.1.
75
+ Returns:
76
+ numpy.ndarray: The smoothed labels.
77
+ """
78
+ # Subtract alpha from correct label when it is >0
79
+ y[y > 0] -= alpha
80
+
81
+ # Assigned alpha to all other labels
82
+ y[y == 0] = alpha / y.shape[0]
83
+
84
+ return y
85
+
86
+
87
+ def mixup(x, y, augmentation_ratio=0.25, alpha=0.2):
88
+ """Apply mixup to the given data.
89
+
90
+ Mixup is a data augmentation technique that generates new samples by
91
+ mixing two samples and their labels.
92
+
93
+ Args:
94
+ x: Samples.
95
+ y: One-hot labels.
96
+ augmentation_ratio: The ratio of augmented samples.
97
+ alpha: The beta distribution parameter.
98
+
99
+ Returns:
100
+ Augmented data.
101
+ """
102
+ rng = np.random.default_rng(cfg.RANDOM_SEED)
103
+
104
+ # Get indices of all positive samples
105
+ positive_indices = np.unique(np.where(y[:, :] == 1)[0])
106
+
107
+ # Calculate the number of samples to augment based on the ratio
108
+ num_samples_to_augment = int(len(positive_indices) * augmentation_ratio)
109
+
110
+ # Indices of samples, that are already mixed up
111
+ mixed_up_indices = []
112
+
113
+ for _ in range(num_samples_to_augment):
114
+ # Randomly choose one instance from the positive samples
115
+ index = rng.choice(positive_indices)
116
+
117
+ # Choose another one, when the chosen one was already mixed up
118
+ while index in mixed_up_indices:
119
+ index = rng.choice(positive_indices)
120
+
121
+ x1, y1 = x[index], y[index]
122
+
123
+ # Randomly choose a different instance from the dataset
124
+ second_index = rng.choice(positive_indices)
125
+
126
+ # Choose again, when the same or an already mixed up sample was selected
127
+ while second_index == index or second_index in mixed_up_indices:
128
+ second_index = rng.choice(positive_indices)
129
+ x2, y2 = x[second_index], y[second_index]
130
+
131
+ # Generate a random mixing coefficient (lambda)
132
+ lambda_ = rng.beta(alpha, alpha)
133
+
134
+ # Mix the embeddings and labels
135
+ mixed_x = lambda_ * x1 + (1 - lambda_) * x2
136
+ mixed_y = lambda_ * y1 + (1 - lambda_) * y2
137
+
138
+ # Replace one of the original samples and labels with the augmented sample and labels
139
+ x[index] = mixed_x
140
+ y[index] = mixed_y
141
+
142
+ # Mark the sample as already mixed up
143
+ mixed_up_indices.append(index)
144
+
145
+ del mixed_x
146
+ del mixed_y
147
+
148
+ return x, y
149
+
150
+
151
+ def random_split(x, y, val_ratio=0.2):
152
+ """Splits the data into training and validation data.
153
+
154
+ Makes sure that each class is represented in both sets.
155
+
156
+ Args:
157
+ x: Samples.
158
+ y: One-hot labels.
159
+ val_ratio: The ratio of validation data.
160
+
161
+ Returns:
162
+ A tuple of (x_train, y_train, x_val, y_val).
163
+ """
164
+ rng = np.random.default_rng(cfg.RANDOM_SEED)
165
+
166
+ # Get number of classes
167
+ num_classes = y.shape[1]
168
+
169
+ # Initialize training and validation data
170
+ x_train, y_train, x_val, y_val = [], [], [], []
171
+
172
+ # Split data
173
+ for i in range(num_classes):
174
+ # Get indices of positive samples of current class
175
+ positive_indices = np.where(y[:, i] == 1)[0]
176
+
177
+ # Get indices of negative samples of current class
178
+ negative_indices = np.where(y[:, i] == -1)[0]
179
+
180
+ # Get number of samples for each set
181
+ num_samples = len(positive_indices)
182
+ num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
183
+ num_samples_val = max(0, num_samples - num_samples_train)
184
+
185
+ # Randomly choose samples for training and validation
186
+ rng.shuffle(positive_indices)
187
+ train_indices = positive_indices[:num_samples_train]
188
+ val_indices = positive_indices[num_samples_train : num_samples_train + num_samples_val]
189
+
190
+ # Append samples to training and validation data
191
+ x_train.append(x[train_indices])
192
+ y_train.append(y[train_indices])
193
+ x_val.append(x[val_indices])
194
+ y_val.append(y[val_indices])
195
+
196
+ # Append negative samples to training data
197
+ x_train.append(x[negative_indices])
198
+ y_train.append(y[negative_indices])
199
+
200
+ # Add samples for non-event classes to training and validation data
201
+ non_event_indices = np.where(np.sum(y[:, :], axis=1) == 0)[0]
202
+ num_samples = len(non_event_indices)
203
+ num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
204
+ num_samples_val = max(0, num_samples - num_samples_train)
205
+ rng.shuffle(non_event_indices)
206
+ train_indices = non_event_indices[:num_samples_train]
207
+ val_indices = non_event_indices[num_samples_train : num_samples_train + num_samples_val]
208
+ x_train.append(x[train_indices])
209
+ y_train.append(y[train_indices])
210
+ x_val.append(x[val_indices])
211
+ y_val.append(y[val_indices])
212
+
213
+ # Concatenate data
214
+ x_train = np.concatenate(x_train)
215
+ y_train = np.concatenate(y_train)
216
+ x_val = np.concatenate(x_val)
217
+ y_val = np.concatenate(y_val)
218
+
219
+ # Shuffle data
220
+ indices = np.arange(len(x_train))
221
+ rng.shuffle(indices)
222
+ x_train = x_train[indices]
223
+ y_train = y_train[indices]
224
+
225
+ indices = np.arange(len(x_val))
226
+ rng.shuffle(indices)
227
+ x_val = x_val[indices]
228
+ y_val = y_val[indices]
229
+
230
+ return x_train, y_train, x_val, y_val
231
+
232
+
233
+ def random_multilabel_split(x, y, val_ratio=0.2):
234
+ """Splits the data into training and validation data.
235
+
236
+ Makes sure that each combination of classes is represented in both sets.
237
+
238
+ Args:
239
+ x: Samples.
240
+ y: One-hot labels.
241
+ val_ratio: The ratio of validation data.
242
+
243
+ Returns:
244
+ A tuple of (x_train, y_train, x_val, y_val).
245
+
246
+ """
247
+ rng = np.random.default_rng(cfg.RANDOM_SEED)
248
+
249
+ # Find all combinations of labels
250
+ class_combinations = np.unique(y, axis=0)
251
+
252
+ # Initialize training and validation data
253
+ x_train, y_train, x_val, y_val = [], [], [], []
254
+
255
+ # Split the data for each combination of labels
256
+ for class_combination in class_combinations:
257
+ # find all indices
258
+ indices = np.where((y == class_combination).all(axis=1))[0]
259
+
260
+             # Combinations that contain a negative label are used only for training
261
+ if -1 in class_combination:
262
+ x_train.append(x[indices])
263
+ y_train.append(y[indices])
264
+ # Otherwise split according to the validation split
265
+ else:
266
+ # Get number of samples for each set
267
+ num_samples = len(indices)
268
+ num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
269
+ num_samples_val = max(0, num_samples - num_samples_train)
270
+ # Randomly choose samples for training and validation
271
+ rng.shuffle(indices)
272
+ train_indices = indices[:num_samples_train]
273
+ val_indices = indices[num_samples_train : num_samples_train + num_samples_val]
274
+ # Append samples to training and validation data
275
+ x_train.append(x[train_indices])
276
+ y_train.append(y[train_indices])
277
+ x_val.append(x[val_indices])
278
+ y_val.append(y[val_indices])
279
+
280
+ # Concatenate data
281
+ x_train = np.concatenate(x_train)
282
+ y_train = np.concatenate(y_train)
283
+ x_val = np.concatenate(x_val)
284
+ y_val = np.concatenate(y_val)
285
+
286
+ # Shuffle data
287
+ indices = np.arange(len(x_train))
288
+ rng.shuffle(indices)
289
+ x_train = x_train[indices]
290
+ y_train = y_train[indices]
291
+
292
+ indices = np.arange(len(x_val))
293
+ rng.shuffle(indices)
294
+ x_val = x_val[indices]
295
+ y_val = y_val[indices]
296
+
297
+ return x_train, y_train, x_val, y_val
298
+
299
+
300
+ def upsample_core(x: np.ndarray, y: np.ndarray, min_samples: int, apply, size=2):
301
+ """
302
+ Upsamples the minority class in the dataset using the specified apply function.
303
+ Parameters:
304
+ x (np.ndarray): The feature matrix.
305
+ y (np.ndarray): The target labels.
306
+ min_samples (int): The minimum number of samples required for the minority class.
307
+         apply (callable): A function that applies SMOTE or another augmentation to the selected samples.
308
+ size (int, optional): The number of samples to generate in each iteration. Default is 2.
309
+ Returns:
310
+ tuple: A tuple containing the upsampled feature matrix and target labels.
311
+ """
312
+ rng = np.random.default_rng(cfg.RANDOM_SEED)
313
+ y_temp = []
314
+ x_temp = []
315
+
316
+ if cfg.BINARY_CLASSIFICATION:
317
+ # Determine if 1 or 0 is the minority class
318
+ minority_label = 1 if y.sum(axis=0) < len(y) - y.sum(axis=0) else 0
319
+
320
+ while np.where(y == minority_label)[0].shape[0] + len(y_temp) < min_samples:
321
+ # Randomly choose a sample from the minority class
322
+ random_index = rng.choice(np.where(y == minority_label)[0], size=size)
323
+
324
+ # Apply SMOTE
325
+ x_app, y_app = apply(x, y, random_index)
326
+ y_temp.append(y_app)
327
+ x_temp.append(x_app)
328
+ else:
329
+ for i in range(y.shape[1]):
330
+ while y[:, i].sum() + len(y_temp) < min_samples:
331
+ try:
332
+ # Randomly choose a sample from the minority class
333
+ random_index = rng.choice(np.where(y[:, i] == 1)[0], size=size)
334
+ except ValueError as e:
335
+ raise get_empty_class_exception()(index=i) from e
336
+
337
+ # Apply SMOTE
338
+ x_app, y_app = apply(x, y, random_index)
339
+ y_temp.append(y_app)
340
+ x_temp.append(x_app)
341
+
342
+ return x_temp, y_temp
343
+
344
+
345
+ def upsampling(x: np.ndarray, y: np.ndarray, ratio=0.5, mode="repeat"):
346
+ """Balance data through upsampling.
347
+
348
+     We upsample minority classes until each has at least ratio times the number of samples of the majority class (e.g. 10% for ratio=0.1).
349
+
350
+ Args:
351
+ x: Samples.
352
+ y: One-hot labels.
353
+ ratio: The minimum ratio of minority to majority samples.
354
+ mode: The upsampling mode. Either 'repeat', 'mean', 'linear' or 'smote'.
355
+
356
+ Returns:
357
+ Upsampled data.
358
+ """
359
+
360
+ # Set numpy random seed
361
+ rng = np.random.default_rng(cfg.RANDOM_SEED)
362
+
363
+ # Determine min number of samples
364
+ min_samples = (
365
+ int(max(y.sum(axis=0), len(y) - y.sum(axis=0)) * ratio)
366
+ if cfg.BINARY_CLASSIFICATION
367
+ else int(np.max(y.sum(axis=0)) * ratio)
368
+ )
369
+
370
+ x_temp = []
371
+ y_temp = []
372
+
373
+ if mode == "repeat":
374
+
375
+ def applyRepeat(x, y, random_index):
376
+ return x[random_index[0]], y[random_index[0]]
377
+
378
+ x_temp, y_temp = upsample_core(x, y, min_samples, applyRepeat, size=1)
379
+
380
+ elif mode == "mean":
381
+ # For each class with less than min_samples
382
+ # select two random samples and calculate the mean
383
+ def applyMean(x, y, random_indices):
384
+ # Calculate the mean of the two samples
385
+ mean = np.mean(x[random_indices], axis=0)
386
+
387
+ # Append the mean and label to a temp list
388
+ return mean, y[random_indices[0]]
389
+
390
+ x_temp, y_temp = upsample_core(x, y, min_samples, applyMean)
391
+
392
+ elif mode == "linear":
393
+ # For each class with less than min_samples
394
+ # select two random samples and calculate the linear combination
395
+ def applyLinearCombination(x, y, random_indices):
396
+ # Calculate the linear combination of the two samples
397
+ alpha = rng.uniform(0, 1)
398
+ new_sample = alpha * x[random_indices[0]] + (1 - alpha) * x[random_indices[1]]
399
+
400
+ # Append the new sample and label to a temp list
401
+ return new_sample, y[random_indices[0]]
402
+
403
+ x_temp, y_temp = upsample_core(x, y, min_samples, applyLinearCombination)
404
+
405
+ elif mode == "smote":
406
+ # For each class with less than min_samples apply SMOTE
407
+ def applySmote(x, y, random_index, k=5):
408
+ # Get the k nearest neighbors
409
+ distances = np.sqrt(np.sum((x - x[random_index[0]]) ** 2, axis=1))
410
+ indices = np.argsort(distances)[1 : k + 1]
411
+
412
+ # Randomly choose one of the neighbors
413
+ random_neighbor = rng.choice(indices)
414
+
415
+ # Calculate the difference vector
416
+ diff = x[random_neighbor] - x[random_index[0]]
417
+
418
+ # Randomly choose a weight between 0 and 1
419
+ weight = rng.uniform(0, 1)
420
+
421
+ # Calculate the new sample
422
+ new_sample = x[random_index[0]] + weight * diff
423
+
424
+ # Append the new sample and label to a temp list
425
+ return new_sample, y[random_index[0]]
426
+
427
+ x_temp, y_temp = upsample_core(x, y, min_samples, applySmote, size=1)
428
+
429
+ # Append the temp list to the original data
430
+ if len(x_temp) > 0:
431
+ x = np.vstack((x, np.array(x_temp)))
432
+ y = np.vstack((y, np.array(y_temp)))
433
+
434
+ # Shuffle data
435
+ indices = np.arange(len(x))
436
+ rng.shuffle(indices)
437
+ x = x[indices]
438
+ y = y[indices]
439
+
440
+ del x_temp
441
+ del y_temp
442
+
443
+ return x, y
444
+
445
+
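
A hedged sketch of the upsampling above on a deliberately skewed toy set (assumes `cfg.BINARY_CLASSIFICATION` is False and `cfg.RANDOM_SEED` is set; the other modes are 'repeat', 'linear' and 'smote'):

```python
import numpy as np

# Toy imbalance: 6 samples of class 0, 2 samples of class 1.
x_skew = np.random.rand(8, 8).astype("float32")
y_skew = np.zeros((8, 2), dtype="float32")
y_skew[:6, 0] = 1
y_skew[6:, 1] = 1

# 'mean' mode fills the gap with per-class means of two random samples until
# each class reaches at least ratio * (largest class count) samples.
x_bal, y_bal = upsampling(x_skew, y_skew, ratio=0.5, mode="mean")
print(y_skew.sum(axis=0), "->", y_bal.sum(axis=0))  # [6. 2.] -> [6. 3.]
```
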
446
+ def save_model_params(path):
447
+ """Saves the model parameters to a file.
448
+
449
+ Args:
450
+ path: Path to the file.
451
+ """
452
+ utils.save_params(
453
+ path,
454
+ (
455
+ "Hidden units",
456
+ "Dropout",
457
+ "Batchsize",
458
+ "Learning rate",
459
+ "Crop mode",
460
+ "Crop overlap",
461
+ "Audio speed",
462
+             "Upsampling mode",
463
+             "Upsampling ratio",
464
+ "use mixup",
465
+ "use label smoothing",
466
+ "BirdNET Model version",
467
+ ),
468
+ (
469
+ cfg.TRAIN_HIDDEN_UNITS,
470
+ cfg.TRAIN_DROPOUT,
471
+ cfg.TRAIN_BATCH_SIZE,
472
+ cfg.TRAIN_LEARNING_RATE,
473
+ cfg.SAMPLE_CROP_MODE,
474
+ cfg.SIG_OVERLAP,
475
+ cfg.AUDIO_SPEED,
476
+ cfg.UPSAMPLING_MODE,
477
+ cfg.UPSAMPLING_RATIO,
478
+ cfg.TRAIN_WITH_MIXUP,
479
+ cfg.TRAIN_WITH_LABEL_SMOOTHING,
480
+ cfg.MODEL_VERSION,
481
+ ),
482
+ )
483
+
484
+
485
+ def reset_custom_classifier():
486
+ """
487
+ Resets the custom classifier by setting the global variables C_INTERPRETER and C_PBMODEL to None.
488
+ This function is used to clear any existing custom classifier models and interpreters, effectively
489
+ resetting the state of the custom classifier.
490
+ """
491
+ global C_INTERPRETER
492
+ global C_PBMODEL
493
+
494
+ C_INTERPRETER = None
495
+ C_PBMODEL = None
496
+
497
+
498
+ def load_model(class_output=True):
499
+ """
500
+ Loads the machine learning model based on the configuration provided.
501
+ This function loads either a TensorFlow Lite (TFLite) model or a protobuf model
502
+ depending on the file extension of the model path specified in the configuration.
503
+ It sets up the global variables for the model interpreter and input/output layer indices.
504
+
505
+ Args:
506
+ class_output (bool): If True, sets the output layer index to the classification output.
507
+ If False, sets the output layer index to the feature embeddings.
508
+ """
509
+ global PBMODEL
510
+ global INTERPRETER
511
+ global INPUT_LAYER_INDEX
512
+ global OUTPUT_LAYER_INDEX
513
+ global OUTPUT_DETAILS
514
+
515
+ # Do we have to load the tflite or protobuf model?
516
+ if cfg.MODEL_PATH.endswith(".tflite"):
517
+ if not INTERPRETER:
518
+ # Load TFLite model and allocate tensors.
519
+ INTERPRETER = tflite.Interpreter(
520
+ model_path=os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), num_threads=cfg.TFLITE_THREADS
521
+ )
522
+ INTERPRETER.allocate_tensors()
523
+
524
+ # Get input and output tensors.
525
+ input_details = INTERPRETER.get_input_details()
526
+ OUTPUT_DETAILS = INTERPRETER.get_output_details()
527
+
528
+ # Get input tensor index
529
+ INPUT_LAYER_INDEX = input_details[0]["index"]
530
+
531
+ # Get classification output or feature embeddings
532
+ OUTPUT_LAYER_INDEX = OUTPUT_DETAILS[0]["index"] if class_output else OUTPUT_DETAILS[0]["index"] - 1
533
+
534
+ elif not PBMODEL:
535
+ # Load protobuf model
536
+ # Note: This will throw a bunch of warnings about custom gradients
537
+ # which we will ignore until TF lets us block them
538
+ PBMODEL = keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), compile=False)
539
+
540
+
541
+ def load_custom_classifier():
542
+ """
543
+ Loads a custom classifier model based on the file extension of the provided model path.
544
+ If the model file ends with ".tflite", it loads a TensorFlow Lite model and sets up the interpreter,
545
+ input layer index, output layer index, and input size.
546
+ If the model file does not end with ".tflite", it loads a TensorFlow SavedModel.
547
+ """
548
+ global C_INTERPRETER
549
+ global C_INPUT_LAYER_INDEX
550
+ global C_OUTPUT_LAYER_INDEX
551
+ global C_INPUT_SIZE
552
+ global C_PBMODEL
553
+
554
+ if cfg.CUSTOM_CLASSIFIER.endswith(".tflite"):
555
+ # Load TFLite model and allocate tensors.
556
+ C_INTERPRETER = tflite.Interpreter(model_path=cfg.CUSTOM_CLASSIFIER, num_threads=cfg.TFLITE_THREADS)
557
+ C_INTERPRETER.allocate_tensors()
558
+
559
+ # Get input and output tensors.
560
+ input_details = C_INTERPRETER.get_input_details()
561
+ output_details = C_INTERPRETER.get_output_details()
562
+
563
+ # Get input tensor index
564
+ C_INPUT_LAYER_INDEX = input_details[0]["index"]
565
+
566
+ C_INPUT_SIZE = input_details[0]["shape"][-1]
567
+
568
+ # Get classification output
569
+ C_OUTPUT_LAYER_INDEX = output_details[0]["index"]
570
+ else:
571
+ import tensorflow as tf
572
+
573
+ tf.get_logger().setLevel("ERROR")
574
+
575
+ C_PBMODEL = tf.saved_model.load(cfg.CUSTOM_CLASSIFIER)
576
+
577
+
578
+ def load_meta_model():
579
+ """Loads the model for species prediction.
580
+
581
+     Initializes the model used to predict the species list based on coordinates and week of year.
582
+ """
583
+ global M_INTERPRETER
584
+ global M_INPUT_LAYER_INDEX
585
+ global M_OUTPUT_LAYER_INDEX
586
+
587
+ # Load TFLite model and allocate tensors.
588
+ M_INTERPRETER = tflite.Interpreter(
589
+ model_path=os.path.join(SCRIPT_DIR, cfg.MDATA_MODEL_PATH), num_threads=cfg.TFLITE_THREADS
590
+ )
591
+ M_INTERPRETER.allocate_tensors()
592
+
593
+ # Get input and output tensors.
594
+ input_details = M_INTERPRETER.get_input_details()
595
+ output_details = M_INTERPRETER.get_output_details()
596
+
597
+ # Get input tensor index
598
+ M_INPUT_LAYER_INDEX = input_details[0]["index"]
599
+ M_OUTPUT_LAYER_INDEX = output_details[0]["index"]
600
+
601
+
602
+ def build_linear_classifier(num_labels, input_size, hidden_units=0, dropout=0.0):
603
+ """Builds a classifier.
604
+
605
+ Args:
606
+ num_labels: Output size.
607
+ input_size: Size of the input.
608
+ hidden_units: If > 0, creates another hidden layer with the given number of units.
609
+ dropout: Dropout rate.
610
+
611
+ Returns:
612
+ A new classifier.
613
+ """
614
+ # import keras
615
+ from tensorflow import keras
616
+
617
+ # Build a simple one- or two-layer linear classifier
618
+ model = keras.Sequential()
619
+
620
+ # Input layer
621
+ model.add(keras.layers.InputLayer(input_shape=(input_size,)))
622
+
623
+ # Batch normalization on input to standardize embeddings
624
+ model.add(keras.layers.BatchNormalization())
625
+
626
+ # Optional L2 regularization for all dense layers
627
+ regularizer = keras.regularizers.l2(1e-5)
628
+
629
+ # Hidden layer with improved architecture
630
+ if hidden_units > 0:
631
+ # Dropout layer before hidden layer
632
+ if dropout > 0:
633
+ model.add(keras.layers.Dropout(dropout))
634
+
635
+ # Add a hidden layer with L2 regularization
636
+ model.add(
637
+ keras.layers.Dense(
638
+ hidden_units, activation="relu", kernel_regularizer=regularizer, kernel_initializer="he_normal"
639
+ )
640
+ )
641
+
642
+ # Add another batch normalization after the hidden layer
643
+ model.add(keras.layers.BatchNormalization())
644
+
645
+ # Dropout layer before output
646
+ if dropout > 0:
647
+ model.add(keras.layers.Dropout(dropout))
648
+
649
+ # Classification layer with L2 regularization
650
+ model.add(keras.layers.Dense(num_labels, kernel_regularizer=regularizer, kernel_initializer="glorot_uniform"))
651
+
652
+ # Activation layer
653
+ model.add(keras.layers.Activation("sigmoid"))
654
+
655
+ return model
656
+
657
+
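
For orientation, a sketch of the head this builds (the 1024-dim input size is an assumed placeholder; the real size comes from the loaded BirdNET embeddings):

```python
# Layer order per build_linear_classifier: BatchNorm -> Dropout -> Dense(relu)
# -> BatchNorm -> Dropout -> Dense(num_labels) -> sigmoid activation.
head = build_linear_classifier(num_labels=3, input_size=1024, hidden_units=128, dropout=0.25)
head.summary()
```
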
658
+ def train_linear_classifier(
659
+ classifier,
660
+ x_train,
661
+ y_train,
662
+ x_test,
663
+ y_test,
664
+ epochs,
665
+ batch_size,
666
+ learning_rate,
667
+ val_split,
668
+ upsampling_ratio,
669
+ upsampling_mode,
670
+ train_with_mixup,
671
+ train_with_label_smoothing,
672
+ train_with_focal_loss=False,
673
+ focal_loss_gamma=2.0,
674
+ focal_loss_alpha=0.25,
675
+ on_epoch_end=None,
676
+ ):
677
+ """Trains a custom classifier.
678
+
679
+ Trains a new classifier for BirdNET based on the given data.
680
+
681
+ Args:
682
+ classifier: The classifier to be trained.
683
+ x_train: Samples.
684
+ y_train: Labels.
685
+ x_test: Validation samples.
686
+ y_test: Validation labels.
687
+ epochs: Number of epochs to train.
688
+ batch_size: Batch size.
689
+ learning_rate: The learning rate during training.
690
+ val_split: Validation split ratio (is 0 when using test data).
691
+ upsampling_ratio: Upsampling ratio.
692
+ upsampling_mode: Upsampling mode.
693
+ train_with_mixup: If True, applies mixup to the training data.
694
+ train_with_label_smoothing: If True, applies label smoothing to the training data.
695
+ train_with_focal_loss: If True, uses focal loss instead of binary cross-entropy loss.
696
+ focal_loss_gamma: Focal loss gamma parameter.
697
+ focal_loss_alpha: Focal loss alpha parameter.
698
+ on_epoch_end: Optional callback `function(epoch, logs)`.
699
+
700
+ Returns:
701
+ (classifier, history)
702
+ """
703
+ # import keras
704
+ from tensorflow import keras
705
+
706
+ class FunctionCallback(keras.callbacks.Callback):
707
+ def __init__(self, on_epoch_end=None) -> None:
708
+ super().__init__()
709
+ self.on_epoch_end_fn = on_epoch_end
710
+
711
+ def on_epoch_end(self, epoch, logs=None):
712
+ if self.on_epoch_end_fn:
713
+ self.on_epoch_end_fn(epoch, logs)
714
+
715
+ # Set random seed
716
+ rng = np.random.default_rng(cfg.RANDOM_SEED)
717
+
718
+ # Shuffle data
719
+ idx = np.arange(x_train.shape[0])
720
+ rng.shuffle(idx)
721
+ x_train = x_train[idx]
722
+ y_train = y_train[idx]
723
+
724
+ # Random val split
725
+ if val_split > 0:
726
+ if not cfg.MULTI_LABEL:
727
+ x_train, y_train, x_val, y_val = random_split(x_train, y_train, val_split)
728
+ else:
729
+ x_train, y_train, x_val, y_val = random_multilabel_split(x_train, y_train, val_split)
730
+ else:
731
+ x_val = x_test
732
+ y_val = y_test
733
+
734
+ print(
735
+ f"Training on {x_train.shape[0]} samples, validating on {x_val.shape[0]} samples.",
736
+ flush=True,
737
+ )
738
+
739
+ # Upsample training data
740
+ if upsampling_ratio > 0:
741
+ x_train, y_train = upsampling(x_train, y_train, upsampling_ratio, upsampling_mode)
742
+ print(f"Upsampled training data to {x_train.shape[0]} samples.", flush=True)
743
+
744
+ # Apply mixup to training data
745
+ if train_with_mixup and not cfg.BINARY_CLASSIFICATION:
746
+ x_train, y_train = mixup(x_train, y_train)
747
+
748
+ # Apply label smoothing
749
+ if train_with_label_smoothing and not cfg.BINARY_CLASSIFICATION:
750
+ y_train = label_smoothing(y_train)
751
+
752
+ # Early stopping with patience depending on dataset size
753
+ patience = min(10, max(5, int(epochs / 10)))
754
+ min_delta = 0.001
755
+
756
+ callbacks = [
757
+ # EarlyStopping with restore_best_weights
758
+ keras.callbacks.EarlyStopping(
759
+ monitor="val_AUPRC",
760
+ mode="max",
761
+ patience=patience,
762
+ verbose=1,
763
+ min_delta=min_delta,
764
+ restore_best_weights=True,
765
+ ),
766
+ # Function callback for progress tracking
767
+ FunctionCallback(on_epoch_end=on_epoch_end),
768
+ ]
769
+
770
+ # Learning rate schedule - use cosine decay with warmup
771
+ warmup_epochs = min(5, int(epochs * 0.1))
772
+
773
+ def lr_schedule(epoch, lr):
774
+ if epoch < warmup_epochs:
775
+ # Linear warmup
776
+ return learning_rate * (epoch + 1) / warmup_epochs
777
+
778
+ # Cosine decay
779
+ progress = (epoch - warmup_epochs) / (epochs - warmup_epochs)
780
+ return learning_rate * (0.1 + 0.9 * (1 + np.cos(np.pi * progress)) / 2)
781
+
782
+ # Add LR scheduler callback
783
+ callbacks.append(keras.callbacks.LearningRateScheduler(lr_schedule))
784
+
785
+ optimizer_cls = keras.optimizers.legacy.Adam if sys.platform == "darwin" else keras.optimizers.Adam
786
+
787
+ def _focal_loss(y_true, y_pred):
788
+ return focal_loss(y_true, y_pred, gamma=cfg.FOCAL_LOSS_GAMMA, alpha=cfg.FOCAL_LOSS_ALPHA)
789
+
790
+ # Choose the loss function based on config
791
+ loss_function = _focal_loss if train_with_focal_loss else custom_loss
792
+
793
+ # Compile model with appropriate metrics for classification task
794
+ classifier.compile(
795
+ optimizer=optimizer_cls(learning_rate=learning_rate),
796
+ loss=loss_function,
797
+ metrics=[
798
+ keras.metrics.AUC(
799
+ curve="PR",
800
+ multi_label=cfg.MULTI_LABEL,
801
+ name="AUPRC",
802
+ num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
803
+ from_logits=True,
804
+ ),
805
+ keras.metrics.AUC(
806
+ curve="ROC",
807
+ multi_label=cfg.MULTI_LABEL,
808
+ name="AUROC",
809
+ num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
810
+ from_logits=True,
811
+ ),
812
+ ],
813
+ )
814
+
815
+ # Train model
816
+ history = classifier.fit(
817
+ x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, y_val), callbacks=callbacks
818
+ )
819
+
820
+ return classifier, history
821
+
822
+
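
A hedged end-to-end sketch tying the helpers above together on the toy split from the earlier example (assumes `cfg.MULTI_LABEL` and `cfg.RANDOM_SEED` are configured; with `val_split=0` the test arguments double as validation data):

```python
# Build a small head matching the toy embedding size, then train it.
head = build_linear_classifier(
    num_labels=y_train.shape[1], input_size=x_train.shape[1], hidden_units=32, dropout=0.25
)
head, history = train_linear_classifier(
    head,
    x_train, y_train,
    x_val, y_val,
    epochs=20,
    batch_size=8,
    learning_rate=1e-3,
    val_split=0.0,
    upsampling_ratio=0.0,
    upsampling_mode="repeat",
    train_with_mixup=False,
    train_with_label_smoothing=False,
)
print(max(history.history["val_AUPRC"]))  # best validation AUPRC seen during training
```
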
823
+ def save_linear_classifier(classifier, model_path: str, labels: list[str], mode="replace"):
824
+ """Saves the classifier as a tflite model, as well as the used labels in a .txt.
825
+
826
+ Args:
827
+ classifier: The custom classifier.
828
+ model_path: Path the model will be saved at.
829
+         labels: List of labels used for the classifier.
+         mode: Either "replace" (the classifier is stacked on the BirdNET embeddings model) or "append" (its outputs are concatenated to the original BirdNET outputs).
830
+ """
831
+ import tensorflow as tf
832
+
833
+ global PBMODEL
834
+
835
+ tf.get_logger().setLevel("ERROR")
836
+
837
+ if PBMODEL is None:
838
+ PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
839
+
840
+ saved_model = PBMODEL
841
+
842
+ # Remove activation layer
843
+ classifier.pop()
844
+
845
+ if mode == "replace":
846
+ combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
847
+ elif mode == "append":
848
+ intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
849
+
850
+ output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
851
+
852
+ combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
853
+ else:
854
+ raise ValueError("Model save mode must be either 'replace' or 'append'")
855
+
856
+ # Append .tflite if necessary
857
+ if not model_path.endswith(".tflite"):
858
+ model_path += ".tflite"
859
+
860
+ # Make folders
861
+ os.makedirs(os.path.dirname(model_path), exist_ok=True)
862
+
863
+ # Save model as tflite
864
+ converter = tf.lite.TFLiteConverter.from_keras_model(combined_model)
865
+ tflite_model = converter.convert()
866
+
867
+ with open(model_path, "wb") as f:
868
+ f.write(tflite_model)
869
+
870
+ if mode == "append":
871
+ labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
872
+
873
+ # Save labels
874
+ with open(model_path.replace(".tflite", "_Labels.txt"), "w", encoding="utf-8") as f:
875
+ f.writelines(label + "\n" for label in labels)
876
+
877
+ save_model_params(model_path.replace(".tflite", "_Params.csv"))
878
+
879
+
880
+ def save_raven_model(classifier, model_path: str, labels: list[str], mode="replace"):
881
+ """
882
+ Save a TensorFlow model with a custom classifier and associated metadata for use with BirdNET.
883
+
884
+ Args:
885
+ classifier (tf.keras.Model): The custom classifier model to be saved.
886
+ model_path (str): The path where the model will be saved.
887
+ labels (list[str]): A list of labels associated with the classifier.
888
+ mode (str, optional): The mode for saving the model. Can be either "replace" or "append".
889
+ Defaults to "replace".
890
+
891
+ Raises:
892
+ ValueError: If the mode is not "replace" or "append".
893
+
894
+ Returns:
895
+ None
896
+ """
897
+ import csv
898
+ import json
899
+
900
+ import tensorflow as tf
901
+
902
+ global PBMODEL
903
+
904
+ tf.get_logger().setLevel("ERROR")
905
+
906
+ if PBMODEL is None:
907
+ PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
908
+
909
+ saved_model = PBMODEL
910
+
911
+ if mode == "replace":
912
+ combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
913
+ elif mode == "append":
914
+ # Remove activation layer
915
+ classifier.pop()
916
+ intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
917
+
918
+ output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
919
+
920
+ combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
921
+ else:
922
+ raise ValueError("Model save mode must be either 'replace' or 'append'")
923
+
924
+ # Make signatures
925
+ class SignatureModule(tf.Module):
926
+ def __init__(self, keras_model):
927
+ super().__init__()
928
+ self.model = keras_model
929
+
930
+ @tf.function(input_signature=[tf.TensorSpec(shape=[None, 144000], dtype=tf.float32)])
931
+ def basic(self, inputs):
932
+ return {"scores": self.model(inputs)}
933
+
934
+ smodel = SignatureModule(combined_model)
935
+ signatures = {
936
+ "basic": smodel.basic,
937
+ }
938
+
939
+ # Save signature model
940
+ os.makedirs(os.path.dirname(model_path), exist_ok=True)
941
+ model_path = model_path.removesuffix(".tflite")
942
+ tf.saved_model.save(smodel, model_path, signatures=signatures)
943
+
944
+ if mode == "append":
945
+ labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
946
+
947
+ # Save label file
948
+ labelIds = [label[:4].replace(" ", "") + str(i) for i, label in enumerate(labels, 1)]
949
+ labels_dir = os.path.join(model_path, "labels")
950
+
951
+ os.makedirs(labels_dir, exist_ok=True)
952
+
953
+ with open(os.path.join(labels_dir, "label_names.csv"), "w", newline="") as labelsfile:
954
+ labelwriter = csv.writer(labelsfile)
955
+ labelwriter.writerows(zip(labelIds, labels, strict=True))
956
+
957
+ # Save class names file
958
+ classes_dir = os.path.join(model_path, "classes")
959
+
960
+ os.makedirs(classes_dir, exist_ok=True)
961
+
962
+ with open(os.path.join(classes_dir, "classes.csv"), "w", newline="") as classesfile:
963
+ classeswriter = csv.writer(classesfile)
964
+ for labelId in labelIds:
965
+ classeswriter.writerow((labelId, 0.25, cfg.SIG_FMIN, cfg.SIG_FMAX, False))
966
+
967
+ # Save model config
968
+ model_config = os.path.join(model_path, "model_config.json")
969
+
970
+ with open(model_config, "w") as modelconfigfile:
971
+ modelconfig = {
972
+ "specVersion": 1,
973
+ "modelDescription": "Custom classifier trained with BirdNET "
974
+ + cfg.MODEL_VERSION
975
+ + " embeddings.\n"
976
+             + "BirdNET was developed by the K. Lisa Yang Center for Conservation Bioacoustics "
977
+ + "at the Cornell Lab of Ornithology in collaboration with Chemnitz University of Technology.\n\n"
978
+ + "https://birdnet.cornell.edu",
979
+ "modelTypeConfig": {"modelType": "RECOGNITION"},
980
+ "signatures": [
981
+ {
982
+ "signatureName": "basic",
983
+ "modelInputs": [
984
+ {
985
+ "inputName": "inputs",
986
+ "sampleRate": 48000.0,
987
+ "inputConfig": ["batch", "samples"],
988
+ }
989
+ ],
990
+ "modelOutputs": [{"outputName": "scores", "outputType": "SCORES"}],
991
+ }
992
+ ],
993
+ "globalSemanticKeys": labelIds,
994
+ }
995
+ json.dump(modelconfig, modelconfigfile, indent=2)
996
+
997
+ model_params = os.path.join(model_path, "model_params.csv")
998
+
999
+ save_model_params(model_params)
1000
+
1001
+
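
Based on the writes above, the saved Raven model folder looks roughly like this (sketch; `saved_model.pb` and `variables/` are produced by `tf.saved_model.save`):

```
<model_path>/
├── saved_model.pb (+ variables/)   # written by tf.saved_model.save
├── labels/label_names.csv          # label ids and display names
├── classes/classes.csv             # per-class defaults written above
├── model_config.json               # Raven model metadata and signatures
└── model_params.csv                # training parameters (save_model_params)
```
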
1002
+ def predict_filter(lat, lon, week):
1003
+ """Predicts the probability for each species.
1004
+
1005
+ Args:
1006
+ lat: The latitude.
1007
+ lon: The longitude.
1008
+ week: The week of the year [1-48]. Use -1 for yearlong.
1009
+
1010
+ Returns:
1011
+ A list of probabilities for all species.
1012
+ """
1013
+ # Does interpreter exist?
1014
+ if M_INTERPRETER is None:
1015
+ load_meta_model()
1016
+
1017
+ # Prepare mdata as sample
1018
+ sample = np.expand_dims(np.array([lat, lon, week], dtype="float32"), 0)
1019
+
1020
+ # Run inference
1021
+ M_INTERPRETER.set_tensor(M_INPUT_LAYER_INDEX, sample)
1022
+ M_INTERPRETER.invoke()
1023
+
1024
+ return M_INTERPRETER.get_tensor(M_OUTPUT_LAYER_INDEX)[0]
1025
+
1026
+
1027
+ def explore(lat: float, lon: float, week: int):
1028
+ """Predicts the species list.
1029
+
1030
+ Predicts the species list based on the coordinates and week of year.
1031
+
1032
+ Args:
1033
+ lat: The latitude.
1034
+ lon: The longitude.
1035
+ week: The week of the year [1-48]. Use -1 for yearlong.
1036
+
1037
+ Returns:
1038
+ A sorted list of tuples with the score and the species.
1039
+ """
1040
+ # Make filter prediction
1041
+ l_filter = predict_filter(lat, lon, week)
1042
+
1043
+ # Apply threshold
1044
+ l_filter = np.where(l_filter >= cfg.LOCATION_FILTER_THRESHOLD, l_filter, 0)
1045
+
1046
+ # Zip with labels
1047
+ l_filter = list(zip(l_filter, cfg.LABELS, strict=True))
1048
+
1049
+ # Sort by filter value
1050
+ return sorted(l_filter, key=lambda x: x[0], reverse=True)
1051
+
1052
+
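
A usage sketch for the location filter (coordinates and week are arbitrary illustrations; assumes `cfg.LABELS`, the meta model and `cfg.LOCATION_FILTER_THRESHOLD` are set up as in the package):

```python
# Ten highest-scoring species for an arbitrary location in week 22 of 48.
for score, species in explore(lat=42.48, lon=-76.45, week=22)[:10]:
    if score > 0:
        print(f"{score:.3f}  {species}")
```
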
1053
+ def focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25, epsilon=1e-7):
1054
+ """
1055
+ Focal loss for better handling of class imbalance.
1056
+
1057
+ This loss function gives more weight to hard examples and down-weights easy examples.
1058
+ Particularly helpful for imbalanced datasets where some classes have few samples.
1059
+
1060
+ Args:
1061
+ y_true: Ground truth labels.
1062
+ y_pred: Predicted probabilities.
1063
+ gamma: Focusing parameter. Higher values mean more focus on hard examples.
1064
+ alpha: Balance parameter. Controls weight of positive vs negative examples.
1065
+ epsilon: Small constant to prevent log(0).
1066
+
1067
+ Returns:
1068
+ Focal loss value.
1069
+ """
1070
+ import tensorflow.keras.backend as K
1071
+
1072
+ # Apply sigmoid if not already applied
1073
+     # Clip predictions to avoid log(0)
1074
+
1075
+ # Calculate cross entropy
1076
+ cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
1077
+
1078
+ # Calculate focal weight
1079
+ p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
1080
+ focal_weight = K.pow(1 - p_t, gamma)
1081
+
1082
+ # Apply alpha balancing
1083
+ alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
1084
+
1085
+ # Calculate focal loss
1086
+ focal_loss = alpha_factor * focal_weight * cross_entropy
1087
+
1088
+ # Sum over all classes
1089
+ return K.sum(focal_loss, axis=-1)
1090
+
1091
+
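
To see the down-weighting of easy examples described in the docstring, a NumPy mirror of the same formula (a sanity-check sketch, not used by the package):

```python
import numpy as np

def focal_np(y_true, y_pred, gamma=2.0, alpha=0.25, eps=1e-7):
    # Same math as focal_loss above, computed with NumPy.
    y_pred = np.clip(y_pred, eps, 1 - eps)
    ce = -y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred)
    p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
    alpha_t = y_true * alpha + (1 - y_true) * (1 - alpha)
    return np.sum(alpha_t * (1 - p_t) ** gamma * ce, axis=-1)

print(focal_np(np.array([1.0]), np.array([0.9])))  # easy positive -> ~0.0003
print(focal_np(np.array([1.0]), np.array([0.1])))  # hard positive -> ~0.47
```
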
1092
+ def custom_loss(y_true, y_pred, epsilon=1e-7):
1093
+ import tensorflow.keras.backend as K
1094
+
1095
+ # Calculate loss for positive labels with epsilon
1096
+ positive_loss = -K.sum(y_true * K.log(K.clip(y_pred, epsilon, 1.0 - epsilon)), axis=-1)
1097
+
1098
+ # Calculate loss for negative labels with epsilon
1099
+ negative_loss = -K.sum((1 - y_true) * K.log(K.clip(1 - y_pred, epsilon, 1.0 - epsilon)), axis=-1)
1100
+
1101
+ # Combine both loss terms
1102
+ return positive_loss + negative_loss
1103
+
1104
+
1105
+ def flat_sigmoid(x, sensitivity=-1, bias=1.0):
1106
+ """
1107
+ Applies a flat sigmoid function to the input array with a bias shift.
1108
+
1109
+ The flat sigmoid function is defined as:
1110
+ f(x) = 1 / (1 + exp(sensitivity * clip(x + bias, -20, 20)))
1111
+
1112
+     We map the bias parameter (valid range [0.01, 1.99]) to an additive offset in roughly [-9.9, 9.9] with the formula:
1113
+ transformed_bias = (bias - 1.0) * 10.0
1114
+
1115
+     Thus, higher bias values shift the curve so that the same input yields a higher score, making the model more "sensitive".
1116
+
1117
+     Note: The shifted input is clipped to [-20, 20] to keep np.exp numerically stable.
1118
+
1119
+ Args:
1120
+ x (array-like): Input data.
1121
+ sensitivity (float, optional): Sensitivity parameter for the sigmoid function. Default is -1.
1122
+ bias (float, optional): Bias parameter to shift the sigmoid function on the x-axis. Must be in the range [0.01, 1.99]. Default is 1.0.
1123
+
1124
+ Returns:
1125
+ numpy.ndarray: Transformed data after applying the flat sigmoid function.
1126
+ """
1127
+
1128
+ transformed_bias = (bias - 1.0) * 10.0
1129
+
1130
+ return 1 / (1.0 + np.exp(sensitivity * np.clip(x + transformed_bias, -20, 20)))
1131
+
1132
+
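
A quick numeric sketch of the bias effect on a raw score of 0.0 (default sensitivity of -1):

```python
import numpy as np

# Low, neutral and high bias applied to the same input.
for b in (0.5, 1.0, 1.5):
    print(b, flat_sigmoid(np.array([0.0]), sensitivity=-1, bias=b))
# -> roughly 0.007, 0.5 and 0.993: higher bias yields higher confidence scores.
```
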
1133
+ def predict(sample):
1134
+ """Uses the main net to predict a sample.
1135
+
1136
+ Args:
1137
+ sample: Audio sample.
1138
+
1139
+ Returns:
1140
+ The prediction scores for the sample.
1141
+ """
1142
+ # Has custom classifier?
1143
+ if cfg.CUSTOM_CLASSIFIER is not None:
1144
+ return predict_with_custom_classifier(sample)
1145
+
1146
+ load_model()
1147
+
1148
+ if PBMODEL is None:
1149
+ # Reshape input tensor
1150
+ INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
1151
+ INTERPRETER.allocate_tensors()
1152
+
1153
+ # Make a prediction (Audio only for now)
1154
+ INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
1155
+ INTERPRETER.invoke()
1156
+ return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
1157
+
1158
+ # Make a prediction (Audio only for now)
1159
+ return PBMODEL.basic(sample)["scores"]
1160
+
1161
+
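
A minimal inference sketch (assumes `cfg.MODEL_PATH` and related config are set; the chunk length of 144000 samples matches the 3 s / 48 kHz signature used elsewhere in this file):

```python
import numpy as np

# Score one silent 3-second chunk; predict() expects a batch of such chunks.
chunk = np.zeros(144000, dtype="float32")
scores = predict([chunk])
print(scores.shape)
```
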
1162
+ def predict_with_custom_classifier(sample):
1163
+ """Uses the custom classifier to make a prediction.
1164
+
1165
+ Args:
1166
+ sample: Audio sample.
1167
+
1168
+ Returns:
1169
+ The prediction scores for the sample.
1170
+ """
1171
+ # Does interpreter exist?
1172
+ if C_INTERPRETER is None and C_PBMODEL is None:
1173
+ load_custom_classifier()
1174
+
1175
+ if C_PBMODEL is None:
1176
+ vector = embeddings(sample) if C_INPUT_SIZE != 144000 else sample
1177
+
1178
+ # Reshape input tensor
1179
+ C_INTERPRETER.resize_tensor_input(C_INPUT_LAYER_INDEX, [len(vector), *vector[0].shape])
1180
+ C_INTERPRETER.allocate_tensors()
1181
+
1182
+ # Make a prediction
1183
+ C_INTERPRETER.set_tensor(C_INPUT_LAYER_INDEX, np.array(vector, dtype="float32"))
1184
+ C_INTERPRETER.invoke()
1185
+
1186
+ return C_INTERPRETER.get_tensor(C_OUTPUT_LAYER_INDEX)
1187
+
1188
+ return C_PBMODEL.basic(sample)["scores"]
1189
+
1190
+
1191
+ def embeddings(sample):
1192
+ """Extracts the embeddings for a sample.
1193
+
1194
+ Args:
1195
+ sample: Audio samples.
1196
+
1197
+ Returns:
1198
+ The embeddings.
1199
+ """
1200
+
1201
+ load_model(False)
1202
+
1203
+ sample = np.array(sample, dtype="float32")
1204
+
1205
+ # Reshape input tensor
1206
+ INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
1207
+ INTERPRETER.allocate_tensors()
1208
+
1209
+ # Extract feature embeddings
1210
+ INTERPRETER.set_tensor(INPUT_LAYER_INDEX, sample)
1211
+ INTERPRETER.invoke()
1212
+
1213
+ return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)