birdnet-analyzer 2.0.0-py3-none-any.whl → 2.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123)
  1. birdnet_analyzer/__init__.py +9 -8
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -4
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +246 -245
  6. birdnet_analyzer/analyze/utils.py +694 -701
  7. birdnet_analyzer/audio.py +368 -372
  8. birdnet_analyzer/cli.py +732 -707
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -4
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -13
  14. birdnet_analyzer/embeddings/core.py +70 -70
  15. birdnet_analyzer/embeddings/utils.py +220 -193
  16. birdnet_analyzer/evaluation/__init__.py +189 -195
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
  19. birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
  20. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -0
  21. birdnet_analyzer/evaluation/assessment/plotting.py +378 -0
  22. birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
  23. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
  24. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
  25. birdnet_analyzer/gui/__init__.py +19 -23
  26. birdnet_analyzer/gui/__main__.py +3 -3
  27. birdnet_analyzer/gui/analysis.py +179 -174
  28. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  30. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  31. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  32. birdnet_analyzer/gui/assets/gui.css +36 -28
  33. birdnet_analyzer/gui/assets/gui.js +93 -93
  34. birdnet_analyzer/gui/embeddings.py +638 -620
  35. birdnet_analyzer/gui/evaluation.py +801 -813
  36. birdnet_analyzer/gui/localization.py +75 -68
  37. birdnet_analyzer/gui/multi_file.py +265 -246
  38. birdnet_analyzer/gui/review.py +472 -527
  39. birdnet_analyzer/gui/segments.py +191 -191
  40. birdnet_analyzer/gui/settings.py +149 -129
  41. birdnet_analyzer/gui/single_file.py +264 -269
  42. birdnet_analyzer/gui/species.py +95 -95
  43. birdnet_analyzer/gui/train.py +687 -698
  44. birdnet_analyzer/gui/utils.py +797 -808
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  80. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  81. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  82. birdnet_analyzer/lang/de.json +341 -334
  83. birdnet_analyzer/lang/en.json +341 -334
  84. birdnet_analyzer/lang/fi.json +341 -334
  85. birdnet_analyzer/lang/fr.json +341 -334
  86. birdnet_analyzer/lang/id.json +341 -334
  87. birdnet_analyzer/lang/pt-br.json +341 -334
  88. birdnet_analyzer/lang/ru.json +341 -334
  89. birdnet_analyzer/lang/se.json +341 -334
  90. birdnet_analyzer/lang/tlh.json +341 -334
  91. birdnet_analyzer/lang/zh_TW.json +341 -334
  92. birdnet_analyzer/model.py +1212 -1243
  93. birdnet_analyzer/playground.py +5 -0
  94. birdnet_analyzer/search/__init__.py +3 -3
  95. birdnet_analyzer/search/__main__.py +3 -3
  96. birdnet_analyzer/search/cli.py +11 -12
  97. birdnet_analyzer/search/core.py +78 -78
  98. birdnet_analyzer/search/utils.py +107 -111
  99. birdnet_analyzer/segments/__init__.py +3 -3
  100. birdnet_analyzer/segments/__main__.py +3 -3
  101. birdnet_analyzer/segments/cli.py +13 -14
  102. birdnet_analyzer/segments/core.py +81 -78
  103. birdnet_analyzer/segments/utils.py +383 -394
  104. birdnet_analyzer/species/__init__.py +3 -3
  105. birdnet_analyzer/species/__main__.py +3 -3
  106. birdnet_analyzer/species/cli.py +13 -14
  107. birdnet_analyzer/species/core.py +35 -35
  108. birdnet_analyzer/species/utils.py +74 -75
  109. birdnet_analyzer/train/__init__.py +3 -3
  110. birdnet_analyzer/train/__main__.py +3 -3
  111. birdnet_analyzer/train/cli.py +13 -14
  112. birdnet_analyzer/train/core.py +113 -113
  113. birdnet_analyzer/train/utils.py +877 -847
  114. birdnet_analyzer/translate.py +133 -104
  115. birdnet_analyzer/utils.py +425 -419
  116. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/METADATA +146 -129
  117. birdnet_analyzer-2.1.0.dist-info/RECORD +125 -0
  118. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/WHEEL +1 -1
  119. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/licenses/LICENSE +18 -18
  120. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  121. birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
  122. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/entry_points.txt +0 -0
  123. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/top_level.txt +0 -0
birdnet_analyzer/model.py CHANGED
@@ -1,1243 +1,1212 @@
1
- """Contains functions to use the BirdNET models."""
2
-
3
- import os
4
- import sys
5
- import warnings
6
-
7
- import numpy as np
8
-
9
- import birdnet_analyzer.config as cfg
10
- import birdnet_analyzer.utils as utils
11
-
12
- SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
13
-
14
- os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
15
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
16
-
17
- warnings.filterwarnings("ignore")
18
-
19
- # Import TFLite from runtime or Tensorflow;
20
- # import Keras if protobuf model;
21
- # NOTE: we have to use TFLite if we want to use
22
- # the metadata model or want to extract embeddings
23
- try:
24
- import tflite_runtime.interpreter as tflite # type: ignore
25
- except ModuleNotFoundError:
26
- from tensorflow import lite as tflite
27
- if not cfg.MODEL_PATH.endswith(".tflite"):
28
- from tensorflow import keras
29
-
30
- INTERPRETER: tflite.Interpreter = None
31
- C_INTERPRETER: tflite.Interpreter = None
32
- M_INTERPRETER: tflite.Interpreter = None
33
- PBMODEL = None
34
- C_PBMODEL = None
35
- EMPTY_CLASS_EXCEPTION_REF = None
36
-
37
- def get_empty_class_exception():
38
- import keras_tuner.errors
39
- global EMPTY_CLASS_EXCEPTION_REF
40
-
41
-
42
- if EMPTY_CLASS_EXCEPTION_REF:
43
- return EMPTY_CLASS_EXCEPTION_REF
44
-
45
- class EmptyClassException(keras_tuner.errors.FatalError):
46
- """
47
- Exception raised when a class is found to be empty.
48
-
49
- Attributes:
50
- index (int): The index of the empty class.
51
- message (str): The error message indicating which class is empty.
52
- """
53
-
54
- def __init__(self, *args, index=None):
55
- super().__init__(*args)
56
- self.index = index
57
- self.message = f"Class {index} is empty."
58
-
59
- EMPTY_CLASS_EXCEPTION_REF = EmptyClassException
60
- return EMPTY_CLASS_EXCEPTION_REF
61
-
62
-
63
- def label_smoothing(y: np.ndarray, alpha=0.1):
64
- """
65
- Applies label smoothing to the given labels.
66
- Label smoothing is a technique used to prevent the model from becoming overconfident by adjusting the target labels.
67
- It subtracts a small value (alpha) from the correct label and distributes it among the other labels.
68
- Args:
69
- y (numpy.ndarray): Array of labels to be smoothed. The array should be of shape (num_labels,).
70
- alpha (float, optional): Smoothing parameter. Default is 0.1.
71
- Returns:
72
- numpy.ndarray: The smoothed labels.
73
- """
74
- # Subtract alpha from correct label when it is >0
75
- y[y > 0] -= alpha
76
-
77
- # Assigned alpha to all other labels
78
- y[y == 0] = alpha / y.shape[0]
79
-
80
- return y
81
-
82
-
83
- def mixup(x, y, augmentation_ratio=0.25, alpha=0.2):
84
- """Apply mixup to the given data.
85
-
86
- Mixup is a data augmentation technique that generates new samples by
87
- mixing two samples and their labels.
88
-
89
- Args:
90
- x: Samples.
91
- y: One-hot labels.
92
- augmentation_ratio: The ratio of augmented samples.
93
- alpha: The beta distribution parameter.
94
-
95
- Returns:
96
- Augmented data.
97
- """
98
-
99
- # Set numpy random seed
100
- np.random.seed(cfg.RANDOM_SEED)
101
-
102
- # Get indices of all positive samples
103
- positive_indices = np.unique(np.where(y[:, :] == 1)[0])
104
-
105
- # Calculate the number of samples to augment based on the ratio
106
- num_samples_to_augment = int(len(positive_indices) * augmentation_ratio)
107
-
108
- # Indices of samples, that are already mixed up
109
- mixed_up_indices = []
110
-
111
- for _ in range(num_samples_to_augment):
112
- # Randomly choose one instance from the positive samples
113
- index = np.random.choice(positive_indices)
114
-
115
- # Choose another one, when the chosen one was already mixed up
116
- while index in mixed_up_indices:
117
- index = np.random.choice(positive_indices)
118
-
119
- x1, y1 = x[index], y[index]
120
-
121
- # Randomly choose a different instance from the dataset
122
- second_index = np.random.choice(positive_indices)
123
-
124
- # Choose again, when the same or an already mixed up sample was selected
125
- while second_index == index or second_index in mixed_up_indices:
126
- second_index = np.random.choice(positive_indices)
127
- x2, y2 = x[second_index], y[second_index]
128
-
129
- # Generate a random mixing coefficient (lambda)
130
- lambda_ = np.random.beta(alpha, alpha)
131
-
132
- # Mix the embeddings and labels
133
- mixed_x = lambda_ * x1 + (1 - lambda_) * x2
134
- mixed_y = lambda_ * y1 + (1 - lambda_) * y2
135
-
136
- # Replace one of the original samples and labels with the augmented sample and labels
137
- x[index] = mixed_x
138
- y[index] = mixed_y
139
-
140
- # Mark the sample as already mixed up
141
- mixed_up_indices.append(index)
142
-
143
- del mixed_x
144
- del mixed_y
145
-
146
- return x, y
147
-
148
-
149
- def random_split(x, y, val_ratio=0.2):
150
- """Splits the data into training and validation data.
151
-
152
- Makes sure that each class is represented in both sets.
153
-
154
- Args:
155
- x: Samples.
156
- y: One-hot labels.
157
- val_ratio: The ratio of validation data.
158
-
159
- Returns:
160
- A tuple of (x_train, y_train, x_val, y_val).
161
- """
162
-
163
- # Set numpy random seed
164
- np.random.seed(cfg.RANDOM_SEED)
165
-
166
- # Get number of classes
167
- num_classes = y.shape[1]
168
-
169
- # Initialize training and validation data
170
- x_train, y_train, x_val, y_val = [], [], [], []
171
-
172
- # Split data
173
- for i in range(num_classes):
174
- # Get indices of positive samples of current class
175
- positive_indices = np.where(y[:, i] == 1)[0]
176
-
177
- # Get indices of negative samples of current class
178
- negative_indices = np.where(y[:, i] == -1)[0]
179
-
180
- # Get number of samples for each set
181
- num_samples = len(positive_indices)
182
- num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
183
- num_samples_val = max(0, num_samples - num_samples_train)
184
-
185
- # Randomly choose samples for training and validation
186
- np.random.shuffle(positive_indices)
187
- train_indices = positive_indices[:num_samples_train]
188
- val_indices = positive_indices[num_samples_train : num_samples_train + num_samples_val]
189
-
190
- # Append samples to training and validation data
191
- x_train.append(x[train_indices])
192
- y_train.append(y[train_indices])
193
- x_val.append(x[val_indices])
194
- y_val.append(y[val_indices])
195
-
196
- # Append negative samples to training data
197
- x_train.append(x[negative_indices])
198
- y_train.append(y[negative_indices])
199
-
200
- # Add samples for non-event classes to training and validation data
201
- non_event_indices = np.where(np.sum(y[:, :], axis=1) == 0)[0]
202
- num_samples = len(non_event_indices)
203
- num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
204
- num_samples_val = max(0, num_samples - num_samples_train)
205
- np.random.shuffle(non_event_indices)
206
- train_indices = non_event_indices[:num_samples_train]
207
- val_indices = non_event_indices[num_samples_train : num_samples_train + num_samples_val]
208
- x_train.append(x[train_indices])
209
- y_train.append(y[train_indices])
210
- x_val.append(x[val_indices])
211
- y_val.append(y[val_indices])
212
-
213
- # Concatenate data
214
- x_train = np.concatenate(x_train)
215
- y_train = np.concatenate(y_train)
216
- x_val = np.concatenate(x_val)
217
- y_val = np.concatenate(y_val)
218
-
219
- # Shuffle data
220
- indices = np.arange(len(x_train))
221
- np.random.shuffle(indices)
222
- x_train = x_train[indices]
223
- y_train = y_train[indices]
224
-
225
- indices = np.arange(len(x_val))
226
- np.random.shuffle(indices)
227
- x_val = x_val[indices]
228
- y_val = y_val[indices]
229
-
230
- return x_train, y_train, x_val, y_val
231
-
232
-
233
- def random_multilabel_split(x, y, val_ratio=0.2):
234
- """Splits the data into training and validation data.
235
-
236
- Makes sure that each combination of classes is represented in both sets.
237
-
238
- Args:
239
- x: Samples.
240
- y: One-hot labels.
241
- val_ratio: The ratio of validation data.
242
-
243
- Returns:
244
- A tuple of (x_train, y_train, x_val, y_val).
245
-
246
- """
247
-
248
- # Set numpy random seed
249
- np.random.seed(cfg.RANDOM_SEED)
250
-
251
- # Find all combinations of labels
252
- class_combinations = np.unique(y, axis=0)
253
-
254
- # Initialize training and validation data
255
- x_train, y_train, x_val, y_val = [], [], [], []
256
-
257
- # Split the data for each combination of labels
258
- for class_combination in class_combinations:
259
- # find all indices
260
- indices = np.where((y == class_combination).all(axis=1))[0]
261
-
262
- # When negative sample use only for training
263
- if -1 in class_combination:
264
- x_train.append(x[indices])
265
- y_train.append(y[indices])
266
- # Otherwise split according to the validation split
267
- else:
268
- # Get number of samples for each set
269
- num_samples = len(indices)
270
- num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
271
- num_samples_val = max(0, num_samples - num_samples_train)
272
- # Randomly choose samples for training and validation
273
- np.random.shuffle(indices)
274
- train_indices = indices[:num_samples_train]
275
- val_indices = indices[num_samples_train : num_samples_train + num_samples_val]
276
- # Append samples to training and validation data
277
- x_train.append(x[train_indices])
278
- y_train.append(y[train_indices])
279
- x_val.append(x[val_indices])
280
- y_val.append(y[val_indices])
281
-
282
- # Concatenate data
283
- x_train = np.concatenate(x_train)
284
- y_train = np.concatenate(y_train)
285
- x_val = np.concatenate(x_val)
286
- y_val = np.concatenate(y_val)
287
-
288
- # Shuffle data
289
- indices = np.arange(len(x_train))
290
- np.random.shuffle(indices)
291
- x_train = x_train[indices]
292
- y_train = y_train[indices]
293
-
294
- indices = np.arange(len(x_val))
295
- np.random.shuffle(indices)
296
- x_val = x_val[indices]
297
- y_val = y_val[indices]
298
-
299
- return x_train, y_train, x_val, y_val
300
-
301
-
302
- def upsample_core(x: np.ndarray, y: np.ndarray, min_samples: int, apply: callable, size=2):
303
- """
304
- Upsamples the minority class in the dataset using the specified apply function.
305
- Parameters:
306
- x (np.ndarray): The feature matrix.
307
- y (np.ndarray): The target labels.
308
- min_samples (int): The minimum number of samples required for the minority class.
309
- apply (callable): A function that applies the SMOTE or any other algorithm to the data.
310
- size (int, optional): The number of samples to generate in each iteration. Default is 2.
311
- Returns:
312
- tuple: A tuple containing the upsampled feature matrix and target labels.
313
- """
314
- y_temp = []
315
- x_temp = []
316
-
317
- if cfg.BINARY_CLASSIFICATION:
318
- # Determine if 1 or 0 is the minority class
319
- if y.sum(axis=0) < len(y) - y.sum(axis=0):
320
- minority_label = 1
321
- else:
322
- minority_label = 0
323
-
324
- while np.where(y == minority_label)[0].shape[0] + len(y_temp) < min_samples:
325
- # Randomly choose a sample from the minority class
326
- random_index = np.random.choice(np.where(y == minority_label)[0], size=size)
327
-
328
- # Apply SMOTE
329
- x_app, y_app = apply(x, y, random_index)
330
- y_temp.append(y_app)
331
- x_temp.append(x_app)
332
- else:
333
- for i in range(y.shape[1]):
334
- while y[:, i].sum() + len(y_temp) < min_samples:
335
- try:
336
- # Randomly choose a sample from the minority class
337
- random_index = np.random.choice(np.where(y[:, i] == 1)[0], size=size)
338
- except ValueError as e:
339
- raise get_empty_class_exception()(index=i) from e
340
-
341
- # Apply SMOTE
342
- x_app, y_app = apply(x, y, random_index)
343
- y_temp.append(y_app)
344
- x_temp.append(x_app)
345
-
346
- return x_temp, y_temp
347
-
348
-
349
- def upsampling(x: np.ndarray, y: np.ndarray, ratio=0.5, mode="repeat"):
350
- """Balance data through upsampling.
351
-
352
- We upsample minority classes to have at least 10% (ratio=0.1) of the samples of the majority class.
353
-
354
- Args:
355
- x: Samples.
356
- y: One-hot labels.
357
- ratio: The minimum ratio of minority to majority samples.
358
- mode: The upsampling mode. Either 'repeat', 'mean', 'linear' or 'smote'.
359
-
360
- Returns:
361
- Upsampled data.
362
- """
363
-
364
- # Set numpy random seed
365
- np.random.seed(cfg.RANDOM_SEED)
366
-
367
- # Determine min number of samples
368
- if cfg.BINARY_CLASSIFICATION:
369
- min_samples = int(max(y.sum(axis=0), len(y) - y.sum(axis=0)) * ratio)
370
- else:
371
- min_samples = int(np.max(y.sum(axis=0)) * ratio)
372
-
373
- x_temp = []
374
- y_temp = []
375
-
376
- if mode == "repeat":
377
-
378
- def applyRepeat(x, y, random_index):
379
- return x[random_index[0]], y[random_index[0]]
380
-
381
- x_temp, y_temp = upsample_core(x, y, min_samples, applyRepeat, size=1)
382
-
383
- elif mode == "mean":
384
- # For each class with less than min_samples
385
- # select two random samples and calculate the mean
386
- def applyMean(x, y, random_indices):
387
- # Calculate the mean of the two samples
388
- mean = np.mean(x[random_indices], axis=0)
389
-
390
- # Append the mean and label to a temp list
391
- return mean, y[random_indices[0]]
392
-
393
- x_temp, y_temp = upsample_core(x, y, min_samples, applyMean)
394
-
395
- elif mode == "linear":
396
- # For each class with less than min_samples
397
- # select two random samples and calculate the linear combination
398
- def applyLinearCombination(x, y, random_indices):
399
- # Calculate the linear combination of the two samples
400
- alpha = np.random.uniform(0, 1)
401
- new_sample = alpha * x[random_indices[0]] + (1 - alpha) * x[random_indices[1]]
402
-
403
- # Append the new sample and label to a temp list
404
- return new_sample, y[random_indices[0]]
405
-
406
- x_temp, y_temp = upsample_core(x, y, min_samples, applyLinearCombination)
407
-
408
- elif mode == "smote":
409
- # For each class with less than min_samples apply SMOTE
410
- def applySmote(x, y, random_index, k=5):
411
- # Get the k nearest neighbors
412
- distances = np.sqrt(np.sum((x - x[random_index[0]]) ** 2, axis=1))
413
- indices = np.argsort(distances)[1 : k + 1]
414
-
415
- # Randomly choose one of the neighbors
416
- random_neighbor = np.random.choice(indices)
417
-
418
- # Calculate the difference vector
419
- diff = x[random_neighbor] - x[random_index[0]]
420
-
421
- # Randomly choose a weight between 0 and 1
422
- weight = np.random.uniform(0, 1)
423
-
424
- # Calculate the new sample
425
- new_sample = x[random_index[0]] + weight * diff
426
-
427
- # Append the new sample and label to a temp list
428
- return new_sample, y[random_index[0]]
429
-
430
- x_temp, y_temp = upsample_core(x, y, min_samples, applySmote, size=1)
431
-
432
- # Append the temp list to the original data
433
- if len(x_temp) > 0:
434
- x = np.vstack((x, np.array(x_temp)))
435
- y = np.vstack((y, np.array(y_temp)))
436
-
437
- # Shuffle data
438
- indices = np.arange(len(x))
439
- np.random.shuffle(indices)
440
- x = x[indices]
441
- y = y[indices]
442
-
443
- del x_temp
444
- del y_temp
445
-
446
- return x, y
447
-
448
-
449
- def save_model_params(path):
450
- """Saves the model parameters to a file.
451
-
452
- Args:
453
- path: Path to the file.
454
- """
455
- utils.save_params(
456
- path,
457
- (
458
- "Hidden units",
459
- "Dropout",
460
- "Batchsize",
461
- "Learning rate",
462
- "Crop mode",
463
- "Crop overlap",
464
- "Audio speed",
465
- "Upsamling mode",
466
- "Upsamling ratio",
467
- "use mixup",
468
- "use label smoothing",
469
- "BirdNET Model version",
470
- ),
471
- (
472
- cfg.TRAIN_HIDDEN_UNITS,
473
- cfg.TRAIN_DROPOUT,
474
- cfg.TRAIN_BATCH_SIZE,
475
- cfg.TRAIN_LEARNING_RATE,
476
- cfg.SAMPLE_CROP_MODE,
477
- cfg.SIG_OVERLAP,
478
- cfg.AUDIO_SPEED,
479
- cfg.UPSAMPLING_MODE,
480
- cfg.UPSAMPLING_RATIO,
481
- cfg.TRAIN_WITH_MIXUP,
482
- cfg.TRAIN_WITH_LABEL_SMOOTHING,
483
- cfg.MODEL_VERSION,
484
- ),
485
- )
486
-
487
-
488
- def reset_custom_classifier():
489
- """
490
- Resets the custom classifier by setting the global variables C_INTERPRETER and C_PBMODEL to None.
491
- This function is used to clear any existing custom classifier models and interpreters, effectively
492
- resetting the state of the custom classifier.
493
- """
494
- global C_INTERPRETER
495
- global C_PBMODEL
496
-
497
- C_INTERPRETER = None
498
- C_PBMODEL = None
499
-
500
-
501
- def load_model(class_output=True):
502
- """
503
- Loads the machine learning model based on the configuration provided.
504
- This function loads either a TensorFlow Lite (TFLite) model or a protobuf model
505
- depending on the file extension of the model path specified in the configuration.
506
- It sets up the global variables for the model interpreter and input/output layer indices.
507
-
508
- Args:
509
- class_output (bool): If True, sets the output layer index to the classification output.
510
- If False, sets the output layer index to the feature embeddings.
511
- """
512
- global PBMODEL
513
- global INTERPRETER
514
- global INPUT_LAYER_INDEX
515
- global OUTPUT_LAYER_INDEX
516
-
517
- # Do we have to load the tflite or protobuf model?
518
- if cfg.MODEL_PATH.endswith(".tflite"):
519
- # Load TFLite model and allocate tensors.
520
- INTERPRETER = tflite.Interpreter(
521
- model_path=os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), num_threads=cfg.TFLITE_THREADS
522
- )
523
- INTERPRETER.allocate_tensors()
524
-
525
- # Get input and output tensors.
526
- input_details = INTERPRETER.get_input_details()
527
- output_details = INTERPRETER.get_output_details()
528
-
529
- # Get input tensor index
530
- INPUT_LAYER_INDEX = input_details[0]["index"]
531
-
532
- # Get classification output or feature embeddings
533
- if class_output:
534
- OUTPUT_LAYER_INDEX = output_details[0]["index"]
535
- else:
536
- OUTPUT_LAYER_INDEX = output_details[0]["index"] - 1
537
-
538
- else:
539
- # Load protobuf model
540
- # Note: This will throw a bunch of warnings about custom gradients
541
- # which we will ignore until TF lets us block them
542
- PBMODEL = keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), compile=False)
543
-
544
-
545
- def load_custom_classifier():
546
- """
547
- Loads a custom classifier model based on the file extension of the provided model path.
548
- If the model file ends with ".tflite", it loads a TensorFlow Lite model and sets up the interpreter,
549
- input layer index, output layer index, and input size.
550
- If the model file does not end with ".tflite", it loads a TensorFlow SavedModel.
551
- """
552
- global C_INTERPRETER
553
- global C_INPUT_LAYER_INDEX
554
- global C_OUTPUT_LAYER_INDEX
555
- global C_INPUT_SIZE
556
- global C_PBMODEL
557
-
558
- if cfg.CUSTOM_CLASSIFIER.endswith(".tflite"):
559
- # Load TFLite model and allocate tensors.
560
- C_INTERPRETER = tflite.Interpreter(model_path=cfg.CUSTOM_CLASSIFIER, num_threads=cfg.TFLITE_THREADS)
561
- C_INTERPRETER.allocate_tensors()
562
-
563
- # Get input and output tensors.
564
- input_details = C_INTERPRETER.get_input_details()
565
- output_details = C_INTERPRETER.get_output_details()
566
-
567
- # Get input tensor index
568
- C_INPUT_LAYER_INDEX = input_details[0]["index"]
569
-
570
- C_INPUT_SIZE = input_details[0]["shape"][-1]
571
-
572
- # Get classification output
573
- C_OUTPUT_LAYER_INDEX = output_details[0]["index"]
574
- else:
575
- import tensorflow as tf
576
-
577
- tf.get_logger().setLevel("ERROR")
578
-
579
- C_PBMODEL = tf.saved_model.load(cfg.CUSTOM_CLASSIFIER)
580
-
581
-
582
- def load_meta_model():
583
- """Loads the model for species prediction.
584
-
585
- Initializes the model used to predict species list, based on coordinates and week of year.
586
- """
587
- global M_INTERPRETER
588
- global M_INPUT_LAYER_INDEX
589
- global M_OUTPUT_LAYER_INDEX
590
-
591
- # Load TFLite model and allocate tensors.
592
- M_INTERPRETER = tflite.Interpreter(
593
- model_path=os.path.join(SCRIPT_DIR, cfg.MDATA_MODEL_PATH), num_threads=cfg.TFLITE_THREADS
594
- )
595
- M_INTERPRETER.allocate_tensors()
596
-
597
- # Get input and output tensors.
598
- input_details = M_INTERPRETER.get_input_details()
599
- output_details = M_INTERPRETER.get_output_details()
600
-
601
- # Get input tensor index
602
- M_INPUT_LAYER_INDEX = input_details[0]["index"]
603
- M_OUTPUT_LAYER_INDEX = output_details[0]["index"]
604
-
605
-
606
- def build_linear_classifier(num_labels, input_size, hidden_units=0, dropout=0.0):
607
- """Builds a classifier.
608
-
609
- Args:
610
- num_labels: Output size.
611
- input_size: Size of the input.
612
- hidden_units: If > 0, creates another hidden layer with the given number of units.
613
- dropout: Dropout rate.
614
-
615
- Returns:
616
- A new classifier.
617
- """
618
- # import keras
619
- from tensorflow import keras
620
-
621
- # Build a simple one- or two-layer linear classifier
622
- model = keras.Sequential()
623
-
624
- # Input layer
625
- model.add(keras.layers.InputLayer(input_shape=(input_size,)))
626
-
627
- # Batch normalization on input to standardize embeddings
628
- model.add(keras.layers.BatchNormalization())
629
-
630
- # Optional L2 regularization for all dense layers
631
- regularizer = keras.regularizers.l2(1e-5)
632
-
633
- # Hidden layer with improved architecture
634
- if hidden_units > 0:
635
- # Dropout layer before hidden layer
636
- if dropout > 0:
637
- model.add(keras.layers.Dropout(dropout))
638
-
639
- # Add a hidden layer with L2 regularization
640
- model.add(keras.layers.Dense(hidden_units,
641
- activation="relu",
642
- kernel_regularizer=regularizer,
643
- kernel_initializer='he_normal'))
644
-
645
- # Add another batch normalization after the hidden layer
646
- model.add(keras.layers.BatchNormalization())
647
-
648
- # Dropout layer before output
649
- if dropout > 0:
650
- model.add(keras.layers.Dropout(dropout))
651
-
652
- # Classification layer with L2 regularization
653
- model.add(keras.layers.Dense(num_labels,
654
- kernel_regularizer=regularizer,
655
- kernel_initializer='glorot_uniform'))
656
-
657
- # Activation layer
658
- model.add(keras.layers.Activation("sigmoid"))
659
-
660
- return model
661
-
662
-
663
- def train_linear_classifier(
664
- classifier,
665
- x_train,
666
- y_train,
667
- x_test,
668
- y_test,
669
- epochs,
670
- batch_size,
671
- learning_rate,
672
- val_split,
673
- upsampling_ratio,
674
- upsampling_mode,
675
- train_with_mixup,
676
- train_with_label_smoothing,
677
- train_with_focal_loss=False,
678
- focal_loss_gamma=2.0,
679
- focal_loss_alpha=0.25,
680
- on_epoch_end=None,
681
- ):
682
- """Trains a custom classifier.
683
-
684
- Trains a new classifier for BirdNET based on the given data.
685
-
686
- Args:
687
- classifier: The classifier to be trained.
688
- x_train: Samples.
689
- y_train: Labels.
690
- x_test: Validation samples.
691
- y_test: Validation labels.
692
- epochs: Number of epochs to train.
693
- batch_size: Batch size.
694
- learning_rate: The learning rate during training.
695
- val_split: Validation split ratio (is 0 when using test data).
696
- upsampling_ratio: Upsampling ratio.
697
- upsampling_mode: Upsampling mode.
698
- train_with_mixup: If True, applies mixup to the training data.
699
- train_with_label_smoothing: If True, applies label smoothing to the training data.
700
- train_with_focal_loss: If True, uses focal loss instead of binary cross-entropy loss.
701
- focal_loss_gamma: Focal loss gamma parameter.
702
- focal_loss_alpha: Focal loss alpha parameter.
703
- on_epoch_end: Optional callback `function(epoch, logs)`.
704
-
705
- Returns:
706
- (classifier, history)
707
- """
708
- # import keras
709
- from tensorflow import keras
710
-
711
- class FunctionCallback(keras.callbacks.Callback):
712
- def __init__(self, on_epoch_end=None) -> None:
713
- super().__init__()
714
- self.on_epoch_end_fn = on_epoch_end
715
-
716
- def on_epoch_end(self, epoch, logs=None):
717
- if self.on_epoch_end_fn:
718
- self.on_epoch_end_fn(epoch, logs)
719
-
720
- # Set random seed
721
- np.random.seed(cfg.RANDOM_SEED)
722
-
723
- # Shuffle data
724
- idx = np.arange(x_train.shape[0])
725
- np.random.shuffle(idx)
726
- x_train = x_train[idx]
727
- y_train = y_train[idx]
728
-
729
- # Random val split
730
- if val_split > 0:
731
- if not cfg.MULTI_LABEL:
732
- x_train, y_train, x_val, y_val = random_split(x_train, y_train, val_split)
733
- else:
734
- x_train, y_train, x_val, y_val = random_multilabel_split(x_train, y_train, val_split)
735
- else:
736
- x_val = x_test
737
- y_val = y_test
738
-
739
- print(
740
- f"Training on {x_train.shape[0]} samples, validating on {x_val.shape[0]} samples.",
741
- flush=True,
742
- )
743
-
744
- # Upsample training data
745
- if upsampling_ratio > 0:
746
- x_train, y_train = upsampling(x_train, y_train, upsampling_ratio, upsampling_mode)
747
- print(f"Upsampled training data to {x_train.shape[0]} samples.", flush=True)
748
-
749
- # Apply mixup to training data
750
- if train_with_mixup and not cfg.BINARY_CLASSIFICATION:
751
- x_train, y_train = mixup(x_train, y_train)
752
-
753
- # Apply label smoothing
754
- if train_with_label_smoothing and not cfg.BINARY_CLASSIFICATION:
755
- y_train = label_smoothing(y_train)
756
-
757
- # Early stopping with patience depending on dataset size
758
- patience = min(10, max(5, int(epochs / 10)))
759
- min_delta = 0.001
760
-
761
- callbacks = [
762
- # EarlyStopping with restore_best_weights
763
- keras.callbacks.EarlyStopping(
764
- monitor="val_AUPRC",
765
- mode="max",
766
- patience=patience,
767
- verbose=1,
768
- min_delta=min_delta,
769
- restore_best_weights=True,
770
- ),
771
- # Function callback for progress tracking
772
- FunctionCallback(on_epoch_end=on_epoch_end),
773
- ]
774
-
775
- # Learning rate schedule - use cosine decay with warmup
776
- warmup_epochs = min(5, int(epochs * 0.1))
777
- total_steps = epochs * x_train.shape[0] / batch_size
778
- warmup_steps = warmup_epochs * x_train.shape[0] / batch_size
779
-
780
- def lr_schedule(epoch, lr):
781
- if epoch < warmup_epochs:
782
- # Linear warmup
783
- return learning_rate * (epoch + 1) / warmup_epochs
784
- else:
785
- # Cosine decay
786
- progress = (epoch - warmup_epochs) / (epochs - warmup_epochs)
787
- return learning_rate * (0.1 + 0.9 * (1 + np.cos(np.pi * progress)) / 2)
788
-
789
- # Add LR scheduler callback
790
- callbacks.append(keras.callbacks.LearningRateScheduler(lr_schedule))
791
-
792
- optimizer_cls = keras.optimizers.legacy.Adam if sys.platform == "darwin" else keras.optimizers.Adam
793
-
794
- # Choose the loss function based on config
795
- loss_function = custom_loss
796
- if train_with_focal_loss:
797
- loss_function = lambda y_true, y_pred: focal_loss(
798
- y_true, y_pred, gamma=cfg.FOCAL_LOSS_GAMMA, alpha=cfg.FOCAL_LOSS_ALPHA
799
- )
800
-
801
- # Compile model with appropriate metrics for classification task
802
- classifier.compile(
803
- optimizer=optimizer_cls(learning_rate=learning_rate),
804
- loss=loss_function,
805
- metrics=[
806
- keras.metrics.AUC(
807
- curve="PR",
808
- multi_label=cfg.MULTI_LABEL,
809
- name="AUPRC",
810
- num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
811
- from_logits=True,
812
- ),
813
- keras.metrics.AUC(
814
- curve="ROC",
815
- multi_label=cfg.MULTI_LABEL,
816
- name="AUROC",
817
- num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
818
- from_logits=True,
819
- ),
820
- ],
821
- )
822
-
823
- # Train model
824
- history = classifier.fit(
825
- x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, y_val), callbacks=callbacks
826
- )
827
-
828
- return classifier, history
829
-
830
-
831
- def save_linear_classifier(classifier, model_path: str, labels: list[str], mode="replace"):
832
- """Saves the classifier as a tflite model, as well as the used labels in a .txt.
833
-
834
- Args:
835
- classifier: The custom classifier.
836
- model_path: Path the model will be saved at.
837
- labels: List of labels used for the classifier.
838
- """
839
- import tensorflow as tf
840
-
841
- global PBMODEL
842
-
843
- tf.get_logger().setLevel("ERROR")
844
-
845
- if PBMODEL is None:
846
- PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
847
-
848
- saved_model = PBMODEL
849
-
850
- # Remove activation layer
851
- classifier.pop()
852
-
853
- if mode == "replace":
854
- combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
855
- elif mode == "append":
856
- intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
857
-
858
- output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
859
-
860
- combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
861
- else:
862
- raise ValueError("Model save mode must be either 'replace' or 'append'")
863
-
864
- # Append .tflite if necessary
865
- if not model_path.endswith(".tflite"):
866
- model_path += ".tflite"
867
-
868
- # Make folders
869
- os.makedirs(os.path.dirname(model_path), exist_ok=True)
870
-
871
- # Save model as tflite
872
- converter = tf.lite.TFLiteConverter.from_keras_model(combined_model)
873
- tflite_model = converter.convert()
874
- open(model_path, "wb").write(tflite_model)
875
-
876
- if mode == "append":
877
- labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
878
-
879
- # Save labels
880
- with open(model_path.replace(".tflite", "_Labels.txt"), "w", encoding="utf-8") as f:
881
- for label in labels:
882
- f.write(label + "\n")
883
-
884
- save_model_params(model_path.replace(".tflite", "_Params.csv"))
885
-
886
-
887
- def save_raven_model(classifier, model_path, labels: list[str], mode="replace"):
888
- """
889
- Save a TensorFlow model with a custom classifier and associated metadata for use with BirdNET.
890
-
891
- Args:
892
- classifier (tf.keras.Model): The custom classifier model to be saved.
893
- model_path (str): The path where the model will be saved.
894
- labels (list[str]): A list of labels associated with the classifier.
895
- mode (str, optional): The mode for saving the model. Can be either "replace" or "append".
896
- Defaults to "replace".
897
-
898
- Raises:
899
- ValueError: If the mode is not "replace" or "append".
900
-
901
- Returns:
902
- None
903
- """
904
- import csv
905
- import json
906
-
907
- import tensorflow as tf
908
-
909
- global PBMODEL
910
-
911
- tf.get_logger().setLevel("ERROR")
912
-
913
- if PBMODEL is None:
914
- PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
915
-
916
- saved_model = PBMODEL
917
-
918
- if mode == "replace":
919
- combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
920
- elif mode == "append":
921
- # Remove activation layer
922
- classifier.pop()
923
- intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
924
-
925
- output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
926
-
927
- combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
928
- else:
929
- raise ValueError("Model save mode must be either 'replace' or 'append'")
930
-
931
- # Make signatures
932
- class SignatureModule(tf.Module):
933
- def __init__(self, keras_model):
934
- super().__init__()
935
- self.model = keras_model
936
-
937
- @tf.function(input_signature=[tf.TensorSpec(shape=[None, 144000], dtype=tf.float32)])
938
- def basic(self, inputs):
939
- return {"scores": self.model(inputs)}
940
-
941
- smodel = SignatureModule(combined_model)
942
- signatures = {
943
- "basic": smodel.basic,
944
- }
945
-
946
- # Save signature model
947
- os.makedirs(os.path.dirname(model_path), exist_ok=True)
948
- model_path = model_path[:-7] if model_path.endswith(".tflite") else model_path
949
- tf.saved_model.save(smodel, model_path, signatures=signatures)
950
-
951
- if mode == "append":
952
- labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
953
-
954
- # Save label file
955
- labelIds = [label[:4].replace(" ", "") + str(i) for i, label in enumerate(labels, 1)]
956
- labels_dir = os.path.join(model_path, "labels")
957
-
958
- os.makedirs(labels_dir, exist_ok=True)
959
-
960
- with open(os.path.join(labels_dir, "label_names.csv"), "w", newline="") as labelsfile:
961
- labelwriter = csv.writer(labelsfile)
962
- labelwriter.writerows(zip(labelIds, labels))
963
-
964
- # Save class names file
965
- classes_dir = os.path.join(model_path, "classes")
966
-
967
- os.makedirs(classes_dir, exist_ok=True)
968
-
969
- with open(os.path.join(classes_dir, "classes.csv"), "w", newline="") as classesfile:
970
- classeswriter = csv.writer(classesfile)
971
- for labelId in labelIds:
972
- classeswriter.writerow((labelId, 0.25, cfg.SIG_FMIN, cfg.SIG_FMAX, False))
973
-
974
- # Save model config
975
- model_config = os.path.join(model_path, "model_config.json")
976
-
977
- with open(model_config, "w") as modelconfigfile:
978
- modelconfig = {
979
- "specVersion": 1,
980
- "modelDescription": "Custom classifier trained with BirdNET "
981
- + cfg.MODEL_VERSION
982
- + " embeddings.\n"
983
- + "BirdNET was developed by the K. Lisa Yang Center for Conservation Bioacoustics"
984
- + "at the Cornell Lab of Ornithology in collaboration with Chemnitz University of Technology.\n\n"
985
- + "https://birdnet.cornell.edu",
986
- "modelTypeConfig": {"modelType": "RECOGNITION"},
987
- "signatures": [
988
- {
989
- "signatureName": "basic",
990
- "modelInputs": [
991
- {
992
- "inputName": "inputs",
993
- "sampleRate": 48000.0,
994
- "inputConfig": ["batch", "samples"],
995
- }
996
- ],
997
- "modelOutputs": [{"outputName": "scores", "outputType": "SCORES"}],
998
- }
999
- ],
1000
- "globalSemanticKeys": labelIds,
1001
- }
1002
- json.dump(modelconfig, modelconfigfile, indent=2)
1003
-
1004
- model_params = os.path.join(model_path, "model_params.csv")
1005
-
1006
- save_model_params(model_params)
1007
-
1008
-
1009
- def predict_filter(lat, lon, week):
1010
- """Predicts the probability for each species.
1011
-
1012
- Args:
1013
- lat: The latitude.
1014
- lon: The longitude.
1015
- week: The week of the year [1-48]. Use -1 for yearlong.
1016
-
1017
- Returns:
1018
- A list of probabilities for all species.
1019
- """
1020
- global M_INTERPRETER
1021
-
1022
- # Does interpreter exist?
1023
- if M_INTERPRETER is None:
1024
- load_meta_model()
1025
-
1026
- # Prepare mdata as sample
1027
- sample = np.expand_dims(np.array([lat, lon, week], dtype="float32"), 0)
1028
-
1029
- # Run inference
1030
- M_INTERPRETER.set_tensor(M_INPUT_LAYER_INDEX, sample)
1031
- M_INTERPRETER.invoke()
1032
-
1033
- return M_INTERPRETER.get_tensor(M_OUTPUT_LAYER_INDEX)[0]
1034
-
1035
-
1036
- def explore(lat: float, lon: float, week: int):
1037
- """Predicts the species list.
1038
-
1039
- Predicts the species list based on the coordinates and week of year.
1040
-
1041
- Args:
1042
- lat: The latitude.
1043
- lon: The longitude.
1044
- week: The week of the year [1-48]. Use -1 for yearlong.
1045
-
1046
- Returns:
1047
- A sorted list of tuples with the score and the species.
1048
- """
1049
- # Make filter prediction
1050
- l_filter = predict_filter(lat, lon, week)
1051
-
1052
- # Apply threshold
1053
- l_filter = np.where(l_filter >= cfg.LOCATION_FILTER_THRESHOLD, l_filter, 0)
1054
-
1055
- # Zip with labels
1056
- l_filter = list(zip(l_filter, cfg.LABELS))
1057
-
1058
- # Sort by filter value
1059
- l_filter = sorted(l_filter, key=lambda x: x[0], reverse=True)
1060
-
1061
- return l_filter
1062
-
1063
-
1064
- def focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25, epsilon=1e-7):
1065
- """
1066
- Focal loss for better handling of class imbalance.
1067
-
1068
- This loss function gives more weight to hard examples and down-weights easy examples.
1069
- Particularly helpful for imbalanced datasets where some classes have few samples.
1070
-
1071
- Args:
1072
- y_true: Ground truth labels.
1073
- y_pred: Predicted probabilities.
1074
- gamma: Focusing parameter. Higher values mean more focus on hard examples.
1075
- alpha: Balance parameter. Controls weight of positive vs negative examples.
1076
- epsilon: Small constant to prevent log(0).
1077
-
1078
- Returns:
1079
- Focal loss value.
1080
- """
1081
- import tensorflow.keras.backend as K
1082
-
1083
- # Apply sigmoid if not already applied
1084
- y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
1085
-
1086
- # Calculate cross entropy
1087
- cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
1088
-
1089
- # Calculate focal weight
1090
- p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
1091
- focal_weight = K.pow(1 - p_t, gamma)
1092
-
1093
- # Apply alpha balancing
1094
- alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
1095
-
1096
- # Calculate focal loss
1097
- focal_loss = alpha_factor * focal_weight * cross_entropy
1098
-
1099
- # Sum over all classes
1100
- return K.sum(focal_loss, axis=-1)
1101
-
1102
- def custom_loss(y_true, y_pred, epsilon=1e-7):
1103
- import tensorflow.keras.backend as K
1104
-
1105
- # Calculate loss for positive labels with epsilon
1106
- positive_loss = -K.sum(y_true * K.log(K.clip(y_pred, epsilon, 1.0 - epsilon)), axis=-1)
1107
-
1108
- # Calculate loss for negative labels with epsilon
1109
- negative_loss = -K.sum((1 - y_true) * K.log(K.clip(1 - y_pred, epsilon, 1.0 - epsilon)), axis=-1)
1110
-
1111
- # Combine both loss terms
1112
- total_loss = positive_loss + negative_loss
1113
-
1114
- return total_loss
1115
-
1116
-
1117
- def flat_sigmoid(x, sensitivity=-1, bias=1.0):
1118
- """
1119
- Applies a flat sigmoid function to the input array with a bias shift.
1120
-
1121
- The flat sigmoid function is defined as:
1122
- f(x) = 1 / (1 + exp(sensitivity * clip(x + bias, -20, 20)))
1123
-
1124
- We transform the bias parameter to a range of [-100, 100] with the formula:
1125
- transformed_bias = (bias - 1.0) * 10.0
1126
-
1127
- Thus, higher bias values will shift the sigmoid function to the right on the x-axis, making it more "sensitive".
1128
-
1129
- Note: Not sure why we are clipping, must be for numerical stability somewhere else in the code.
1130
-
1131
- Args:
1132
- x (array-like): Input data.
1133
- sensitivity (float, optional): Sensitivity parameter for the sigmoid function. Default is -1.
1134
- bias (float, optional): Bias parameter to shift the sigmoid function on the x-axis. Must be in the range [0.01, 1.99]. Default is 1.0.
1135
-
1136
- Returns:
1137
- numpy.ndarray: Transformed data after applying the flat sigmoid function.
1138
- """
1139
-
1140
- transformed_bias = (bias - 1.0) * 10.0
1141
-
1142
- return 1 / (1.0 + np.exp(sensitivity * np.clip(x + transformed_bias, -20, 20)))
1143
-
1144
-
1145
- def predict(sample):
1146
- """Uses the main net to predict a sample.
1147
-
1148
- Args:
1149
- sample: Audio sample.
1150
-
1151
- Returns:
1152
- The prediction scores for the sample.
1153
- """
1154
- # Has custom classifier?
1155
- if cfg.CUSTOM_CLASSIFIER is not None:
1156
- return predict_with_custom_classifier(sample)
1157
-
1158
- global INTERPRETER
1159
-
1160
- # Does interpreter or keras model exist?
1161
- if INTERPRETER is None and PBMODEL is None:
1162
- load_model()
1163
-
1164
- if PBMODEL is None:
1165
- # Reshape input tensor
1166
- INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
1167
- INTERPRETER.allocate_tensors()
1168
-
1169
- # Make a prediction (Audio only for now)
1170
- INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
1171
- INTERPRETER.invoke()
1172
- prediction = INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
1173
-
1174
- return prediction
1175
-
1176
- else:
1177
- # Make a prediction (Audio only for now)
1178
- prediction = PBMODEL.basic(sample)["scores"]
1179
-
1180
- return prediction
1181
-
1182
-
1183
- def predict_with_custom_classifier(sample):
1184
- """Uses the custom classifier to make a prediction.
1185
-
1186
- Args:
1187
- sample: Audio sample.
1188
-
1189
- Returns:
1190
- The prediction scores for the sample.
1191
- """
1192
- global C_INTERPRETER
1193
- global C_INPUT_SIZE
1194
- global C_PBMODEL
1195
-
1196
- # Does interpreter exist?
1197
- if C_INTERPRETER is None and C_PBMODEL is None:
1198
- load_custom_classifier()
1199
-
1200
- if C_PBMODEL is None:
1201
- vector = embeddings(sample) if C_INPUT_SIZE != 144000 else sample
1202
-
1203
- # Reshape input tensor
1204
- C_INTERPRETER.resize_tensor_input(C_INPUT_LAYER_INDEX, [len(vector), *vector[0].shape])
1205
- C_INTERPRETER.allocate_tensors()
1206
-
1207
- # Make a prediction
1208
- C_INTERPRETER.set_tensor(C_INPUT_LAYER_INDEX, np.array(vector, dtype="float32"))
1209
- C_INTERPRETER.invoke()
1210
- prediction = C_INTERPRETER.get_tensor(C_OUTPUT_LAYER_INDEX)
1211
-
1212
- return prediction
1213
- else:
1214
- prediction = C_PBMODEL.basic(sample)["scores"]
1215
-
1216
- return prediction
1217
-
1218
-
1219
- def embeddings(sample):
1220
- """Extracts the embeddings for a sample.
1221
-
1222
- Args:
1223
- sample: Audio samples.
1224
-
1225
- Returns:
1226
- The embeddings.
1227
- """
1228
- global INTERPRETER
1229
-
1230
- # Does interpreter exist?
1231
- if INTERPRETER is None:
1232
- load_model(False)
1233
-
1234
- # Reshape input tensor
1235
- INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
1236
- INTERPRETER.allocate_tensors()
1237
-
1238
- # Extract feature embeddings
1239
- INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
1240
- INTERPRETER.invoke()
1241
- features = INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
1242
-
1243
- return features
1
+ # ruff: noqa: PLW0603
2
+ """Contains functions to use the BirdNET models."""
3
+
4
+ import os
5
+ import sys
6
+ import warnings
7
+
8
+ import numpy as np
9
+
10
+ import birdnet_analyzer.config as cfg
11
+ from birdnet_analyzer import utils
12
+
13
+ SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
14
+
15
+
16
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
17
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
18
+
19
+ warnings.filterwarnings("ignore")
20
+
21
+ # Import TFLite from runtime or Tensorflow;
22
+ # import Keras if protobuf model;
23
+ # NOTE: we have to use TFLite if we want to use
24
+ # the metadata model or want to extract embeddings
25
+ try:
26
+ import tflite_runtime.interpreter as tflite # type: ignore
27
+ except ModuleNotFoundError:
28
+ from tensorflow import lite as tflite
29
+ if not cfg.MODEL_PATH.endswith(".tflite"):
30
+ from tensorflow import keras
31
+
32
+ INTERPRETER: tflite.Interpreter = None
33
+ C_INTERPRETER: tflite.Interpreter = None
34
+ M_INTERPRETER: tflite.Interpreter = None
35
+ OUTPUT_DETAILS = None
36
+ PBMODEL = None
37
+ C_PBMODEL = None
38
+ EMPTY_CLASS_EXCEPTION_REF = None
39
+
40
+
41
+ def get_empty_class_exception():
42
+ import keras_tuner.errors
43
+
44
+ global EMPTY_CLASS_EXCEPTION_REF
45
+
46
+ if EMPTY_CLASS_EXCEPTION_REF:
47
+ return EMPTY_CLASS_EXCEPTION_REF
48
+
49
+ class EmptyClassException(keras_tuner.errors.FatalError):
50
+ """
51
+ Exception raised when a class is found to be empty.
52
+
53
+ Attributes:
54
+ index (int): The index of the empty class.
55
+ message (str): The error message indicating which class is empty.
56
+ """
57
+
58
+ def __init__(self, *args, index=None):
59
+ super().__init__(*args)
60
+ self.index = index
61
+ self.message = f"Class {index} is empty."
62
+
63
+ EMPTY_CLASS_EXCEPTION_REF = EmptyClassException
64
+ return EMPTY_CLASS_EXCEPTION_REF
65
+
66
+
67
+ def label_smoothing(y: np.ndarray, alpha=0.1):
68
+ """
69
+ Applies label smoothing to the given labels.
70
+ Label smoothing is a technique used to prevent the model from becoming overconfident by adjusting the target labels.
71
+ It subtracts a small value (alpha) from the correct label and distributes it among the other labels.
72
+ Args:
73
+ y (numpy.ndarray): Array of labels to be smoothed. The array should be of shape (num_labels,).
74
+ alpha (float, optional): Smoothing parameter. Default is 0.1.
75
+ Returns:
76
+ numpy.ndarray: The smoothed labels.
77
+ """
78
+ # Subtract alpha from correct label when it is >0
79
+ y[y > 0] -= alpha
80
+
81
+ # Assigned alpha to all other labels
82
+ y[y == 0] = alpha / y.shape[0]
83
+
84
+ return y
85
+
86
+
87
+ def mixup(x, y, augmentation_ratio=0.25, alpha=0.2):
88
+ """Apply mixup to the given data.
89
+
90
+ Mixup is a data augmentation technique that generates new samples by
91
+ mixing two samples and their labels.
92
+
93
+ Args:
94
+ x: Samples.
95
+ y: One-hot labels.
96
+ augmentation_ratio: The ratio of augmented samples.
97
+ alpha: The beta distribution parameter.
98
+
99
+ Returns:
100
+ Augmented data.
101
+ """
102
+ rng = np.random.default_rng(cfg.RANDOM_SEED)
103
+
104
+ # Get indices of all positive samples
105
+ positive_indices = np.unique(np.where(y[:, :] == 1)[0])
106
+
107
+ # Calculate the number of samples to augment based on the ratio
108
+ num_samples_to_augment = int(len(positive_indices) * augmentation_ratio)
109
+
110
+ # Indices of samples, that are already mixed up
111
+ mixed_up_indices = []
112
+
113
+ for _ in range(num_samples_to_augment):
114
+ # Randomly choose one instance from the positive samples
115
+ index = rng.choice(positive_indices)
116
+
117
+ # Choose another one, when the chosen one was already mixed up
118
+ while index in mixed_up_indices:
119
+ index = rng.choice(positive_indices)
120
+
121
+ x1, y1 = x[index], y[index]
122
+
123
+ # Randomly choose a different instance from the dataset
124
+ second_index = rng.choice(positive_indices)
125
+
126
+ # Choose again, when the same or an already mixed up sample was selected
127
+ while second_index == index or second_index in mixed_up_indices:
128
+ second_index = rng.choice(positive_indices)
129
+ x2, y2 = x[second_index], y[second_index]
130
+
131
+ # Generate a random mixing coefficient (lambda)
132
+ lambda_ = rng.beta(alpha, alpha)
133
+
134
+ # Mix the embeddings and labels
135
+ mixed_x = lambda_ * x1 + (1 - lambda_) * x2
136
+ mixed_y = lambda_ * y1 + (1 - lambda_) * y2
137
+
138
+ # Replace one of the original samples and labels with the augmented sample and labels
139
+ x[index] = mixed_x
140
+ y[index] = mixed_y
141
+
142
+ # Mark the sample as already mixed up
143
+ mixed_up_indices.append(index)
144
+
145
+ del mixed_x
146
+ del mixed_y
147
+
148
+ return x, y
149
+
150
+
151
+ def random_split(x, y, val_ratio=0.2):
+     """Splits the data into training and validation data.
+
+     Makes sure that each class is represented in both sets.
+
+     Args:
+         x: Samples.
+         y: One-hot labels.
+         val_ratio: The ratio of validation data.
+
+     Returns:
+         A tuple of (x_train, y_train, x_val, y_val).
+     """
+     rng = np.random.default_rng(cfg.RANDOM_SEED)
+
+     # Get number of classes
+     num_classes = y.shape[1]
+
+     # Initialize training and validation data
+     x_train, y_train, x_val, y_val = [], [], [], []
+
+     # Split data
+     for i in range(num_classes):
+         # Get indices of positive samples of current class
+         positive_indices = np.where(y[:, i] == 1)[0]
+
+         # Get indices of negative samples of current class
+         negative_indices = np.where(y[:, i] == -1)[0]
+
+         # Get number of samples for each set
+         num_samples = len(positive_indices)
+         num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
+         num_samples_val = max(0, num_samples - num_samples_train)
+
+         # Randomly choose samples for training and validation
+         rng.shuffle(positive_indices)
+         train_indices = positive_indices[:num_samples_train]
+         val_indices = positive_indices[num_samples_train : num_samples_train + num_samples_val]
+
+         # Append samples to training and validation data
+         x_train.append(x[train_indices])
+         y_train.append(y[train_indices])
+         x_val.append(x[val_indices])
+         y_val.append(y[val_indices])
+
+         # Append negative samples to training data
+         x_train.append(x[negative_indices])
+         y_train.append(y[negative_indices])
+
+     # Add samples for non-event classes to training and validation data
+     non_event_indices = np.where(np.sum(y[:, :], axis=1) == 0)[0]
+     num_samples = len(non_event_indices)
+     num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
+     num_samples_val = max(0, num_samples - num_samples_train)
+     rng.shuffle(non_event_indices)
+     train_indices = non_event_indices[:num_samples_train]
+     val_indices = non_event_indices[num_samples_train : num_samples_train + num_samples_val]
+     x_train.append(x[train_indices])
+     y_train.append(y[train_indices])
+     x_val.append(x[val_indices])
+     y_val.append(y[val_indices])
+
+     # Concatenate data
+     x_train = np.concatenate(x_train)
+     y_train = np.concatenate(y_train)
+     x_val = np.concatenate(x_val)
+     y_val = np.concatenate(y_val)
+
+     # Shuffle data
+     indices = np.arange(len(x_train))
+     rng.shuffle(indices)
+     x_train = x_train[indices]
+     y_train = y_train[indices]
+
+     indices = np.arange(len(x_val))
+     rng.shuffle(indices)
+     x_val = x_val[indices]
+     y_val = y_val[indices]
+
+     return x_train, y_train, x_val, y_val
+
+
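random_split stratifies per class: at least one positive sample of every class stays in training, the remaining held-out fraction goes to validation, and negatives always stay in training. The per-class bookkeeping in isolation, on a toy one-hot label matrix (illustrative only, not the module's data):

import numpy as np

rng = np.random.default_rng(0)
y = np.eye(3)[rng.integers(0, 3, size=20)]  # 20 samples, 3 classes, one-hot

val_ratio = 0.2
for i in range(y.shape[1]):
    positive_indices = np.where(y[:, i] == 1)[0]
    num_train = max(1, int(len(positive_indices) * (1 - val_ratio)))
    rng.shuffle(positive_indices)
    print(f"class {i}: {num_train} train / {len(positive_indices) - num_train} val")
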
+ def random_multilabel_split(x, y, val_ratio=0.2):
+     """Splits the data into training and validation data.
+
+     Makes sure that each combination of classes is represented in both sets.
+
+     Args:
+         x: Samples.
+         y: One-hot labels.
+         val_ratio: The ratio of validation data.
+
+     Returns:
+         A tuple of (x_train, y_train, x_val, y_val).
+     """
+     rng = np.random.default_rng(cfg.RANDOM_SEED)
+
+     # Find all combinations of labels
+     class_combinations = np.unique(y, axis=0)
+
+     # Initialize training and validation data
+     x_train, y_train, x_val, y_val = [], [], [], []
+
+     # Split the data for each combination of labels
+     for class_combination in class_combinations:
+         # Find all indices
+         indices = np.where((y == class_combination).all(axis=1))[0]
+
+         # Combinations with negative samples are used for training only
+         if -1 in class_combination:
+             x_train.append(x[indices])
+             y_train.append(y[indices])
+         # Otherwise split according to the validation ratio
+         else:
+             # Get number of samples for each set
+             num_samples = len(indices)
+             num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
+             num_samples_val = max(0, num_samples - num_samples_train)
+             # Randomly choose samples for training and validation
+             rng.shuffle(indices)
+             train_indices = indices[:num_samples_train]
+             val_indices = indices[num_samples_train : num_samples_train + num_samples_val]
+             # Append samples to training and validation data
+             x_train.append(x[train_indices])
+             y_train.append(y[train_indices])
+             x_val.append(x[val_indices])
+             y_val.append(y[val_indices])
+
+     # Concatenate data
+     x_train = np.concatenate(x_train)
+     y_train = np.concatenate(y_train)
+     x_val = np.concatenate(x_val)
+     y_val = np.concatenate(y_val)
+
+     # Shuffle data
+     indices = np.arange(len(x_train))
+     rng.shuffle(indices)
+     x_train = x_train[indices]
+     y_train = y_train[indices]
+
+     indices = np.arange(len(x_val))
+     rng.shuffle(indices)
+     x_val = x_val[indices]
+     y_val = y_val[indices]
+
+     return x_train, y_train, x_val, y_val
+
+
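The multilabel variant groups by unique label combination instead of by class, and any combination containing a negative (-1) entry is used for training only. The grouping step, sketched on a toy label matrix:

import numpy as np

y = np.array([[1, 0], [1, 0], [0, 1], [1, 1], [1, 1], [0, -1]])

for combo in np.unique(y, axis=0):
    indices = np.where((y == combo).all(axis=1))[0]
    train_only = -1 in combo
    print(combo, "->", indices, "(train only)" if train_only else "(train/val split)")
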
+ def upsample_core(x: np.ndarray, y: np.ndarray, min_samples: int, apply, size=2):
+     """
+     Upsamples the minority class in the dataset using the specified apply function.
+     Parameters:
+         x (np.ndarray): The feature matrix.
+         y (np.ndarray): The target labels.
+         min_samples (int): The minimum number of samples required for the minority class.
+         apply (callable): A function that applies SMOTE or another augmentation strategy to the data.
+         size (int, optional): The number of source samples to draw in each iteration. Default is 2.
+     Returns:
+         tuple: A tuple containing the upsampled feature matrix and target labels.
+     """
+     rng = np.random.default_rng(cfg.RANDOM_SEED)
+     y_temp = []
+     x_temp = []
+
+     if cfg.BINARY_CLASSIFICATION:
+         # Determine if 1 or 0 is the minority class
+         minority_label = 1 if y.sum(axis=0) < len(y) - y.sum(axis=0) else 0
+
+         while np.where(y == minority_label)[0].shape[0] + len(y_temp) < min_samples:
+             # Randomly choose a sample from the minority class
+             random_index = rng.choice(np.where(y == minority_label)[0], size=size)
+
+             # Apply the augmentation function
+             x_app, y_app = apply(x, y, random_index)
+             y_temp.append(y_app)
+             x_temp.append(x_app)
+     else:
+         for i in range(y.shape[1]):
+             while y[:, i].sum() + len(y_temp) < min_samples:
+                 try:
+                     # Randomly choose a sample from the minority class
+                     random_index = rng.choice(np.where(y[:, i] == 1)[0], size=size)
+                 except ValueError as e:
+                     raise get_empty_class_exception()(index=i) from e
+
+                 # Apply the augmentation function
+                 x_app, y_app = apply(x, y, random_index)
+                 y_temp.append(y_app)
+                 x_temp.append(x_app)
+
+     return x_temp, y_temp
+
+
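The apply callable has a small contract: given the full data and the randomly drawn index (or indices), return one synthetic sample with its label; upsample_core collects these until min_samples is reached. A hypothetical strategy (the jitter function and noise level below are made up for illustration, not part of the package) shows the shape of that contract:

import numpy as np

rng = np.random.default_rng(1)

def apply_jitter(x, y, random_index):
    # Return one new sample/label pair, following the same contract as the apply() functions below
    sample = x[random_index[0]] + rng.normal(0, 0.01, size=x.shape[1])
    return sample, y[random_index[0]]

x = rng.normal(size=(4, 8))
y = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])
new_x, new_y = apply_jitter(x, y, rng.choice(np.where(y[:, 0] == 1)[0], size=1))
print(new_x.shape, new_y)
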
+ def upsampling(x: np.ndarray, y: np.ndarray, ratio=0.5, mode="repeat"):
+     """Balance data through upsampling.
+
+     We upsample minority classes so that they have at least `ratio` times the number of samples of the majority class (e.g. ratio=0.1 means at least 10%).
+
+     Args:
+         x: Samples.
+         y: One-hot labels.
+         ratio: The minimum ratio of minority to majority samples.
+         mode: The upsampling mode. Either 'repeat', 'mean', 'linear' or 'smote'.
+
+     Returns:
+         Upsampled data.
+     """
+
+     # Set numpy random seed
+     rng = np.random.default_rng(cfg.RANDOM_SEED)
+
+     # Determine min number of samples
+     min_samples = (
+         int(max(y.sum(axis=0), len(y) - y.sum(axis=0)) * ratio)
+         if cfg.BINARY_CLASSIFICATION
+         else int(np.max(y.sum(axis=0)) * ratio)
+     )
+
+     x_temp = []
+     y_temp = []
+
+     if mode == "repeat":
+
+         def applyRepeat(x, y, random_index):
+             return x[random_index[0]], y[random_index[0]]
+
+         x_temp, y_temp = upsample_core(x, y, min_samples, applyRepeat, size=1)
+
+     elif mode == "mean":
+         # For each class with less than min_samples
+         # select two random samples and calculate the mean
+         def applyMean(x, y, random_indices):
+             # Calculate the mean of the two samples
+             mean = np.mean(x[random_indices], axis=0)
+
+             # Append the mean and label to a temp list
+             return mean, y[random_indices[0]]
+
+         x_temp, y_temp = upsample_core(x, y, min_samples, applyMean)
+
+     elif mode == "linear":
+         # For each class with less than min_samples
+         # select two random samples and calculate the linear combination
+         def applyLinearCombination(x, y, random_indices):
+             # Calculate the linear combination of the two samples
+             alpha = rng.uniform(0, 1)
+             new_sample = alpha * x[random_indices[0]] + (1 - alpha) * x[random_indices[1]]
+
+             # Append the new sample and label to a temp list
+             return new_sample, y[random_indices[0]]
+
+         x_temp, y_temp = upsample_core(x, y, min_samples, applyLinearCombination)
+
+     elif mode == "smote":
+         # For each class with less than min_samples apply SMOTE
+         def applySmote(x, y, random_index, k=5):
+             # Get the k nearest neighbors
+             distances = np.sqrt(np.sum((x - x[random_index[0]]) ** 2, axis=1))
+             indices = np.argsort(distances)[1 : k + 1]
+
+             # Randomly choose one of the neighbors
+             random_neighbor = rng.choice(indices)
+
+             # Calculate the difference vector
+             diff = x[random_neighbor] - x[random_index[0]]
+
+             # Randomly choose a weight between 0 and 1
+             weight = rng.uniform(0, 1)
+
+             # Calculate the new sample
+             new_sample = x[random_index[0]] + weight * diff
+
+             # Append the new sample and label to a temp list
+             return new_sample, y[random_index[0]]
+
+         x_temp, y_temp = upsample_core(x, y, min_samples, applySmote, size=1)
+
+     # Append the temp list to the original data
+     if len(x_temp) > 0:
+         x = np.vstack((x, np.array(x_temp)))
+         y = np.vstack((y, np.array(y_temp)))
+
+     # Shuffle data
+     indices = np.arange(len(x))
+     rng.shuffle(indices)
+     x = x[indices]
+     y = y[indices]
+
+     del x_temp
+     del y_temp
+
+     return x, y
+
+
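Of the four modes defined above, 'smote' is the only one that uses neighbourhood structure: it interpolates between a minority sample and one of its k nearest neighbours. The core interpolation, re-done standalone on toy data:

import numpy as np

rng = np.random.default_rng(7)
x = rng.normal(size=(10, 4))  # toy feature matrix
anchor = 0                    # index of the minority sample

# k nearest neighbours by Euclidean distance (excluding the anchor itself)
distances = np.sqrt(np.sum((x - x[anchor]) ** 2, axis=1))
neighbours = np.argsort(distances)[1:6]

# Interpolate towards a random neighbour with a random weight in [0, 1)
neighbour = rng.choice(neighbours)
weight = rng.uniform(0, 1)
new_sample = x[anchor] + weight * (x[neighbour] - x[anchor])
print(new_sample)
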
+ def save_model_params(path):
+     """Saves the model parameters to a file.
+
+     Args:
+         path: Path to the file.
+     """
+     utils.save_params(
+         path,
+         (
+             "Hidden units",
+             "Dropout",
+             "Batchsize",
+             "Learning rate",
+             "Crop mode",
+             "Crop overlap",
+             "Audio speed",
+             "Upsampling mode",
+             "Upsampling ratio",
+             "use mixup",
+             "use label smoothing",
+             "BirdNET Model version",
+         ),
+         (
+             cfg.TRAIN_HIDDEN_UNITS,
+             cfg.TRAIN_DROPOUT,
+             cfg.TRAIN_BATCH_SIZE,
+             cfg.TRAIN_LEARNING_RATE,
+             cfg.SAMPLE_CROP_MODE,
+             cfg.SIG_OVERLAP,
+             cfg.AUDIO_SPEED,
+             cfg.UPSAMPLING_MODE,
+             cfg.UPSAMPLING_RATIO,
+             cfg.TRAIN_WITH_MIXUP,
+             cfg.TRAIN_WITH_LABEL_SMOOTHING,
+             cfg.MODEL_VERSION,
+         ),
+     )
+
+
+ def reset_custom_classifier():
+     """
+     Resets the custom classifier by setting the global variables C_INTERPRETER and C_PBMODEL to None.
+     This function is used to clear any existing custom classifier models and interpreters, effectively
+     resetting the state of the custom classifier.
+     """
+     global C_INTERPRETER
+     global C_PBMODEL
+
+     C_INTERPRETER = None
+     C_PBMODEL = None
+
+
+ def load_model(class_output=True):
+     """
+     Loads the machine learning model based on the configuration provided.
+     This function loads either a TensorFlow Lite (TFLite) model or a protobuf model
+     depending on the file extension of the model path specified in the configuration.
+     It sets up the global variables for the model interpreter and input/output layer indices.
+
+     Args:
+         class_output (bool): If True, sets the output layer index to the classification output.
+             If False, sets the output layer index to the feature embeddings.
+     """
+     global PBMODEL
+     global INTERPRETER
+     global INPUT_LAYER_INDEX
+     global OUTPUT_LAYER_INDEX
+     global OUTPUT_DETAILS
+
+     # Do we have to load the tflite or protobuf model?
+     if cfg.MODEL_PATH.endswith(".tflite"):
+         if not INTERPRETER:
+             # Load TFLite model and allocate tensors.
+             INTERPRETER = tflite.Interpreter(
+                 model_path=os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), num_threads=cfg.TFLITE_THREADS
+             )
+             INTERPRETER.allocate_tensors()
+
+             # Get input and output tensors.
+             input_details = INTERPRETER.get_input_details()
+             OUTPUT_DETAILS = INTERPRETER.get_output_details()
+
+             # Get input tensor index
+             INPUT_LAYER_INDEX = input_details[0]["index"]
+
+             # Get classification output or feature embeddings
+             OUTPUT_LAYER_INDEX = OUTPUT_DETAILS[0]["index"] if class_output else OUTPUT_DETAILS[0]["index"] - 1
+
+     elif not PBMODEL:
+         # Load protobuf model
+         # Note: This will throw a bunch of warnings about custom gradients
+         # which we will ignore until TF lets us block them
+         PBMODEL = keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), compile=False)
+
+
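load_model follows the usual TFLite workflow: construct an Interpreter, allocate tensors, then look up the input and output tensor indices. A generic sketch of that workflow, assuming TensorFlow's bundled tflite binding and a placeholder model path (it does not reproduce the classification-vs-embeddings index logic above):

import numpy as np
from tensorflow import lite as tflite  # the package's own tflite import may differ

interpreter = tflite.Interpreter(model_path="some_model.tflite", num_threads=1)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Feed a dummy batch matching the declared input shape, then read the output tensor
dummy = np.zeros(input_details[0]["shape"], dtype="float32")
interpreter.set_tensor(input_details[0]["index"], dummy)
interpreter.invoke()
print(interpreter.get_tensor(output_details[0]["index"]).shape)
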
+ def load_custom_classifier():
+     """
+     Loads a custom classifier model based on the file extension of the provided model path.
+     If the model file ends with ".tflite", it loads a TensorFlow Lite model and sets up the interpreter,
+     input layer index, output layer index, and input size.
+     If the model file does not end with ".tflite", it loads a TensorFlow SavedModel.
+     """
+     global C_INTERPRETER
+     global C_INPUT_LAYER_INDEX
+     global C_OUTPUT_LAYER_INDEX
+     global C_INPUT_SIZE
+     global C_PBMODEL
+
+     if cfg.CUSTOM_CLASSIFIER.endswith(".tflite"):
+         # Load TFLite model and allocate tensors.
+         C_INTERPRETER = tflite.Interpreter(model_path=cfg.CUSTOM_CLASSIFIER, num_threads=cfg.TFLITE_THREADS)
+         C_INTERPRETER.allocate_tensors()
+
+         # Get input and output tensors.
+         input_details = C_INTERPRETER.get_input_details()
+         output_details = C_INTERPRETER.get_output_details()
+
+         # Get input tensor index
+         C_INPUT_LAYER_INDEX = input_details[0]["index"]
+
+         C_INPUT_SIZE = input_details[0]["shape"][-1]
+
+         # Get classification output
+         C_OUTPUT_LAYER_INDEX = output_details[0]["index"]
+     else:
+         import tensorflow as tf
+
+         tf.get_logger().setLevel("ERROR")
+
+         C_PBMODEL = tf.saved_model.load(cfg.CUSTOM_CLASSIFIER)
+
+
+ def load_meta_model():
+     """Loads the model for species prediction.
+
+     Initializes the model used to predict species list, based on coordinates and week of year.
+     """
+     global M_INTERPRETER
+     global M_INPUT_LAYER_INDEX
+     global M_OUTPUT_LAYER_INDEX
+
+     # Load TFLite model and allocate tensors.
+     M_INTERPRETER = tflite.Interpreter(
+         model_path=os.path.join(SCRIPT_DIR, cfg.MDATA_MODEL_PATH), num_threads=cfg.TFLITE_THREADS
+     )
+     M_INTERPRETER.allocate_tensors()
+
+     # Get input and output tensors.
+     input_details = M_INTERPRETER.get_input_details()
+     output_details = M_INTERPRETER.get_output_details()
+
+     # Get input tensor index
+     M_INPUT_LAYER_INDEX = input_details[0]["index"]
+     M_OUTPUT_LAYER_INDEX = output_details[0]["index"]
+
+
+ def build_linear_classifier(num_labels, input_size, hidden_units=0, dropout=0.0):
+     """Builds a classifier.
+
+     Args:
+         num_labels: Output size.
+         input_size: Size of the input.
+         hidden_units: If > 0, creates another hidden layer with the given number of units.
+         dropout: Dropout rate.
+
+     Returns:
+         A new classifier.
+     """
+     # import keras
+     from tensorflow import keras
+
+     # Build a simple one- or two-layer linear classifier
+     model = keras.Sequential()
+
+     # Input layer
+     model.add(keras.layers.InputLayer(input_shape=(input_size,)))
+
+     # Batch normalization on input to standardize embeddings
+     model.add(keras.layers.BatchNormalization())
+
+     # Optional L2 regularization for all dense layers
+     regularizer = keras.regularizers.l2(1e-5)
+
+     # Hidden layer with improved architecture
+     if hidden_units > 0:
+         # Dropout layer before hidden layer
+         if dropout > 0:
+             model.add(keras.layers.Dropout(dropout))
+
+         # Add a hidden layer with L2 regularization
+         model.add(
+             keras.layers.Dense(
+                 hidden_units, activation="relu", kernel_regularizer=regularizer, kernel_initializer="he_normal"
+             )
+         )
+
+         # Add another batch normalization after the hidden layer
+         model.add(keras.layers.BatchNormalization())
+
+     # Dropout layer before output
+     if dropout > 0:
+         model.add(keras.layers.Dropout(dropout))
+
+     # Classification layer with L2 regularization
+     model.add(keras.layers.Dense(num_labels, kernel_regularizer=regularizer, kernel_initializer="glorot_uniform"))
+
+     # Activation layer
+     model.add(keras.layers.Activation("sigmoid"))
+
+     return model
+
+
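The head built above is deliberately small: batch normalization on the embeddings, an optional regularised hidden layer, and a sigmoid output. Building an equivalently shaped head directly in Keras shows the parameter budget; the 1024-dimensional input and the layer sizes here are assumptions for illustration only:

from tensorflow import keras

input_size, hidden_units, num_labels = 1024, 512, 10  # illustrative sizes only

model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(input_size,)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(hidden_units, activation="relu", kernel_regularizer=keras.regularizers.l2(1e-5)),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(num_labels),
    keras.layers.Activation("sigmoid"),
])
model.summary()  # roughly half a million parameters for these sizes
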
+ def train_linear_classifier(
+     classifier,
+     x_train,
+     y_train,
+     x_test,
+     y_test,
+     epochs,
+     batch_size,
+     learning_rate,
+     val_split,
+     upsampling_ratio,
+     upsampling_mode,
+     train_with_mixup,
+     train_with_label_smoothing,
+     train_with_focal_loss=False,
+     focal_loss_gamma=2.0,
+     focal_loss_alpha=0.25,
+     on_epoch_end=None,
+ ):
+     """Trains a custom classifier.
+
+     Trains a new classifier for BirdNET based on the given data.
+
+     Args:
+         classifier: The classifier to be trained.
+         x_train: Samples.
+         y_train: Labels.
+         x_test: Validation samples.
+         y_test: Validation labels.
+         epochs: Number of epochs to train.
+         batch_size: Batch size.
+         learning_rate: The learning rate during training.
+         val_split: Validation split ratio (0 when test data is provided).
+         upsampling_ratio: Upsampling ratio.
+         upsampling_mode: Upsampling mode.
+         train_with_mixup: If True, applies mixup to the training data.
+         train_with_label_smoothing: If True, applies label smoothing to the training data.
+         train_with_focal_loss: If True, uses focal loss instead of binary cross-entropy loss.
+         focal_loss_gamma: Focal loss gamma parameter.
+         focal_loss_alpha: Focal loss alpha parameter.
+         on_epoch_end: Optional callback `function(epoch, logs)`.
+
+     Returns:
+         (classifier, history)
+     """
+     # import keras
+     from tensorflow import keras
+
+     class FunctionCallback(keras.callbacks.Callback):
+         def __init__(self, on_epoch_end=None) -> None:
+             super().__init__()
+             self.on_epoch_end_fn = on_epoch_end
+
+         def on_epoch_end(self, epoch, logs=None):
+             if self.on_epoch_end_fn:
+                 self.on_epoch_end_fn(epoch, logs)
+
+     # Set random seed
+     rng = np.random.default_rng(cfg.RANDOM_SEED)
+
+     # Shuffle data
+     idx = np.arange(x_train.shape[0])
+     rng.shuffle(idx)
+     x_train = x_train[idx]
+     y_train = y_train[idx]
+
+     # Random val split
+     if val_split > 0:
+         if not cfg.MULTI_LABEL:
+             x_train, y_train, x_val, y_val = random_split(x_train, y_train, val_split)
+         else:
+             x_train, y_train, x_val, y_val = random_multilabel_split(x_train, y_train, val_split)
+     else:
+         x_val = x_test
+         y_val = y_test
+
+     print(
+         f"Training on {x_train.shape[0]} samples, validating on {x_val.shape[0]} samples.",
+         flush=True,
+     )
+
+     # Upsample training data
+     if upsampling_ratio > 0:
+         x_train, y_train = upsampling(x_train, y_train, upsampling_ratio, upsampling_mode)
+         print(f"Upsampled training data to {x_train.shape[0]} samples.", flush=True)
+
+     # Apply mixup to training data
+     if train_with_mixup and not cfg.BINARY_CLASSIFICATION:
+         x_train, y_train = mixup(x_train, y_train)
+
+     # Apply label smoothing
+     if train_with_label_smoothing and not cfg.BINARY_CLASSIFICATION:
+         y_train = label_smoothing(y_train)
+
+     # Early stopping with patience depending on dataset size
+     patience = min(10, max(5, int(epochs / 10)))
+     min_delta = 0.001
+
+     callbacks = [
+         # EarlyStopping with restore_best_weights
+         keras.callbacks.EarlyStopping(
+             monitor="val_AUPRC",
+             mode="max",
+             patience=patience,
+             verbose=1,
+             min_delta=min_delta,
+             restore_best_weights=True,
+         ),
+         # Function callback for progress tracking
+         FunctionCallback(on_epoch_end=on_epoch_end),
+     ]
+
+     # Learning rate schedule - use cosine decay with warmup
+     warmup_epochs = min(5, int(epochs * 0.1))
+
+     def lr_schedule(epoch, lr):
+         if epoch < warmup_epochs:
+             # Linear warmup
+             return learning_rate * (epoch + 1) / warmup_epochs
+
+         # Cosine decay
+         progress = (epoch - warmup_epochs) / (epochs - warmup_epochs)
+         return learning_rate * (0.1 + 0.9 * (1 + np.cos(np.pi * progress)) / 2)
+
+     # Add LR scheduler callback
+     callbacks.append(keras.callbacks.LearningRateScheduler(lr_schedule))
+
+     optimizer_cls = keras.optimizers.legacy.Adam if sys.platform == "darwin" else keras.optimizers.Adam
+
+     def _focal_loss(y_true, y_pred):
+         return focal_loss(y_true, y_pred, gamma=cfg.FOCAL_LOSS_GAMMA, alpha=cfg.FOCAL_LOSS_ALPHA)
+
+     # Choose the loss function based on config
+     loss_function = _focal_loss if train_with_focal_loss else custom_loss
+
+     # Compile model with appropriate metrics for classification task
+     classifier.compile(
+         optimizer=optimizer_cls(learning_rate=learning_rate),
+         loss=loss_function,
+         metrics=[
+             keras.metrics.AUC(
+                 curve="PR",
+                 multi_label=cfg.MULTI_LABEL,
+                 name="AUPRC",
+                 num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
+                 from_logits=True,
+             ),
+             keras.metrics.AUC(
+                 curve="ROC",
+                 multi_label=cfg.MULTI_LABEL,
+                 name="AUROC",
+                 num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
+                 from_logits=True,
+             ),
+         ],
+     )
+
+     # Train model
+     history = classifier.fit(
+         x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, y_val), callbacks=callbacks
+     )
+
+     return classifier, history
+
+
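The schedule above is linear warmup followed by cosine decay down to 10% of the base rate. Evaluating the same formula outside Keras makes its shape easy to check (base rate and epoch count are arbitrary here):

import numpy as np

epochs, learning_rate = 50, 0.001
warmup_epochs = min(5, int(epochs * 0.1))

def lr_schedule(epoch):
    if epoch < warmup_epochs:
        return learning_rate * (epoch + 1) / warmup_epochs
    progress = (epoch - warmup_epochs) / (epochs - warmup_epochs)
    return learning_rate * (0.1 + 0.9 * (1 + np.cos(np.pi * progress)) / 2)

for epoch in (0, warmup_epochs - 1, epochs // 2, epochs - 1):
    print(epoch, round(lr_schedule(epoch), 6))
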
+ def save_linear_classifier(classifier, model_path: str, labels: list[str], mode="replace"):
+     """Saves the classifier as a tflite model, as well as the used labels in a .txt.
+
+     Args:
+         classifier: The custom classifier.
+         model_path: Path the model will be saved at.
+         labels: List of labels used for the classifier.
+         mode: The save mode. Can be either "replace" or "append". Defaults to "replace".
+     """
+     import tensorflow as tf
+
+     global PBMODEL
+
+     tf.get_logger().setLevel("ERROR")
+
+     if PBMODEL is None:
+         PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
+
+     saved_model = PBMODEL
+
+     # Remove activation layer
+     classifier.pop()
+
+     if mode == "replace":
+         combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
+     elif mode == "append":
+         intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
+
+         output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
+
+         combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
+     else:
+         raise ValueError("Model save mode must be either 'replace' or 'append'")
+
+     # Append .tflite if necessary
+     if not model_path.endswith(".tflite"):
+         model_path += ".tflite"
+
+     # Make folders
+     os.makedirs(os.path.dirname(model_path), exist_ok=True)
+
+     # Save model as tflite
+     converter = tf.lite.TFLiteConverter.from_keras_model(combined_model)
+     tflite_model = converter.convert()
+
+     with open(model_path, "wb") as f:
+         f.write(tflite_model)
+
+     if mode == "append":
+         labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
+
+     # Save labels
+     with open(model_path.replace(".tflite", "_Labels.txt"), "w", encoding="utf-8") as f:
+         for label in labels:
+             f.write(label + "\n")
+
+     save_model_params(model_path.replace(".tflite", "_Params.csv"))
+
+
+ def save_raven_model(classifier, model_path: str, labels: list[str], mode="replace"):
+     """
+     Save a TensorFlow model with a custom classifier and associated metadata for use with BirdNET.
+
+     Args:
+         classifier (tf.keras.Model): The custom classifier model to be saved.
+         model_path (str): The path where the model will be saved.
+         labels (list[str]): A list of labels associated with the classifier.
+         mode (str, optional): The mode for saving the model. Can be either "replace" or "append".
+             Defaults to "replace".
+
+     Raises:
+         ValueError: If the mode is not "replace" or "append".
+
+     Returns:
+         None
+     """
+     import csv
+     import json
+
+     import tensorflow as tf
+
+     global PBMODEL
+
+     tf.get_logger().setLevel("ERROR")
+
+     if PBMODEL is None:
+         PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
+
+     saved_model = PBMODEL
+
+     if mode == "replace":
+         combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
+     elif mode == "append":
+         # Remove activation layer
+         classifier.pop()
+         intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
+
+         output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
+
+         combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
+     else:
+         raise ValueError("Model save mode must be either 'replace' or 'append'")
+
+     # Make signatures
+     class SignatureModule(tf.Module):
+         def __init__(self, keras_model):
+             super().__init__()
+             self.model = keras_model
+
+         @tf.function(input_signature=[tf.TensorSpec(shape=[None, 144000], dtype=tf.float32)])
+         def basic(self, inputs):
+             return {"scores": self.model(inputs)}
+
+     smodel = SignatureModule(combined_model)
+     signatures = {
+         "basic": smodel.basic,
+     }
+
+     # Save signature model
+     os.makedirs(os.path.dirname(model_path), exist_ok=True)
+     model_path = model_path.removesuffix(".tflite")
+     tf.saved_model.save(smodel, model_path, signatures=signatures)
+
+     if mode == "append":
+         labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
+
+     # Save label file
+     labelIds = [label[:4].replace(" ", "") + str(i) for i, label in enumerate(labels, 1)]
+     labels_dir = os.path.join(model_path, "labels")
+
+     os.makedirs(labels_dir, exist_ok=True)
+
+     with open(os.path.join(labels_dir, "label_names.csv"), "w", newline="") as labelsfile:
+         labelwriter = csv.writer(labelsfile)
+         labelwriter.writerows(zip(labelIds, labels, strict=True))
+
+     # Save class names file
+     classes_dir = os.path.join(model_path, "classes")
+
+     os.makedirs(classes_dir, exist_ok=True)
+
+     with open(os.path.join(classes_dir, "classes.csv"), "w", newline="") as classesfile:
+         classeswriter = csv.writer(classesfile)
+         for labelId in labelIds:
+             classeswriter.writerow((labelId, 0.25, cfg.SIG_FMIN, cfg.SIG_FMAX, False))
+
+     # Save model config
+     model_config = os.path.join(model_path, "model_config.json")
+
+     with open(model_config, "w") as modelconfigfile:
+         modelconfig = {
+             "specVersion": 1,
+             "modelDescription": "Custom classifier trained with BirdNET "
+             + cfg.MODEL_VERSION
+             + " embeddings.\n"
+             + "BirdNET was developed by the K. Lisa Yang Center for Conservation Bioacoustics "
+             + "at the Cornell Lab of Ornithology in collaboration with Chemnitz University of Technology.\n\n"
+             + "https://birdnet.cornell.edu",
+             "modelTypeConfig": {"modelType": "RECOGNITION"},
+             "signatures": [
+                 {
+                     "signatureName": "basic",
+                     "modelInputs": [
+                         {
+                             "inputName": "inputs",
+                             "sampleRate": 48000.0,
+                             "inputConfig": ["batch", "samples"],
+                         }
+                     ],
+                     "modelOutputs": [{"outputName": "scores", "outputType": "SCORES"}],
+                 }
+             ],
+             "globalSemanticKeys": labelIds,
+         }
+         json.dump(modelconfig, modelconfigfile, indent=2)
+
+     model_params = os.path.join(model_path, "model_params.csv")
+
+     save_model_params(model_params)
+
+
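For the Raven metadata, each label gets a short identifier made of its first four non-space characters plus a running number; the same identifiers are reused in label_names.csv, classes.csv and globalSemanticKeys. The construction in isolation, with made-up labels in BirdNET's "Scientific name_Common name" style:

labels = ["Turdus merula_Eurasian Blackbird", "Parus major_Great Tit"]
labelIds = [label[:4].replace(" ", "") + str(i) for i, label in enumerate(labels, 1)]
print(labelIds)  # ['Turd1', 'Paru2']
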
+ def predict_filter(lat, lon, week):
+     """Predicts the probability for each species.
+
+     Args:
+         lat: The latitude.
+         lon: The longitude.
+         week: The week of the year [1-48]. Use -1 for yearlong.
+
+     Returns:
+         A list of probabilities for all species.
+     """
+     # Does interpreter exist?
+     if M_INTERPRETER is None:
+         load_meta_model()
+
+     # Prepare mdata as sample
+     sample = np.expand_dims(np.array([lat, lon, week], dtype="float32"), 0)
+
+     # Run inference
+     M_INTERPRETER.set_tensor(M_INPUT_LAYER_INDEX, sample)
+     M_INTERPRETER.invoke()
+
+     return M_INTERPRETER.get_tensor(M_OUTPUT_LAYER_INDEX)[0]
+
+
+ def explore(lat: float, lon: float, week: int):
+     """Predicts the species list.
+
+     Predicts the species list based on the coordinates and week of year.
+
+     Args:
+         lat: The latitude.
+         lon: The longitude.
+         week: The week of the year [1-48]. Use -1 for yearlong.
+
+     Returns:
+         A sorted list of tuples with the score and the species.
+     """
+     # Make filter prediction
+     l_filter = predict_filter(lat, lon, week)
+
+     # Apply threshold
+     l_filter = np.where(l_filter >= cfg.LOCATION_FILTER_THRESHOLD, l_filter, 0)
+
+     # Zip with labels
+     l_filter = list(zip(l_filter, cfg.LABELS, strict=True))
+
+     # Sort by filter value
+     return sorted(l_filter, key=lambda x: x[0], reverse=True)
+
+
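After predict_filter returns the raw per-species scores, explore only thresholds them, pairs them with the label strings and sorts by score. The same post-processing on a toy score vector (the labels and threshold below are placeholders):

import numpy as np

scores = np.array([0.91, 0.02, 0.40])
labels = ["Species A", "Species B", "Species C"]  # placeholder labels
threshold = 0.03                                  # stand-in for cfg.LOCATION_FILTER_THRESHOLD

filtered = np.where(scores >= threshold, scores, 0)
ranked = sorted(zip(filtered, labels, strict=True), key=lambda item: item[0], reverse=True)
print(ranked)  # highest scores first; below-threshold species drop to 0
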
+ def focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25, epsilon=1e-7):
+     """
+     Focal loss for better handling of class imbalance.
+
+     This loss function gives more weight to hard examples and down-weights easy examples.
+     Particularly helpful for imbalanced datasets where some classes have few samples.
+
+     Args:
+         y_true: Ground truth labels.
+         y_pred: Predicted probabilities.
+         gamma: Focusing parameter. Higher values mean more focus on hard examples.
+         alpha: Balance parameter. Controls weight of positive vs negative examples.
+         epsilon: Small constant to prevent log(0).
+
+     Returns:
+         Focal loss value.
+     """
+     import tensorflow.keras.backend as K
+
+     # Clip predictions to avoid log(0)
+     y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
+
+     # Calculate cross entropy
+     cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
+
+     # Calculate focal weight
+     p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
+     focal_weight = K.pow(1 - p_t, gamma)
+
+     # Apply alpha balancing
+     alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
+
+     # Calculate focal loss
+     focal_loss = alpha_factor * focal_weight * cross_entropy
+
+     # Sum over all classes
+     return K.sum(focal_loss, axis=-1)
+
+
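The same arithmetic can be checked with plain numpy: for a confident correct prediction the focal weight (1 - p_t)**gamma collapses towards zero, while a confident mistake keeps almost the full cross-entropy. A toy check (values chosen only to show the down-weighting):

import numpy as np

def focal_loss_np(y_true, y_pred, gamma=2.0, alpha=0.25, epsilon=1e-7):
    y_pred = np.clip(y_pred, epsilon, 1.0 - epsilon)
    ce = -y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred)
    p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
    weight = (1 - p_t) ** gamma
    alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
    return np.sum(alpha_factor * weight * ce, axis=-1)

y_true = np.array([[1.0, 0.0]])
easy = np.array([[0.95, 0.05]])  # confident and correct -> tiny loss
hard = np.array([[0.10, 0.90]])  # confident and wrong   -> large loss
print(focal_loss_np(y_true, easy), focal_loss_np(y_true, hard))
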
+ def custom_loss(y_true, y_pred, epsilon=1e-7):
+     """Binary cross-entropy style loss summed over classes, with clipping for numerical stability."""
+     import tensorflow.keras.backend as K
+
+     # Calculate loss for positive labels with epsilon
+     positive_loss = -K.sum(y_true * K.log(K.clip(y_pred, epsilon, 1.0 - epsilon)), axis=-1)
+
+     # Calculate loss for negative labels with epsilon
+     negative_loss = -K.sum((1 - y_true) * K.log(K.clip(1 - y_pred, epsilon, 1.0 - epsilon)), axis=-1)
+
+     # Combine both loss terms
+     return positive_loss + negative_loss
+
+
+ def flat_sigmoid(x, sensitivity=-1, bias=1.0):
+     """
+     Applies a flat sigmoid function to the input array with a bias shift.
+
+     The flat sigmoid function is defined as:
+         f(x) = 1 / (1 + exp(sensitivity * clip(x + transformed_bias, -20, 20)))
+
+     We transform the bias parameter to an offset in roughly [-10, 10] with the formula:
+         transformed_bias = (bias - 1.0) * 10.0
+
+     Thus, with the default negative sensitivity, higher bias values shift the curve so that the same input yields a higher output, making the function more "sensitive".
+
+     Note: Not sure why we are clipping, must be for numerical stability somewhere else in the code.
+
+     Args:
+         x (array-like): Input data.
+         sensitivity (float, optional): Sensitivity parameter for the sigmoid function. Default is -1.
+         bias (float, optional): Bias parameter to shift the sigmoid function on the x-axis. Must be in the range [0.01, 1.99]. Default is 1.0.
+
+     Returns:
+         numpy.ndarray: Transformed data after applying the flat sigmoid function.
+     """
+
+     transformed_bias = (bias - 1.0) * 10.0
+
+     return 1 / (1.0 + np.exp(sensitivity * np.clip(x + transformed_bias, -20, 20)))
+
+
+ def predict(sample):
+     """Uses the main net to predict a sample.
+
+     Args:
+         sample: Audio sample.
+
+     Returns:
+         The prediction scores for the sample.
+     """
+     # Has custom classifier?
+     if cfg.CUSTOM_CLASSIFIER is not None:
+         return predict_with_custom_classifier(sample)
+
+     load_model()
+
+     if PBMODEL is None:
+         # Reshape input tensor
+         INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
+         INTERPRETER.allocate_tensors()
+
+         # Make a prediction (Audio only for now)
+         INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
+         INTERPRETER.invoke()
+         return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
+
+     # Make a prediction (Audio only for now)
+     return PBMODEL.basic(sample)["scores"]
+
+
+ def predict_with_custom_classifier(sample):
+     """Uses the custom classifier to make a prediction.
+
+     Args:
+         sample: Audio sample.
+
+     Returns:
+         The prediction scores for the sample.
+     """
+     # Does interpreter exist?
+     if C_INTERPRETER is None and C_PBMODEL is None:
+         load_custom_classifier()
+
+     if C_PBMODEL is None:
+         vector = embeddings(sample) if C_INPUT_SIZE != 144000 else sample
+
+         # Reshape input tensor
+         C_INTERPRETER.resize_tensor_input(C_INPUT_LAYER_INDEX, [len(vector), *vector[0].shape])
+         C_INTERPRETER.allocate_tensors()
+
+         # Make a prediction
+         C_INTERPRETER.set_tensor(C_INPUT_LAYER_INDEX, np.array(vector, dtype="float32"))
+         C_INTERPRETER.invoke()
+
+         return C_INTERPRETER.get_tensor(C_OUTPUT_LAYER_INDEX)
+
+     return C_PBMODEL.basic(sample)["scores"]
+
+
+ def embeddings(sample):
+     """Extracts the embeddings for a sample.
+
+     Args:
+         sample: Audio samples.
+
+     Returns:
+         The embeddings.
+     """
+
+     load_model(False)
+
+     # Reshape input tensor
+     INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
+     INTERPRETER.allocate_tensors()
+
+     # Extract feature embeddings
+     INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
+     INTERPRETER.invoke()
+
+     return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
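
predict, predict_with_custom_classifier and embeddings all expect a batch of raw audio chunks and resize the interpreter's input tensor to match it. Under the 48 kHz, 144,000-sample convention visible in the Raven signature above, a batch would be shaped roughly like this (shapes only, no model involved; the 3-second chunk length is inferred from 144000 / 48000):

import numpy as np

sample_rate, chunk_samples, batch_size = 48000, 144000, 8  # matches the [None, 144000] signature above
batch = np.zeros((batch_size, chunk_samples), dtype="float32")

print(batch.shape)                    # (8, 144000), i.e. 8 chunks of 3 s at 48 kHz
print([len(batch), *batch[0].shape])  # the shape passed to resize_tensor_input() above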