birdnet-analyzer 2.0.0__py3-none-any.whl → 2.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- birdnet_analyzer/__init__.py +9 -8
- birdnet_analyzer/analyze/__init__.py +5 -5
- birdnet_analyzer/analyze/__main__.py +3 -4
- birdnet_analyzer/analyze/cli.py +25 -25
- birdnet_analyzer/analyze/core.py +241 -245
- birdnet_analyzer/analyze/utils.py +692 -701
- birdnet_analyzer/audio.py +368 -372
- birdnet_analyzer/cli.py +709 -707
- birdnet_analyzer/config.py +242 -242
- birdnet_analyzer/eBird_taxonomy_codes_2021E.json +25279 -25279
- birdnet_analyzer/embeddings/__init__.py +3 -4
- birdnet_analyzer/embeddings/__main__.py +3 -3
- birdnet_analyzer/embeddings/cli.py +12 -13
- birdnet_analyzer/embeddings/core.py +69 -70
- birdnet_analyzer/embeddings/utils.py +179 -193
- birdnet_analyzer/evaluation/__init__.py +196 -195
- birdnet_analyzer/evaluation/__main__.py +3 -3
- birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
- birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
- birdnet_analyzer/evaluation/assessment/performance_assessor.py +409 -0
- birdnet_analyzer/evaluation/assessment/plotting.py +379 -0
- birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
- birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
- birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
- birdnet_analyzer/gui/__init__.py +19 -23
- birdnet_analyzer/gui/__main__.py +3 -3
- birdnet_analyzer/gui/analysis.py +175 -174
- birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
- birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
- birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
- birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
- birdnet_analyzer/gui/assets/gui.css +28 -28
- birdnet_analyzer/gui/assets/gui.js +93 -93
- birdnet_analyzer/gui/embeddings.py +619 -620
- birdnet_analyzer/gui/evaluation.py +795 -813
- birdnet_analyzer/gui/localization.py +75 -68
- birdnet_analyzer/gui/multi_file.py +245 -246
- birdnet_analyzer/gui/review.py +519 -527
- birdnet_analyzer/gui/segments.py +191 -191
- birdnet_analyzer/gui/settings.py +128 -129
- birdnet_analyzer/gui/single_file.py +267 -269
- birdnet_analyzer/gui/species.py +95 -95
- birdnet_analyzer/gui/train.py +696 -698
- birdnet_analyzer/gui/utils.py +810 -808
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
- birdnet_analyzer/lang/de.json +334 -334
- birdnet_analyzer/lang/en.json +334 -334
- birdnet_analyzer/lang/fi.json +334 -334
- birdnet_analyzer/lang/fr.json +334 -334
- birdnet_analyzer/lang/id.json +334 -334
- birdnet_analyzer/lang/pt-br.json +334 -334
- birdnet_analyzer/lang/ru.json +334 -334
- birdnet_analyzer/lang/se.json +334 -334
- birdnet_analyzer/lang/tlh.json +334 -334
- birdnet_analyzer/lang/zh_TW.json +334 -334
- birdnet_analyzer/model.py +1212 -1243
- birdnet_analyzer/playground.py +5 -0
- birdnet_analyzer/search/__init__.py +3 -3
- birdnet_analyzer/search/__main__.py +3 -3
- birdnet_analyzer/search/cli.py +11 -12
- birdnet_analyzer/search/core.py +78 -78
- birdnet_analyzer/search/utils.py +107 -111
- birdnet_analyzer/segments/__init__.py +3 -3
- birdnet_analyzer/segments/__main__.py +3 -3
- birdnet_analyzer/segments/cli.py +13 -14
- birdnet_analyzer/segments/core.py +81 -78
- birdnet_analyzer/segments/utils.py +383 -394
- birdnet_analyzer/species/__init__.py +3 -3
- birdnet_analyzer/species/__main__.py +3 -3
- birdnet_analyzer/species/cli.py +13 -14
- birdnet_analyzer/species/core.py +35 -35
- birdnet_analyzer/species/utils.py +74 -75
- birdnet_analyzer/train/__init__.py +3 -3
- birdnet_analyzer/train/__main__.py +3 -3
- birdnet_analyzer/train/cli.py +13 -14
- birdnet_analyzer/train/core.py +113 -113
- birdnet_analyzer/train/utils.py +877 -847
- birdnet_analyzer/translate.py +133 -104
- birdnet_analyzer/utils.py +426 -419
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/METADATA +137 -129
- birdnet_analyzer-2.0.1.dist-info/RECORD +125 -0
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/WHEEL +1 -1
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/licenses/LICENSE +18 -18
- birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/entry_points.txt +0 -0
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.0.1.dist-info}/top_level.txt +0 -0
birdnet_analyzer/model.py
CHANGED
@@ -1,1243 +1,1212 @@
… (removed lines 1-1212 of the old model.py are garbled beyond recovery in this extraction; the recoverable tail, old lines 1213-1243, follows) …
-     else:
-         prediction = C_PBMODEL.basic(sample)["scores"]
-
-     return prediction
-
-
- def embeddings(sample):
-     """Extracts the embeddings for a sample.
-
-     Args:
-         sample: Audio samples.
-
-     Returns:
-         The embeddings.
-     """
-     global INTERPRETER
-
-     # Does interpreter exist?
-     if INTERPRETER is None:
-         load_model(False)
-
-     # Reshape input tensor
-     INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
-     INTERPRETER.allocate_tensors()
-
-     # Extract feature embeddings
-     INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
-     INTERPRETER.invoke()
-     features = INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
-
-     return features
1
|
+
# ruff: noqa: PLW0603
|
2
|
+
"""Contains functions to use the BirdNET models."""
|
3
|
+
|
4
|
+
import os
|
5
|
+
import sys
|
6
|
+
import warnings
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
|
10
|
+
import birdnet_analyzer.config as cfg
|
11
|
+
from birdnet_analyzer import utils
|
12
|
+
|
13
|
+
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
|
14
|
+
|
15
|
+
|
16
|
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
17
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
18
|
+
|
19
|
+
warnings.filterwarnings("ignore")
|
20
|
+
|
21
|
+
# Import TFLite from runtime or Tensorflow;
|
22
|
+
# import Keras if protobuf model;
|
23
|
+
# NOTE: we have to use TFLite if we want to use
|
24
|
+
# the metadata model or want to extract embeddings
|
25
|
+
try:
|
26
|
+
import tflite_runtime.interpreter as tflite # type: ignore
|
27
|
+
except ModuleNotFoundError:
|
28
|
+
from tensorflow import lite as tflite
|
29
|
+
if not cfg.MODEL_PATH.endswith(".tflite"):
|
30
|
+
from tensorflow import keras
|
31
|
+
|
32
|
+
INTERPRETER: tflite.Interpreter = None
|
33
|
+
C_INTERPRETER: tflite.Interpreter = None
|
34
|
+
M_INTERPRETER: tflite.Interpreter = None
|
35
|
+
PBMODEL = None
|
36
|
+
C_PBMODEL = None
|
37
|
+
EMPTY_CLASS_EXCEPTION_REF = None
|
38
|
+
|
39
|
+
|
40
|
+
def get_empty_class_exception():
|
41
|
+
import keras_tuner.errors
|
42
|
+
|
43
|
+
global EMPTY_CLASS_EXCEPTION_REF
|
44
|
+
|
45
|
+
if EMPTY_CLASS_EXCEPTION_REF:
|
46
|
+
return EMPTY_CLASS_EXCEPTION_REF
|
47
|
+
|
48
|
+
class EmptyClassException(keras_tuner.errors.FatalError):
|
49
|
+
"""
|
50
|
+
Exception raised when a class is found to be empty.
|
51
|
+
|
52
|
+
Attributes:
|
53
|
+
index (int): The index of the empty class.
|
54
|
+
message (str): The error message indicating which class is empty.
|
55
|
+
"""
|
56
|
+
|
57
|
+
def __init__(self, *args, index=None):
|
58
|
+
super().__init__(*args)
|
59
|
+
self.index = index
|
60
|
+
self.message = f"Class {index} is empty."
|
61
|
+
|
62
|
+
EMPTY_CLASS_EXCEPTION_REF = EmptyClassException
|
63
|
+
return EMPTY_CLASS_EXCEPTION_REF
|
64
|
+
|
65
|
+
|
66
|
+
def label_smoothing(y: np.ndarray, alpha=0.1):
|
67
|
+
"""
|
68
|
+
Applies label smoothing to the given labels.
|
69
|
+
Label smoothing is a technique used to prevent the model from becoming overconfident by adjusting the target labels.
|
70
|
+
It subtracts a small value (alpha) from the correct label and distributes it among the other labels.
|
71
|
+
Args:
|
72
|
+
y (numpy.ndarray): Array of labels to be smoothed. The array should be of shape (num_labels,).
|
73
|
+
alpha (float, optional): Smoothing parameter. Default is 0.1.
|
74
|
+
Returns:
|
75
|
+
numpy.ndarray: The smoothed labels.
|
76
|
+
"""
|
77
|
+
# Subtract alpha from correct label when it is >0
|
78
|
+
y[y > 0] -= alpha
|
79
|
+
|
80
|
+
# Assigned alpha to all other labels
|
81
|
+
y[y == 0] = alpha / y.shape[0]
|
82
|
+
|
83
|
+
return y
|
84
|
+
|
85
|
+
|
86
|
+
def mixup(x, y, augmentation_ratio=0.25, alpha=0.2):
|
87
|
+
"""Apply mixup to the given data.
|
88
|
+
|
89
|
+
Mixup is a data augmentation technique that generates new samples by
|
90
|
+
mixing two samples and their labels.
|
91
|
+
|
92
|
+
Args:
|
93
|
+
x: Samples.
|
94
|
+
y: One-hot labels.
|
95
|
+
augmentation_ratio: The ratio of augmented samples.
|
96
|
+
alpha: The beta distribution parameter.
|
97
|
+
|
98
|
+
Returns:
|
99
|
+
Augmented data.
|
100
|
+
"""
|
101
|
+
rng = np.random.default_rng(cfg.RANDOM_SEED)
|
102
|
+
|
103
|
+
# Get indices of all positive samples
|
104
|
+
positive_indices = np.unique(np.where(y[:, :] == 1)[0])
|
105
|
+
|
106
|
+
# Calculate the number of samples to augment based on the ratio
|
107
|
+
num_samples_to_augment = int(len(positive_indices) * augmentation_ratio)
|
108
|
+
|
109
|
+
# Indices of samples, that are already mixed up
|
110
|
+
mixed_up_indices = []
|
111
|
+
|
112
|
+
for _ in range(num_samples_to_augment):
|
113
|
+
# Randomly choose one instance from the positive samples
|
114
|
+
index = rng.choice(positive_indices)
|
115
|
+
|
116
|
+
# Choose another one, when the chosen one was already mixed up
|
117
|
+
while index in mixed_up_indices:
|
118
|
+
index = rng.choice(positive_indices)
|
119
|
+
|
120
|
+
x1, y1 = x[index], y[index]
|
121
|
+
|
122
|
+
# Randomly choose a different instance from the dataset
|
123
|
+
second_index = rng.choice(positive_indices)
|
124
|
+
|
125
|
+
# Choose again, when the same or an already mixed up sample was selected
|
126
|
+
while second_index == index or second_index in mixed_up_indices:
|
127
|
+
second_index = rng.choice(positive_indices)
|
128
|
+
x2, y2 = x[second_index], y[second_index]
|
129
|
+
|
130
|
+
# Generate a random mixing coefficient (lambda)
|
131
|
+
lambda_ = rng.beta(alpha, alpha)
|
132
|
+
|
133
|
+
# Mix the embeddings and labels
|
134
|
+
mixed_x = lambda_ * x1 + (1 - lambda_) * x2
|
135
|
+
mixed_y = lambda_ * y1 + (1 - lambda_) * y2
|
136
|
+
|
137
|
+
# Replace one of the original samples and labels with the augmented sample and labels
|
138
|
+
x[index] = mixed_x
|
139
|
+
y[index] = mixed_y
|
140
|
+
|
141
|
+
# Mark the sample as already mixed up
|
142
|
+
mixed_up_indices.append(index)
|
143
|
+
|
144
|
+
del mixed_x
|
145
|
+
del mixed_y
|
146
|
+
|
147
|
+
return x, y
|
148
|
+
|
149
|
+
|
150
|
+
def random_split(x, y, val_ratio=0.2):
|
151
|
+
"""Splits the data into training and validation data.
|
152
|
+
|
153
|
+
Makes sure that each class is represented in both sets.
|
154
|
+
|
155
|
+
Args:
|
156
|
+
x: Samples.
|
157
|
+
y: One-hot labels.
|
158
|
+
val_ratio: The ratio of validation data.
|
159
|
+
|
160
|
+
Returns:
|
161
|
+
A tuple of (x_train, y_train, x_val, y_val).
|
162
|
+
"""
|
163
|
+
rng = np.random.default_rng(cfg.RANDOM_SEED)
|
164
|
+
|
165
|
+
# Get number of classes
|
166
|
+
num_classes = y.shape[1]
|
167
|
+
|
168
|
+
# Initialize training and validation data
|
169
|
+
x_train, y_train, x_val, y_val = [], [], [], []
|
170
|
+
|
171
|
+
# Split data
|
172
|
+
for i in range(num_classes):
|
173
|
+
# Get indices of positive samples of current class
|
174
|
+
positive_indices = np.where(y[:, i] == 1)[0]
|
175
|
+
|
176
|
+
# Get indices of negative samples of current class
|
177
|
+
negative_indices = np.where(y[:, i] == -1)[0]
|
178
|
+
|
179
|
+
# Get number of samples for each set
|
180
|
+
num_samples = len(positive_indices)
|
181
|
+
num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
|
182
|
+
num_samples_val = max(0, num_samples - num_samples_train)
|
183
|
+
|
184
|
+
# Randomly choose samples for training and validation
|
185
|
+
rng.shuffle(positive_indices)
|
186
|
+
train_indices = positive_indices[:num_samples_train]
|
187
|
+
val_indices = positive_indices[num_samples_train : num_samples_train + num_samples_val]
|
188
|
+
|
189
|
+
# Append samples to training and validation data
|
190
|
+
x_train.append(x[train_indices])
|
191
|
+
y_train.append(y[train_indices])
|
192
|
+
x_val.append(x[val_indices])
|
193
|
+
y_val.append(y[val_indices])
|
194
|
+
|
195
|
+
# Append negative samples to training data
|
196
|
+
x_train.append(x[negative_indices])
|
197
|
+
y_train.append(y[negative_indices])
|
198
|
+
|
199
|
+
# Add samples for non-event classes to training and validation data
|
200
|
+
non_event_indices = np.where(np.sum(y[:, :], axis=1) == 0)[0]
|
201
|
+
num_samples = len(non_event_indices)
|
202
|
+
num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
|
203
|
+
num_samples_val = max(0, num_samples - num_samples_train)
|
204
|
+
rng.shuffle(non_event_indices)
|
205
|
+
train_indices = non_event_indices[:num_samples_train]
|
206
|
+
val_indices = non_event_indices[num_samples_train : num_samples_train + num_samples_val]
|
207
|
+
x_train.append(x[train_indices])
|
208
|
+
y_train.append(y[train_indices])
|
209
|
+
x_val.append(x[val_indices])
|
210
|
+
y_val.append(y[val_indices])
|
211
|
+
|
212
|
+
# Concatenate data
|
213
|
+
x_train = np.concatenate(x_train)
|
214
|
+
y_train = np.concatenate(y_train)
|
215
|
+
x_val = np.concatenate(x_val)
|
216
|
+
y_val = np.concatenate(y_val)
|
217
|
+
|
218
|
+
# Shuffle data
|
219
|
+
indices = np.arange(len(x_train))
|
220
|
+
rng.shuffle(indices)
|
221
|
+
x_train = x_train[indices]
|
222
|
+
y_train = y_train[indices]
|
223
|
+
|
224
|
+
indices = np.arange(len(x_val))
|
225
|
+
rng.shuffle(indices)
|
226
|
+
x_val = x_val[indices]
|
227
|
+
y_val = y_val[indices]
|
228
|
+
|
229
|
+
return x_train, y_train, x_val, y_val
|
230
|
+
|
231
|
+
|
232
|
+
def random_multilabel_split(x, y, val_ratio=0.2):
|
233
|
+
"""Splits the data into training and validation data.
|
234
|
+
|
235
|
+
Makes sure that each combination of classes is represented in both sets.
|
236
|
+
|
237
|
+
Args:
|
238
|
+
x: Samples.
|
239
|
+
y: One-hot labels.
|
240
|
+
val_ratio: The ratio of validation data.
|
241
|
+
|
242
|
+
Returns:
|
243
|
+
A tuple of (x_train, y_train, x_val, y_val).
|
244
|
+
|
245
|
+
"""
|
246
|
+
rng = np.random.default_rng(cfg.RANDOM_SEED)
|
247
|
+
|
248
|
+
# Find all combinations of labels
|
249
|
+
class_combinations = np.unique(y, axis=0)
|
250
|
+
|
251
|
+
# Initialize training and validation data
|
252
|
+
x_train, y_train, x_val, y_val = [], [], [], []
|
253
|
+
|
254
|
+
# Split the data for each combination of labels
|
255
|
+
for class_combination in class_combinations:
|
256
|
+
# find all indices
|
257
|
+
indices = np.where((y == class_combination).all(axis=1))[0]
|
258
|
+
|
259
|
+
# When negative sample use only for training
|
260
|
+
if -1 in class_combination:
|
261
|
+
x_train.append(x[indices])
|
262
|
+
y_train.append(y[indices])
|
263
|
+
# Otherwise split according to the validation split
|
264
|
+
else:
|
265
|
+
# Get number of samples for each set
|
266
|
+
num_samples = len(indices)
|
267
|
+
num_samples_train = max(1, int(num_samples * (1 - val_ratio)))
|
268
|
+
num_samples_val = max(0, num_samples - num_samples_train)
|
269
|
+
# Randomly choose samples for training and validation
|
270
|
+
rng.shuffle(indices)
|
271
|
+
train_indices = indices[:num_samples_train]
|
272
|
+
val_indices = indices[num_samples_train : num_samples_train + num_samples_val]
|
273
|
+
# Append samples to training and validation data
|
274
|
+
x_train.append(x[train_indices])
|
275
|
+
y_train.append(y[train_indices])
|
276
|
+
x_val.append(x[val_indices])
|
277
|
+
y_val.append(y[val_indices])
|
278
|
+
|
279
|
+
# Concatenate data
|
280
|
+
x_train = np.concatenate(x_train)
|
281
|
+
y_train = np.concatenate(y_train)
|
282
|
+
x_val = np.concatenate(x_val)
|
283
|
+
y_val = np.concatenate(y_val)
|
284
|
+
|
285
|
+
# Shuffle data
|
286
|
+
indices = np.arange(len(x_train))
|
287
|
+
rng.shuffle(indices)
|
288
|
+
x_train = x_train[indices]
|
289
|
+
y_train = y_train[indices]
|
290
|
+
|
291
|
+
indices = np.arange(len(x_val))
|
292
|
+
rng.shuffle(indices)
|
293
|
+
x_val = x_val[indices]
|
294
|
+
y_val = y_val[indices]
|
295
|
+
|
296
|
+
return x_train, y_train, x_val, y_val
|
297
|
+
|
298
|
+
|
299
|
+
def upsample_core(x: np.ndarray, y: np.ndarray, min_samples: int, apply: callable, size=2):
|
300
|
+
"""
|
301
|
+
Upsamples the minority class in the dataset using the specified apply function.
|
302
|
+
Parameters:
|
303
|
+
x (np.ndarray): The feature matrix.
|
304
|
+
y (np.ndarray): The target labels.
|
305
|
+
min_samples (int): The minimum number of samples required for the minority class.
|
306
|
+
apply (callable): A function that applies the SMOTE or any other algorithm to the data.
|
307
|
+
size (int, optional): The number of samples to generate in each iteration. Default is 2.
|
308
|
+
Returns:
|
309
|
+
tuple: A tuple containing the upsampled feature matrix and target labels.
|
310
|
+
"""
|
311
|
+
rng = np.random.default_rng(cfg.RANDOM_SEED)
|
312
|
+
y_temp = []
|
313
|
+
x_temp = []
|
314
|
+
|
315
|
+
if cfg.BINARY_CLASSIFICATION:
|
316
|
+
# Determine if 1 or 0 is the minority class
|
317
|
+
minority_label = 1 if y.sum(axis=0) < len(y) - y.sum(axis=0) else 0
|
318
|
+
|
319
|
+
while np.where(y == minority_label)[0].shape[0] + len(y_temp) < min_samples:
|
320
|
+
# Randomly choose a sample from the minority class
|
321
|
+
random_index = rng.choice(np.where(y == minority_label)[0], size=size)
|
322
|
+
|
323
|
+
# Apply SMOTE
|
324
|
+
x_app, y_app = apply(x, y, random_index)
|
325
|
+
y_temp.append(y_app)
|
326
|
+
x_temp.append(x_app)
|
327
|
+
else:
|
328
|
+
for i in range(y.shape[1]):
|
329
|
+
while y[:, i].sum() + len(y_temp) < min_samples:
|
330
|
+
try:
|
331
|
+
# Randomly choose a sample from the minority class
|
332
|
+
random_index = rng.choice(np.where(y[:, i] == 1)[0], size=size)
|
333
|
+
except ValueError as e:
|
334
|
+
raise get_empty_class_exception()(index=i) from e
|
335
|
+
|
336
|
+
# Apply SMOTE
|
337
|
+
x_app, y_app = apply(x, y, random_index)
|
338
|
+
y_temp.append(y_app)
|
339
|
+
x_temp.append(x_app)
|
340
|
+
|
341
|
+
return x_temp, y_temp
|
342
|
+
|
343
|
+
|
344
|
+
def upsampling(x: np.ndarray, y: np.ndarray, ratio=0.5, mode="repeat"):
|
345
|
+
"""Balance data through upsampling.
|
346
|
+
|
347
|
+
We upsample minority classes to have at least 10% (ratio=0.1) of the samples of the majority class.
|
348
|
+
|
349
|
+
Args:
|
350
|
+
x: Samples.
|
351
|
+
y: One-hot labels.
|
352
|
+
ratio: The minimum ratio of minority to majority samples.
|
353
|
+
mode: The upsampling mode. Either 'repeat', 'mean', 'linear' or 'smote'.
|
354
|
+
|
355
|
+
Returns:
|
356
|
+
Upsampled data.
|
357
|
+
"""
|
358
|
+
|
359
|
+
# Set numpy random seed
|
360
|
+
rng = np.random.default_rng(cfg.RANDOM_SEED)
|
361
|
+
|
362
|
+
# Determine min number of samples
|
363
|
+
min_samples = (
|
364
|
+
int(max(y.sum(axis=0), len(y) - y.sum(axis=0)) * ratio)
|
365
|
+
if cfg.BINARY_CLASSIFICATION
|
366
|
+
else int(np.max(y.sum(axis=0)) * ratio)
|
367
|
+
)
|
368
|
+
|
369
|
+
x_temp = []
|
370
|
+
y_temp = []
|
371
|
+
|
372
|
+
if mode == "repeat":
|
373
|
+
|
374
|
+
def applyRepeat(x, y, random_index):
|
375
|
+
return x[random_index[0]], y[random_index[0]]
|
376
|
+
|
377
|
+
x_temp, y_temp = upsample_core(x, y, min_samples, applyRepeat, size=1)
|
378
|
+
|
379
|
+
elif mode == "mean":
|
380
|
+
# For each class with less than min_samples
|
381
|
+
# select two random samples and calculate the mean
|
382
|
+
def applyMean(x, y, random_indices):
|
383
|
+
# Calculate the mean of the two samples
|
384
|
+
mean = np.mean(x[random_indices], axis=0)
|
385
|
+
|
386
|
+
# Append the mean and label to a temp list
|
387
|
+
return mean, y[random_indices[0]]
|
388
|
+
|
389
|
+
x_temp, y_temp = upsample_core(x, y, min_samples, applyMean)
|
390
|
+
|
391
|
+
elif mode == "linear":
|
392
|
+
# For each class with less than min_samples
|
393
|
+
# select two random samples and calculate the linear combination
|
394
|
+
def applyLinearCombination(x, y, random_indices):
|
395
|
+
# Calculate the linear combination of the two samples
|
396
|
+
alpha = rng.uniform(0, 1)
|
397
|
+
new_sample = alpha * x[random_indices[0]] + (1 - alpha) * x[random_indices[1]]
|
398
|
+
|
399
|
+
# Append the new sample and label to a temp list
|
400
|
+
return new_sample, y[random_indices[0]]
|
401
|
+
|
402
|
+
x_temp, y_temp = upsample_core(x, y, min_samples, applyLinearCombination)
|
403
|
+
|
404
|
+
elif mode == "smote":
|
405
|
+
# For each class with less than min_samples apply SMOTE
|
406
|
+
def applySmote(x, y, random_index, k=5):
|
407
|
+
# Get the k nearest neighbors
|
408
|
+
distances = np.sqrt(np.sum((x - x[random_index[0]]) ** 2, axis=1))
|
409
|
+
indices = np.argsort(distances)[1 : k + 1]
|
410
|
+
|
411
|
+
# Randomly choose one of the neighbors
|
412
|
+
random_neighbor = rng.choice(indices)
|
413
|
+
|
414
|
+
# Calculate the difference vector
|
415
|
+
diff = x[random_neighbor] - x[random_index[0]]
|
416
|
+
|
417
|
+
# Randomly choose a weight between 0 and 1
|
418
|
+
weight = rng.uniform(0, 1)
|
419
|
+
|
420
|
+
# Calculate the new sample
|
421
|
+
new_sample = x[random_index[0]] + weight * diff
|
422
|
+
|
423
|
+
# Append the new sample and label to a temp list
|
424
|
+
return new_sample, y[random_index[0]]
|
425
|
+
|
426
|
+
x_temp, y_temp = upsample_core(x, y, min_samples, applySmote, size=1)
|
427
|
+
|
428
|
+
# Append the temp list to the original data
|
429
|
+
if len(x_temp) > 0:
|
430
|
+
x = np.vstack((x, np.array(x_temp)))
|
431
|
+
y = np.vstack((y, np.array(y_temp)))
|
432
|
+
|
433
|
+
# Shuffle data
|
434
|
+
indices = np.arange(len(x))
|
435
|
+
rng.shuffle(indices)
|
436
|
+
x = x[indices]
|
437
|
+
y = y[indices]
|
438
|
+
|
439
|
+
del x_temp
|
440
|
+
del y_temp
|
441
|
+
|
442
|
+
return x, y
|
443
|
+
|
444
|
+
|
445
|
+
def save_model_params(path):
|
446
|
+
"""Saves the model parameters to a file.
|
447
|
+
|
448
|
+
Args:
|
449
|
+
path: Path to the file.
|
450
|
+
"""
|
451
|
+
utils.save_params(
|
452
|
+
path,
|
453
|
+
(
|
454
|
+
"Hidden units",
|
455
|
+
"Dropout",
|
456
|
+
"Batchsize",
|
457
|
+
"Learning rate",
|
458
|
+
"Crop mode",
|
459
|
+
"Crop overlap",
|
460
|
+
"Audio speed",
|
461
|
+
"Upsamling mode",
|
462
|
+
"Upsamling ratio",
|
463
|
+
"use mixup",
|
464
|
+
"use label smoothing",
|
465
|
+
"BirdNET Model version",
|
466
|
+
),
|
467
|
+
(
|
468
|
+
cfg.TRAIN_HIDDEN_UNITS,
|
469
|
+
cfg.TRAIN_DROPOUT,
|
470
|
+
cfg.TRAIN_BATCH_SIZE,
|
471
|
+
cfg.TRAIN_LEARNING_RATE,
|
472
|
+
cfg.SAMPLE_CROP_MODE,
|
473
|
+
cfg.SIG_OVERLAP,
|
474
|
+
cfg.AUDIO_SPEED,
|
475
|
+
cfg.UPSAMPLING_MODE,
|
476
|
+
cfg.UPSAMPLING_RATIO,
|
477
|
+
cfg.TRAIN_WITH_MIXUP,
|
478
|
+
cfg.TRAIN_WITH_LABEL_SMOOTHING,
|
479
|
+
cfg.MODEL_VERSION,
|
480
|
+
),
|
481
|
+
)
|
482
|
+
|
483
|
+
|
484
|
+
def reset_custom_classifier():
|
485
|
+
"""
|
486
|
+
Resets the custom classifier by setting the global variables C_INTERPRETER and C_PBMODEL to None.
|
487
|
+
This function is used to clear any existing custom classifier models and interpreters, effectively
|
488
|
+
resetting the state of the custom classifier.
|
489
|
+
"""
|
490
|
+
global C_INTERPRETER
|
491
|
+
global C_PBMODEL
|
492
|
+
|
493
|
+
C_INTERPRETER = None
|
494
|
+
C_PBMODEL = None
|
495
|
+
|
496
|
+
|
497
|
+
def load_model(class_output=True):
|
498
|
+
"""
|
499
|
+
Loads the machine learning model based on the configuration provided.
|
500
|
+
This function loads either a TensorFlow Lite (TFLite) model or a protobuf model
|
501
|
+
depending on the file extension of the model path specified in the configuration.
|
502
|
+
It sets up the global variables for the model interpreter and input/output layer indices.
|
503
|
+
|
504
|
+
Args:
|
505
|
+
class_output (bool): If True, sets the output layer index to the classification output.
|
506
|
+
If False, sets the output layer index to the feature embeddings.
|
507
|
+
"""
|
508
|
+
global PBMODEL
|
509
|
+
global INTERPRETER
|
510
|
+
global INPUT_LAYER_INDEX
|
511
|
+
global OUTPUT_LAYER_INDEX
|
512
|
+
|
513
|
+
# Do we have to load the tflite or protobuf model?
|
514
|
+
if cfg.MODEL_PATH.endswith(".tflite"):
|
515
|
+
# Load TFLite model and allocate tensors.
|
516
|
+
INTERPRETER = tflite.Interpreter(
|
517
|
+
model_path=os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), num_threads=cfg.TFLITE_THREADS
|
518
|
+
)
|
519
|
+
INTERPRETER.allocate_tensors()
|
520
|
+
|
521
|
+
# Get input and output tensors.
|
522
|
+
input_details = INTERPRETER.get_input_details()
|
523
|
+
output_details = INTERPRETER.get_output_details()
|
524
|
+
|
525
|
+
# Get input tensor index
|
526
|
+
INPUT_LAYER_INDEX = input_details[0]["index"]
|
527
|
+
|
528
|
+
# Get classification output or feature embeddings
|
529
|
+
OUTPUT_LAYER_INDEX = output_details[0]["index"] if class_output else output_details[0]["index"] - 1
|
530
|
+
|
531
|
+
else:
|
532
|
+
# Load protobuf model
|
533
|
+
# Note: This will throw a bunch of warnings about custom gradients
|
534
|
+
# which we will ignore until TF lets us block them
|
535
|
+
PBMODEL = keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.MODEL_PATH), compile=False)
|
536
|
+
|
537
|
+
|
538
|
+
def load_custom_classifier():
|
539
|
+
"""
|
540
|
+
Loads a custom classifier model based on the file extension of the provided model path.
|
541
|
+
If the model file ends with ".tflite", it loads a TensorFlow Lite model and sets up the interpreter,
|
542
|
+
input layer index, output layer index, and input size.
|
543
|
+
If the model file does not end with ".tflite", it loads a TensorFlow SavedModel.
|
544
|
+
"""
|
545
|
+
global C_INTERPRETER
|
546
|
+
global C_INPUT_LAYER_INDEX
|
547
|
+
global C_OUTPUT_LAYER_INDEX
|
548
|
+
global C_INPUT_SIZE
|
549
|
+
global C_PBMODEL
|
550
|
+
|
551
|
+
if cfg.CUSTOM_CLASSIFIER.endswith(".tflite"):
|
552
|
+
# Load TFLite model and allocate tensors.
|
553
|
+
C_INTERPRETER = tflite.Interpreter(model_path=cfg.CUSTOM_CLASSIFIER, num_threads=cfg.TFLITE_THREADS)
|
554
|
+
C_INTERPRETER.allocate_tensors()
|
555
|
+
|
556
|
+
# Get input and output tensors.
|
557
|
+
input_details = C_INTERPRETER.get_input_details()
|
558
|
+
output_details = C_INTERPRETER.get_output_details()
|
559
|
+
|
560
|
+
# Get input tensor index
|
561
|
+
C_INPUT_LAYER_INDEX = input_details[0]["index"]
|
562
|
+
|
563
|
+
C_INPUT_SIZE = input_details[0]["shape"][-1]
|
564
|
+
|
565
|
+
# Get classification output
|
566
|
+
C_OUTPUT_LAYER_INDEX = output_details[0]["index"]
|
567
|
+
else:
|
568
|
+
import tensorflow as tf
|
569
|
+
|
570
|
+
tf.get_logger().setLevel("ERROR")
|
571
|
+
|
572
|
+
C_PBMODEL = tf.saved_model.load(cfg.CUSTOM_CLASSIFIER)
|
573
|
+
|
574
|
+
|
575
|
+
def load_meta_model():
|
576
|
+
"""Loads the model for species prediction.
|
577
|
+
|
578
|
+
Initializes the model used to predict species list, based on coordinates and week of year.
|
579
|
+
"""
|
580
|
+
global M_INTERPRETER
|
581
|
+
global M_INPUT_LAYER_INDEX
|
582
|
+
global M_OUTPUT_LAYER_INDEX
|
583
|
+
|
584
|
+
# Load TFLite model and allocate tensors.
|
585
|
+
M_INTERPRETER = tflite.Interpreter(
|
586
|
+
model_path=os.path.join(SCRIPT_DIR, cfg.MDATA_MODEL_PATH), num_threads=cfg.TFLITE_THREADS
|
587
|
+
)
|
588
|
+
M_INTERPRETER.allocate_tensors()
|
589
|
+
|
590
|
+
# Get input and output tensors.
|
591
|
+
input_details = M_INTERPRETER.get_input_details()
|
592
|
+
output_details = M_INTERPRETER.get_output_details()
|
593
|
+
|
594
|
+
# Get input tensor index
|
595
|
+
M_INPUT_LAYER_INDEX = input_details[0]["index"]
|
596
|
+
M_OUTPUT_LAYER_INDEX = output_details[0]["index"]
|
597
|
+
|
598
|
+
|
599
|
+
def build_linear_classifier(num_labels, input_size, hidden_units=0, dropout=0.0):
|
600
|
+
"""Builds a classifier.
|
601
|
+
|
602
|
+
Args:
|
603
|
+
num_labels: Output size.
|
604
|
+
input_size: Size of the input.
|
605
|
+
hidden_units: If > 0, creates another hidden layer with the given number of units.
|
606
|
+
dropout: Dropout rate.
|
607
|
+
|
608
|
+
Returns:
|
609
|
+
A new classifier.
|
610
|
+
"""
|
611
|
+
# import keras
|
612
|
+
from tensorflow import keras
|
613
|
+
|
614
|
+
# Build a simple one- or two-layer linear classifier
|
615
|
+
model = keras.Sequential()
|
616
|
+
|
617
|
+
# Input layer
|
618
|
+
model.add(keras.layers.InputLayer(input_shape=(input_size,)))
|
619
|
+
|
620
|
+
# Batch normalization on input to standardize embeddings
|
621
|
+
model.add(keras.layers.BatchNormalization())
|
622
|
+
|
623
|
+
# Optional L2 regularization for all dense layers
|
624
|
+
regularizer = keras.regularizers.l2(1e-5)
|
625
|
+
|
626
|
+
# Hidden layer with improved architecture
|
627
|
+
if hidden_units > 0:
|
628
|
+
# Dropout layer before hidden layer
|
629
|
+
if dropout > 0:
|
630
|
+
model.add(keras.layers.Dropout(dropout))
|
631
|
+
|
632
|
+
# Add a hidden layer with L2 regularization
|
633
|
+
model.add(
|
634
|
+
keras.layers.Dense(
|
635
|
+
hidden_units, activation="relu", kernel_regularizer=regularizer, kernel_initializer="he_normal"
|
636
|
+
)
|
637
|
+
)
|
638
|
+
|
639
|
+
# Add another batch normalization after the hidden layer
|
640
|
+
model.add(keras.layers.BatchNormalization())
|
641
|
+
|
642
|
+
# Dropout layer before output
|
643
|
+
if dropout > 0:
|
644
|
+
model.add(keras.layers.Dropout(dropout))
|
645
|
+
|
646
|
+
# Classification layer with L2 regularization
|
647
|
+
model.add(keras.layers.Dense(num_labels, kernel_regularizer=regularizer, kernel_initializer="glorot_uniform"))
|
648
|
+
|
649
|
+
# Activation layer
|
650
|
+
model.add(keras.layers.Activation("sigmoid"))
|
651
|
+
|
652
|
+
return model
|
653
|
+
|
654
|
+
|
655
|
+
def train_linear_classifier(
|
656
|
+
classifier,
|
657
|
+
x_train,
|
658
|
+
y_train,
|
659
|
+
x_test,
|
660
|
+
y_test,
|
661
|
+
epochs,
|
662
|
+
batch_size,
|
663
|
+
learning_rate,
|
664
|
+
val_split,
|
665
|
+
upsampling_ratio,
|
666
|
+
upsampling_mode,
|
667
|
+
train_with_mixup,
|
668
|
+
train_with_label_smoothing,
|
669
|
+
train_with_focal_loss=False,
|
670
|
+
focal_loss_gamma=2.0,
|
671
|
+
focal_loss_alpha=0.25,
|
672
|
+
on_epoch_end=None,
|
673
|
+
):
|
674
|
+
"""Trains a custom classifier.
|
675
|
+
|
676
|
+
Trains a new classifier for BirdNET based on the given data.
|
677
|
+
|
678
|
+
Args:
|
679
|
+
classifier: The classifier to be trained.
|
680
|
+
x_train: Samples.
|
681
|
+
y_train: Labels.
|
682
|
+
x_test: Validation samples.
|
683
|
+
y_test: Validation labels.
|
684
|
+
epochs: Number of epochs to train.
|
685
|
+
batch_size: Batch size.
|
686
|
+
learning_rate: The learning rate during training.
|
687
|
+
val_split: Validation split ratio (is 0 when using test data).
|
688
|
+
upsampling_ratio: Upsampling ratio.
|
689
|
+
upsampling_mode: Upsampling mode.
|
690
|
+
train_with_mixup: If True, applies mixup to the training data.
|
691
|
+
train_with_label_smoothing: If True, applies label smoothing to the training data.
|
692
|
+
train_with_focal_loss: If True, uses focal loss instead of binary cross-entropy loss.
|
693
|
+
focal_loss_gamma: Focal loss gamma parameter.
|
694
|
+
focal_loss_alpha: Focal loss alpha parameter.
|
695
|
+
on_epoch_end: Optional callback `function(epoch, logs)`.
|
696
|
+
|
697
|
+
Returns:
|
698
|
+
(classifier, history)
|
699
|
+
"""
|
700
|
+
# import keras
|
701
|
+
from tensorflow import keras
|
702
|
+
|
703
|
+
class FunctionCallback(keras.callbacks.Callback):
|
704
|
+
def __init__(self, on_epoch_end=None) -> None:
|
705
|
+
super().__init__()
|
706
|
+
self.on_epoch_end_fn = on_epoch_end
|
707
|
+
|
708
|
+
def on_epoch_end(self, epoch, logs=None):
|
709
|
+
if self.on_epoch_end_fn:
|
710
|
+
self.on_epoch_end_fn(epoch, logs)
|
711
|
+
|
712
|
+
# Set random seed
|
713
|
+
rng = np.random.default_rng(cfg.RANDOM_SEED)
|
714
|
+
|
715
|
+
# Shuffle data
|
716
|
+
idx = np.arange(x_train.shape[0])
|
717
|
+
rng.shuffle(idx)
|
718
|
+
x_train = x_train[idx]
|
719
|
+
y_train = y_train[idx]
|
720
|
+
|
721
|
+
# Random val split
|
722
|
+
if val_split > 0:
|
723
|
+
if not cfg.MULTI_LABEL:
|
724
|
+
x_train, y_train, x_val, y_val = random_split(x_train, y_train, val_split)
|
725
|
+
else:
|
726
|
+
x_train, y_train, x_val, y_val = random_multilabel_split(x_train, y_train, val_split)
|
727
|
+
else:
|
728
|
+
x_val = x_test
|
729
|
+
y_val = y_test
|
730
|
+
|
731
|
+
print(
|
732
|
+
f"Training on {x_train.shape[0]} samples, validating on {x_val.shape[0]} samples.",
|
733
|
+
flush=True,
|
734
|
+
)
|
735
|
+
|
736
|
+
# Upsample training data
|
737
|
+
if upsampling_ratio > 0:
|
738
|
+
x_train, y_train = upsampling(x_train, y_train, upsampling_ratio, upsampling_mode)
|
739
|
+
print(f"Upsampled training data to {x_train.shape[0]} samples.", flush=True)
|
740
|
+
|
741
|
+
# Apply mixup to training data
|
742
|
+
if train_with_mixup and not cfg.BINARY_CLASSIFICATION:
|
743
|
+
x_train, y_train = mixup(x_train, y_train)
|
744
|
+
|
745
|
+
# Apply label smoothing
|
746
|
+
if train_with_label_smoothing and not cfg.BINARY_CLASSIFICATION:
|
747
|
+
y_train = label_smoothing(y_train)
|
748
|
+
|
749
|
+
# Early stopping with patience depending on dataset size
|
750
|
+
patience = min(10, max(5, int(epochs / 10)))
|
751
|
+
min_delta = 0.001
|
752
|
+
|
753
|
+
callbacks = [
|
754
|
+
# EarlyStopping with restore_best_weights
|
755
|
+
keras.callbacks.EarlyStopping(
|
756
|
+
monitor="val_AUPRC",
|
757
|
+
mode="max",
|
758
|
+
patience=patience,
|
759
|
+
verbose=1,
|
760
|
+
min_delta=min_delta,
|
761
|
+
restore_best_weights=True,
|
762
|
+
),
|
763
|
+
# Function callback for progress tracking
|
764
|
+
FunctionCallback(on_epoch_end=on_epoch_end),
|
765
|
+
]
|
766
|
+
|
767
|
+
# Learning rate schedule - use cosine decay with warmup
|
768
|
+
warmup_epochs = min(5, int(epochs * 0.1))
|
769
|
+
|
770
|
+
def lr_schedule(epoch, lr):
|
771
|
+
if epoch < warmup_epochs:
|
772
|
+
# Linear warmup
|
773
|
+
return learning_rate * (epoch + 1) / warmup_epochs
|
774
|
+
|
775
|
+
# Cosine decay
|
776
|
+
progress = (epoch - warmup_epochs) / (epochs - warmup_epochs)
|
777
|
+
return learning_rate * (0.1 + 0.9 * (1 + np.cos(np.pi * progress)) / 2)
|
778
|
+
|
779
|
+
# Add LR scheduler callback
|
780
|
+
callbacks.append(keras.callbacks.LearningRateScheduler(lr_schedule))
|
781
|
+
|
782
|
+
optimizer_cls = keras.optimizers.legacy.Adam if sys.platform == "darwin" else keras.optimizers.Adam
|
783
|
+
|
784
|
+
def _focal_loss(y_true, y_pred):
|
785
|
+
return focal_loss(y_true, y_pred, gamma=cfg.FOCAL_LOSS_GAMMA, alpha=cfg.FOCAL_LOSS_ALPHA)
|
786
|
+
|
787
|
+
# Choose the loss function based on config
|
788
|
+
loss_function = _focal_loss if train_with_focal_loss else custom_loss
|
789
|
+
|
790
|
+
# Compile model with appropriate metrics for classification task
|
791
|
+
classifier.compile(
|
792
|
+
optimizer=optimizer_cls(learning_rate=learning_rate),
|
793
|
+
loss=loss_function,
|
794
|
+
metrics=[
|
795
|
+
keras.metrics.AUC(
|
796
|
+
curve="PR",
|
797
|
+
multi_label=cfg.MULTI_LABEL,
|
798
|
+
name="AUPRC",
|
799
|
+
num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
|
800
|
+
from_logits=True,
|
801
|
+
),
|
802
|
+
keras.metrics.AUC(
|
803
|
+
curve="ROC",
|
804
|
+
multi_label=cfg.MULTI_LABEL,
|
805
|
+
name="AUROC",
|
806
|
+
num_labels=y_train.shape[1] if cfg.MULTI_LABEL else None,
|
807
|
+
from_logits=True,
|
808
|
+
),
|
809
|
+
],
|
810
|
+
)
|
811
|
+
|
812
|
+
# Train model
|
813
|
+
history = classifier.fit(
|
814
|
+
x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, y_val), callbacks=callbacks
|
815
|
+
)
|
816
|
+
|
817
|
+
return classifier, history
|
818
|
+
|
819
|
+
|
820
|
+
def save_linear_classifier(classifier, model_path: str, labels: list[str], mode="replace"):
|
821
|
+
"""Saves the classifier as a tflite model, as well as the used labels in a .txt.
|
822
|
+
|
823
|
+
Args:
|
824
|
+
classifier: The custom classifier.
|
825
|
+
model_path: Path the model will be saved at.
|
826
|
+
labels: List of labels used for the classifier.
|
827
|
+
"""
|
828
|
+
import tensorflow as tf
|
829
|
+
|
830
|
+
global PBMODEL
|
831
|
+
|
832
|
+
tf.get_logger().setLevel("ERROR")
|
833
|
+
|
834
|
+
if PBMODEL is None:
|
835
|
+
PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
|
836
|
+
|
837
|
+
saved_model = PBMODEL
|
838
|
+
|
839
|
+
# Remove activation layer
|
840
|
+
classifier.pop()
|
841
|
+
|
842
|
+
if mode == "replace":
|
843
|
+
combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
|
844
|
+
elif mode == "append":
|
845
|
+
intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
|
846
|
+
|
847
|
+
output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
|
848
|
+
|
849
|
+
combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
|
850
|
+
else:
|
851
|
+
raise ValueError("Model save mode must be either 'replace' or 'append'")
|
852
|
+
|
853
|
+
# Append .tflite if necessary
|
854
|
+
if not model_path.endswith(".tflite"):
|
855
|
+
model_path += ".tflite"
|
856
|
+
|
857
|
+
# Make folders
|
858
|
+
os.makedirs(os.path.dirname(model_path), exist_ok=True)
|
859
|
+
|
860
|
+
# Save model as tflite
|
861
|
+
converter = tf.lite.TFLiteConverter.from_keras_model(combined_model)
|
862
|
+
tflite_model = converter.convert()
|
863
|
+
|
864
|
+
with open(model_path, "wb") as f:
|
865
|
+
f.write(tflite_model)
|
866
|
+
|
867
|
+
if mode == "append":
|
868
|
+
labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
|
869
|
+
|
870
|
+
# Save labels
|
871
|
+
with open(model_path.replace(".tflite", "_Labels.txt"), "w", encoding="utf-8") as f:
|
872
|
+
for label in labels:
|
873
|
+
f.write(label + "\n")
|
874
|
+
|
875
|
+
save_model_params(model_path.replace(".tflite", "_Params.csv"))
|
876
|
+
|
877
|
+
|
+def save_raven_model(classifier, model_path: str, labels: list[str], mode="replace"):
+    """
+    Save a TensorFlow model with a custom classifier and associated metadata for use with BirdNET.
+
+    Args:
+        classifier (tf.keras.Model): The custom classifier model to be saved.
+        model_path (str): The path where the model will be saved.
+        labels (list[str]): A list of labels associated with the classifier.
+        mode (str, optional): The mode for saving the model. Can be either "replace" or "append".
+            Defaults to "replace".
+
+    Raises:
+        ValueError: If the mode is not "replace" or "append".
+
+    Returns:
+        None
+    """
+    import csv
+    import json
+
+    import tensorflow as tf
+
+    global PBMODEL
+
+    tf.get_logger().setLevel("ERROR")
+
+    if PBMODEL is None:
+        PBMODEL = tf.keras.models.load_model(os.path.join(SCRIPT_DIR, cfg.PB_MODEL), compile=False)
+
+    saved_model = PBMODEL
+
+    if mode == "replace":
+        combined_model = tf.keras.Sequential([saved_model.embeddings_model, classifier], "basic")
+    elif mode == "append":
+        # Remove activation layer
+        classifier.pop()
+        intermediate = classifier(saved_model.model.get_layer("GLOBAL_AVG_POOL").output)
+
+        output = tf.keras.layers.concatenate([saved_model.model.output, intermediate], name="combined_output")
+
+        combined_model = tf.keras.Model(inputs=saved_model.model.input, outputs=output)
+    else:
+        raise ValueError("Model save mode must be either 'replace' or 'append'")
+
+    # Make signatures
+    class SignatureModule(tf.Module):
+        def __init__(self, keras_model):
+            super().__init__()
+            self.model = keras_model
+
+        @tf.function(input_signature=[tf.TensorSpec(shape=[None, 144000], dtype=tf.float32)])
+        def basic(self, inputs):
+            return {"scores": self.model(inputs)}
+
+    smodel = SignatureModule(combined_model)
+    signatures = {
+        "basic": smodel.basic,
+    }
+
+    # Save signature model
+    os.makedirs(os.path.dirname(model_path), exist_ok=True)
+    model_path = model_path.removesuffix(".tflite")
+    tf.saved_model.save(smodel, model_path, signatures=signatures)
+
+    if mode == "append":
+        labels = [*utils.read_lines(os.path.join(SCRIPT_DIR, cfg.LABELS_FILE)), *labels]
+
+    # Save label file
+    labelIds = [label[:4].replace(" ", "") + str(i) for i, label in enumerate(labels, 1)]
+    labels_dir = os.path.join(model_path, "labels")
+
+    os.makedirs(labels_dir, exist_ok=True)
+
+    with open(os.path.join(labels_dir, "label_names.csv"), "w", newline="") as labelsfile:
+        labelwriter = csv.writer(labelsfile)
+        labelwriter.writerows(zip(labelIds, labels, strict=True))
+
+    # Save class names file
+    classes_dir = os.path.join(model_path, "classes")
+
+    os.makedirs(classes_dir, exist_ok=True)
+
+    with open(os.path.join(classes_dir, "classes.csv"), "w", newline="") as classesfile:
+        classeswriter = csv.writer(classesfile)
+        for labelId in labelIds:
+            classeswriter.writerow((labelId, 0.25, cfg.SIG_FMIN, cfg.SIG_FMAX, False))
+
+    # Save model config
+    model_config = os.path.join(model_path, "model_config.json")
+
+    with open(model_config, "w") as modelconfigfile:
+        modelconfig = {
+            "specVersion": 1,
+            "modelDescription": "Custom classifier trained with BirdNET "
+            + cfg.MODEL_VERSION
+            + " embeddings.\n"
+            + "BirdNET was developed by the K. Lisa Yang Center for Conservation Bioacoustics "
+            + "at the Cornell Lab of Ornithology in collaboration with Chemnitz University of Technology.\n\n"
+            + "https://birdnet.cornell.edu",
+            "modelTypeConfig": {"modelType": "RECOGNITION"},
+            "signatures": [
+                {
+                    "signatureName": "basic",
+                    "modelInputs": [
+                        {
+                            "inputName": "inputs",
+                            "sampleRate": 48000.0,
+                            "inputConfig": ["batch", "samples"],
+                        }
+                    ],
+                    "modelOutputs": [{"outputName": "scores", "outputType": "SCORES"}],
+                }
+            ],
+            "globalSemanticKeys": labelIds,
+        }
+        json.dump(modelconfig, modelconfigfile, indent=2)
+
+    model_params = os.path.join(model_path, "model_params.csv")
+
+    save_model_params(model_params)
+
+
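The result is a plain TensorFlow SavedModel directory (the ".tflite" suffix is stripped from model_path) plus the label, class, and config files written above. A hedged sanity-check sketch, with a hypothetical directory name:

import tensorflow as tf

# Directory produced by save_raven_model (name is an assumption).
loaded = tf.saved_model.load("CustomClassifier")
basic = loaded.signatures["basic"]

scores = basic(inputs=tf.zeros([1, 144000], dtype=tf.float32))["scores"]
print(scores.shape)  # (1, num_labels); "append" mode adds the custom labels after the base labels
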
+def predict_filter(lat, lon, week):
+    """Predicts the probability for each species.
+
+    Args:
+        lat: The latitude.
+        lon: The longitude.
+        week: The week of the year [1-48]. Use -1 for yearlong.
+
+    Returns:
+        A list of probabilities for all species.
+    """
+    # Does interpreter exist?
+    if M_INTERPRETER is None:
+        load_meta_model()
+
+    # Prepare mdata as sample
+    sample = np.expand_dims(np.array([lat, lon, week], dtype="float32"), 0)
+
+    # Run inference
+    M_INTERPRETER.set_tensor(M_INPUT_LAYER_INDEX, sample)
+    M_INTERPRETER.invoke()
+
+    return M_INTERPRETER.get_tensor(M_OUTPUT_LAYER_INDEX)[0]
+
+
+def explore(lat: float, lon: float, week: int):
+    """Predicts the species list.
+
+    Predicts the species list based on the coordinates and week of year.
+
+    Args:
+        lat: The latitude.
+        lon: The longitude.
+        week: The week of the year [1-48]. Use -1 for yearlong.
+
+    Returns:
+        A sorted list of tuples with the score and the species.
+    """
+    # Make filter prediction
+    l_filter = predict_filter(lat, lon, week)
+
+    # Apply threshold
+    l_filter = np.where(l_filter >= cfg.LOCATION_FILTER_THRESHOLD, l_filter, 0)
+
+    # Zip with labels
+    l_filter = list(zip(l_filter, cfg.LABELS, strict=True))
+
+    # Sort by filter value
+    return sorted(l_filter, key=lambda x: x[0], reverse=True)
+
+
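Together, predict_filter and explore implement the location/week species filter. A hedged usage sketch (the coordinates are arbitrary examples; the 48-week calendar is 4 weeks per month):

# Ithaca, NY area, week 20:
species = explore(42.48, -76.45, 20)

# Scores below cfg.LOCATION_FILTER_THRESHOLD were zeroed out above, so keep the rest.
present = [(score, label) for score, label in species if score > 0]
for score, label in present[:5]:
    print(f"{score:.3f}  {label}")
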
+def focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25, epsilon=1e-7):
+    """
+    Focal loss for better handling of class imbalance.
+
+    This loss function gives more weight to hard examples and down-weights easy examples.
+    Particularly helpful for imbalanced datasets where some classes have few samples.
+
+    Args:
+        y_true: Ground truth labels.
+        y_pred: Predicted probabilities.
+        gamma: Focusing parameter. Higher values mean more focus on hard examples.
+        alpha: Balance parameter. Controls weight of positive vs negative examples.
+        epsilon: Small constant to prevent log(0).
+
+    Returns:
+        Focal loss value.
+    """
+    import tensorflow.keras.backend as K
+
+    # Clip predictions (assumed to already be probabilities) to avoid log(0)
+    y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
+
+    # Calculate cross entropy
+    cross_entropy = -y_true * K.log(y_pred) - (1 - y_true) * K.log(1 - y_pred)
+
+    # Calculate focal weight
+    p_t = y_true * y_pred + (1 - y_true) * (1 - y_pred)
+    focal_weight = K.pow(1 - p_t, gamma)
+
+    # Apply alpha balancing
+    alpha_factor = y_true * alpha + (1 - y_true) * (1 - alpha)
+
+    # Calculate focal loss
+    focal_loss = alpha_factor * focal_weight * cross_entropy
+
+    # Sum over all classes
+    return K.sum(focal_loss, axis=-1)
+
+
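A quick numeric check of the down-weighting, using a NumPy stand-in for the modulating factor (focal_term is a hypothetical helper, not part of the module):

def focal_term(p_t, gamma=2.0):
    # Modulating factor (1 - p_t)^gamma from focal_loss above.
    return (1.0 - p_t) ** gamma

# Easy example (p_t = 0.9) vs hard example (p_t = 0.1), gamma = 2:
print(focal_term(0.9))  # 0.01 -> keeps ~1% of its cross-entropy
print(focal_term(0.1))  # 0.81 -> keeps ~81% of its cross-entropy
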
+def custom_loss(y_true, y_pred, epsilon=1e-7):
+    import tensorflow.keras.backend as K
+
+    # Calculate loss for positive labels with epsilon
+    positive_loss = -K.sum(y_true * K.log(K.clip(y_pred, epsilon, 1.0 - epsilon)), axis=-1)
+
+    # Calculate loss for negative labels with epsilon
+    negative_loss = -K.sum((1 - y_true) * K.log(K.clip(1 - y_pred, epsilon, 1.0 - epsilon)), axis=-1)
+
+    # Combine both loss terms
+    return positive_loss + negative_loss
+
+
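Spelled out, custom_loss is plain multi-label binary cross-entropy summed over classes, and focal_loss above reduces to exactly half of it for gamma = 0 and alpha = 0.5:

\mathcal{L}_{\mathrm{custom}}(y, \hat{y}) = -\sum_{c} \left[ y_c \log \hat{y}_c + (1 - y_c) \log(1 - \hat{y}_c) \right]
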
+def flat_sigmoid(x, sensitivity=-1, bias=1.0):
+    """
+    Applies a flat sigmoid function to the input array with a bias shift.
+
+    The flat sigmoid function is defined as:
+        f(x) = 1 / (1 + exp(sensitivity * clip(x + transformed_bias, -20, 20)))
+
+    We transform the bias parameter to the range [-9.9, 9.9] with the formula:
+        transformed_bias = (bias - 1.0) * 10.0
+
+    Thus, higher bias values shift the sigmoid function to the left on the x-axis (its midpoint moves to x = -transformed_bias), making it more "sensitive".
+
+    Note: The input is clipped to [-20, 20] before the exponential, presumably for numerical stability.
+
+    Args:
+        x (array-like): Input data.
+        sensitivity (float, optional): Sensitivity parameter for the sigmoid function. Default is -1.
+        bias (float, optional): Bias parameter to shift the sigmoid function on the x-axis. Must be in the range [0.01, 1.99]. Default is 1.0.
+
+    Returns:
+        numpy.ndarray: Transformed data after applying the flat sigmoid function.
+    """
+
+    transformed_bias = (bias - 1.0) * 10.0
+
+    return 1 / (1.0 + np.exp(sensitivity * np.clip(x + transformed_bias, -20, 20)))
+
+
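Worked numbers for the default sensitivity of -1 (purely illustrative):

import numpy as np

# bias = 1.0 -> transformed_bias = 0.0, so a raw score of 0 maps to 0.5:
print(flat_sigmoid(np.array([0.0])))            # [0.5]

# bias = 1.5 -> transformed_bias = 5.0, the same raw score now maps much higher:
print(flat_sigmoid(np.array([0.0]), bias=1.5))  # [~0.9933], i.e. 1 / (1 + exp(-5))
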
+def predict(sample):
+    """Uses the main net to predict a sample.
+
+    Args:
+        sample: Audio sample.
+
+    Returns:
+        The prediction scores for the sample.
+    """
+    # Has custom classifier?
+    if cfg.CUSTOM_CLASSIFIER is not None:
+        return predict_with_custom_classifier(sample)
+
+    # Does interpreter or keras model exist?
+    if INTERPRETER is None and PBMODEL is None:
+        load_model()
+
+    if PBMODEL is None:
+        # Reshape input tensor
+        INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
+        INTERPRETER.allocate_tensors()
+
+        # Make a prediction (Audio only for now)
+        INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
+        INTERPRETER.invoke()
+        return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
+
+    # Make a prediction (Audio only for now)
+    return PBMODEL.basic(sample)["scores"]
+
+
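End to end, batched inference looks roughly like the sketch below. The batching is an assumption (chunk extraction lives in the audio module), and cfg.SIGMOID_SENSITIVITY is assumed to be the analyzer's sensitivity setting:

import numpy as np

# Two 3 s chunks at 48 kHz; in practice these come from the audio splitting pipeline.
batch = [np.zeros(144000, dtype="float32"), np.zeros(144000, dtype="float32")]

logits = predict(batch)  # shape: (2, num_labels)
confidences = flat_sigmoid(np.array(logits), sensitivity=-cfg.SIGMOID_SENSITIVITY)
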
+def predict_with_custom_classifier(sample):
+    """Uses the custom classifier to make a prediction.
+
+    Args:
+        sample: Audio sample.
+
+    Returns:
+        The prediction scores for the sample.
+    """
+    # Does interpreter exist?
+    if C_INTERPRETER is None and C_PBMODEL is None:
+        load_custom_classifier()
+
+    if C_PBMODEL is None:
+        # If the classifier was trained on embeddings, compute them first; otherwise feed raw audio.
+        vector = embeddings(sample) if C_INPUT_SIZE != 144000 else sample
+
+        # Reshape input tensor
+        C_INTERPRETER.resize_tensor_input(C_INPUT_LAYER_INDEX, [len(vector), *vector[0].shape])
+        C_INTERPRETER.allocate_tensors()
+
+        # Make a prediction
+        C_INTERPRETER.set_tensor(C_INPUT_LAYER_INDEX, np.array(vector, dtype="float32"))
+        C_INTERPRETER.invoke()
+
+        return C_INTERPRETER.get_tensor(C_OUTPUT_LAYER_INDEX)
+
+    return C_PBMODEL.basic(sample)["scores"]
+
+
+def embeddings(sample):
+    """Extracts the embeddings for a sample.
+
+    Args:
+        sample: Audio samples.
+
+    Returns:
+        The embeddings.
+    """
+    # Does interpreter exist?
+    if INTERPRETER is None:
+        load_model(False)
+
+    # Reshape input tensor
+    INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
+    INTERPRETER.allocate_tensors()
+
+    # Extract feature embeddings
+    INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
+    INTERPRETER.invoke()
+
+    return INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
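A hedged sketch of using these embeddings downstream, e.g. for similarity search; the embedding dimensionality is an assumption (commonly 1024 for the V2.4 model):

import numpy as np

chunks = [np.zeros(144000, dtype="float32")]  # one 3 s chunk at 48 kHz
vecs = embeddings(chunks)                     # shape: (1, embedding_dim)

# Cosine similarity between two embedding vectors (hypothetical helper):
def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-9))
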