birdnet-analyzer 2.0.0-py3-none-any.whl → 2.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- birdnet_analyzer/__init__.py +9 -8
- birdnet_analyzer/analyze/__init__.py +19 -5
- birdnet_analyzer/analyze/__main__.py +3 -4
- birdnet_analyzer/analyze/cli.py +30 -25
- birdnet_analyzer/analyze/core.py +246 -245
- birdnet_analyzer/analyze/utils.py +694 -701
- birdnet_analyzer/audio.py +368 -372
- birdnet_analyzer/cli.py +732 -707
- birdnet_analyzer/config.py +243 -242
- birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
- birdnet_analyzer/embeddings/__init__.py +3 -4
- birdnet_analyzer/embeddings/__main__.py +3 -3
- birdnet_analyzer/embeddings/cli.py +12 -13
- birdnet_analyzer/embeddings/core.py +70 -70
- birdnet_analyzer/embeddings/utils.py +220 -193
- birdnet_analyzer/evaluation/__init__.py +189 -195
- birdnet_analyzer/evaluation/__main__.py +3 -3
- birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
- birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
- birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -0
- birdnet_analyzer/evaluation/assessment/plotting.py +378 -0
- birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
- birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
- birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
- birdnet_analyzer/gui/__init__.py +19 -23
- birdnet_analyzer/gui/__main__.py +3 -3
- birdnet_analyzer/gui/analysis.py +179 -174
- birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
- birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
- birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
- birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
- birdnet_analyzer/gui/assets/gui.css +36 -28
- birdnet_analyzer/gui/assets/gui.js +93 -93
- birdnet_analyzer/gui/embeddings.py +638 -620
- birdnet_analyzer/gui/evaluation.py +801 -813
- birdnet_analyzer/gui/localization.py +75 -68
- birdnet_analyzer/gui/multi_file.py +265 -246
- birdnet_analyzer/gui/review.py +472 -527
- birdnet_analyzer/gui/segments.py +191 -191
- birdnet_analyzer/gui/settings.py +149 -129
- birdnet_analyzer/gui/single_file.py +264 -269
- birdnet_analyzer/gui/species.py +95 -95
- birdnet_analyzer/gui/train.py +687 -698
- birdnet_analyzer/gui/utils.py +797 -808
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
- birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
- birdnet_analyzer/lang/de.json +341 -334
- birdnet_analyzer/lang/en.json +341 -334
- birdnet_analyzer/lang/fi.json +341 -334
- birdnet_analyzer/lang/fr.json +341 -334
- birdnet_analyzer/lang/id.json +341 -334
- birdnet_analyzer/lang/pt-br.json +341 -334
- birdnet_analyzer/lang/ru.json +341 -334
- birdnet_analyzer/lang/se.json +341 -334
- birdnet_analyzer/lang/tlh.json +341 -334
- birdnet_analyzer/lang/zh_TW.json +341 -334
- birdnet_analyzer/model.py +1212 -1243
- birdnet_analyzer/playground.py +5 -0
- birdnet_analyzer/search/__init__.py +3 -3
- birdnet_analyzer/search/__main__.py +3 -3
- birdnet_analyzer/search/cli.py +11 -12
- birdnet_analyzer/search/core.py +78 -78
- birdnet_analyzer/search/utils.py +107 -111
- birdnet_analyzer/segments/__init__.py +3 -3
- birdnet_analyzer/segments/__main__.py +3 -3
- birdnet_analyzer/segments/cli.py +13 -14
- birdnet_analyzer/segments/core.py +81 -78
- birdnet_analyzer/segments/utils.py +383 -394
- birdnet_analyzer/species/__init__.py +3 -3
- birdnet_analyzer/species/__main__.py +3 -3
- birdnet_analyzer/species/cli.py +13 -14
- birdnet_analyzer/species/core.py +35 -35
- birdnet_analyzer/species/utils.py +74 -75
- birdnet_analyzer/train/__init__.py +3 -3
- birdnet_analyzer/train/__main__.py +3 -3
- birdnet_analyzer/train/cli.py +13 -14
- birdnet_analyzer/train/core.py +113 -113
- birdnet_analyzer/train/utils.py +877 -847
- birdnet_analyzer/translate.py +133 -104
- birdnet_analyzer/utils.py +425 -419
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/METADATA +146 -129
- birdnet_analyzer-2.1.0.dist-info/RECORD +125 -0
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/WHEEL +1 -1
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/licenses/LICENSE +18 -18
- birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
- birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/entry_points.txt +0 -0
- {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/top_level.txt +0 -0
@@ -1,701 +1,694 @@
+"""Module to analyze audio samples."""
+
+import datetime
+import json
+import operator
+import os
+from collections.abc import Sequence
+
+import numpy as np
+
+import birdnet_analyzer.config as cfg
+from birdnet_analyzer import audio, model, utils
+
+RAVEN_TABLE_HEADER = (
+    "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tCommon Name\tSpecies Code\tConfidence\tBegin Path\tFile Offset (s)\n"
+)
+KALEIDOSCOPE_HEADER = "INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity\n"
+CSV_HEADER = "Start (s),End (s),Scientific name,Common name,Confidence,File\n"
+SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
+
+
+def save_analysis_params(path):
+    utils.save_params(
+        path,
+        (
+            "File splitting duration",
+            "Segment length",
+            "Sample rate",
+            "Segment overlap",
+            "Minimum Segment length",
+            "Bandpass filter minimum",
+            "Bandpass filter maximum",
+            "Merge consecutive detections",
+            "Audio speed",
+            "Custom classifier path",
+        ),
+        (
+            cfg.FILE_SPLITTING_DURATION,
+            cfg.SIG_LENGTH,
+            cfg.SAMPLE_RATE,
+            cfg.SIG_OVERLAP,
+            cfg.SIG_MINLEN,
+            cfg.BANDPASS_FMIN,
+            cfg.BANDPASS_FMAX,
+            cfg.MERGE_CONSECUTIVE,
+            cfg.AUDIO_SPEED,
+            cfg.CUSTOM_CLASSIFIER,
+        ),
+    )
+
+
+def load_codes():
+    """Loads the eBird codes.
+
+    Returns:
+        A dictionary containing the eBird codes.
+    """
+    with open(os.path.join(SCRIPT_DIR, cfg.CODES_FILE)) as cfile:
+        return json.load(cfile)
+
+
+def generate_raven_table(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
+    """
+    Generates a Raven selection table from the given timestamps and prediction results.
+
+    Args:
+        timestamps (list[str]): List of timestamp strings in the format "start-end".
+        result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of predictions.
+        afile_path (str): Path to the audio file being analyzed.
+        result_path (str): Path where the resulting Raven selection table will be saved.
+
+    Returns:
+        None
+    """
+    selection_id = 0
+    out_string = RAVEN_TABLE_HEADER
+
+    # Read native sample rate
+    high_freq = audio.get_sample_rate(afile_path) / 2
+
+    high_freq = min(high_freq, int(cfg.SIG_FMAX / cfg.AUDIO_SPEED))
+
+    high_freq = int(min(high_freq, int(cfg.BANDPASS_FMAX / cfg.AUDIO_SPEED)))
+    low_freq = max(cfg.SIG_FMIN, int(cfg.BANDPASS_FMIN / cfg.AUDIO_SPEED))
+
+    # Extract valid predictions for every timestamp
+    for timestamp in timestamps:
+        rstring = ""
+        start, end = timestamp.split("-", 1)
+
+        for c in result[timestamp]:
+            selection_id += 1
+            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
+            code = cfg.CODES[c[0]] if c[0] in cfg.CODES else c[0]
+            rstring += (
+                f"{selection_id}\tSpectrogram 1\t1\t{start}\t{end}\t{low_freq}\t{high_freq}\t{label.split('_', 1)[-1]}\t{code}\t{c[1]:.4f}\t{afile_path}\t{start}\n"
+            )
+
+        # Write result string to file
+        out_string += rstring
+
+    # If we don't have any valid predictions, we still need to add a line to the selection table
+    # in case we want to combine results
+    # TODO: That's a weird way to do it, but it works for now. It would be better to keep track
+    # of file durations during the analysis.
+    if len(out_string) == len(RAVEN_TABLE_HEADER) and cfg.OUTPUT_PATH is not None:
+        selection_id += 1
+        out_string += f"{selection_id}\tSpectrogram 1\t1\t0\t3\t{low_freq}\t{high_freq}\tnocall\tnocall\t1.0\t{afile_path}\t0\n"
+
+    utils.save_result_file(result_path, out_string)
+
+
+def generate_audacity(timestamps: list[str], result: dict[str, list], result_path: str):
+    """
+    Generates an Audacity timeline label file from the given timestamps and results.
+
+    Args:
+        timestamps (list[str]): A list of timestamp strings.
+        result (dict[str, list]): A dictionary where keys are timestamps and values are lists of tuples,
+            each containing a label and a confidence score.
+        result_path (str): The file path where the result string will be saved.
+
+    Returns:
+        None
+    """
+    out_string = ""
+
+    # Audacity timeline labels
+    for timestamp in timestamps:
+        rstring = ""
+
+        for c in result[timestamp]:
+            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
+            ts = timestamp.replace("-", "\t")
+            lbl = label.replace("_", ", ")
+            rstring += f"{ts}\t{lbl}\t{c[1]:.4f}\n"
+
+        # Write result string to file
+        out_string += rstring
+
+    utils.save_result_file(result_path, out_string)
+
+
+def generate_kaleidoscope(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
+    """
+    Generates a Kaleidoscope-compatible CSV string from the given timestamps and results, and saves it to a file.
+
+    Args:
+        timestamps (list[str]): List of timestamp strings in the format "start-end".
+        result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of tuples containing
+            species label and confidence score.
+        afile_path (str): Path to the audio file being analyzed.
+        result_path (str): Path where the resulting CSV file will be saved.
+
+    Returns:
+        None
+    """
+    out_string = KALEIDOSCOPE_HEADER
+
+    folder_path, filename = os.path.split(afile_path)
+    parent_folder, folder_name = os.path.split(folder_path)
+
+    for timestamp in timestamps:
+        rstring = ""
+        start, end = timestamp.split("-", 1)
+
+        for c in result[timestamp]:
+            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
+            rstring += "{},{},{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{}\n".format(
+                parent_folder.rstrip("/"),
+                folder_name,
+                filename,
+                start,
+                float(end) - float(start),
+                label.split("_", 1)[0],
+                label.split("_", 1)[-1],
+                c[1],
+                cfg.LATITUDE,
+                cfg.LONGITUDE,
+                cfg.WEEK,
+                cfg.SIG_OVERLAP,
+                cfg.SIGMOID_SENSITIVITY,
+            )
+
+        # Write result string to file
+        out_string += rstring
+
+    utils.save_result_file(result_path, out_string)
+
+
+def generate_csv(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
+    """
+    Generates a CSV file from the given timestamps and results.
+
+    Args:
+        timestamps (list[str]): A list of timestamp strings in the format "start-end".
+        result (dict[str, list]): A dictionary where keys are timestamp strings and values are lists of tuples.
+            Each tuple contains a label and a confidence score.
+        afile_path (str): The file path of the audio file being analyzed.
+        result_path (str): The file path where the resulting CSV file will be saved.
+
+    Returns:
+        None
+    """
+    from birdnet_analyzer.analyze import POSSIBLE_ADDITIONAL_COLUMNS_MAP
+
+    out_string = CSV_HEADER
+    columns_map = {}
+
+    if cfg.ADDITIONAL_COLUMNS:
+        for col in cfg.ADDITIONAL_COLUMNS:
+            if col in POSSIBLE_ADDITIONAL_COLUMNS_MAP:
+                columns_map[col] = POSSIBLE_ADDITIONAL_COLUMNS_MAP[col]()
+
+    if columns_map:
+        out_string = out_string[:-1] + "," + ",".join(columns_map) + "\n"
+
+    for timestamp in timestamps:
+        rstring = ""
+
+        for c in result[timestamp]:
+            start, end = timestamp.split("-", 1)
+            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
+            rstring += f"{start},{end},{label.split('_', 1)[0]},{label.split('_', 1)[-1]},{c[1]:.4f},{afile_path}"
+
+            if columns_map:
+                rstring += "," + ",".join(str(val) for val in columns_map.values())
+
+            rstring += "\n"
+
+        # Write result string to file
+        out_string += rstring
+
+    utils.save_result_file(result_path, out_string)
+
+
+def save_result_files(r: dict[str, list], result_files: dict[str, str], afile_path: str):
+    """
+    Saves the result files in various formats based on the provided configuration.
+
+    Args:
+        r (dict[str, list]): A dictionary containing the analysis results with timestamps as keys.
+        result_files (dict[str, str]): A dictionary mapping result types to their respective file paths.
+        afile_path (str): The path to the audio file being analyzed.
+
+    Returns:
+        None
+    """
+
+    os.makedirs(cfg.OUTPUT_PATH, exist_ok=True)
+
+    # Merge consecutive detections of the same species
+    r_merged = merge_consecutive_detections(r, cfg.MERGE_CONSECUTIVE)
+
+    # Selection table
+    timestamps = get_sorted_timestamps(r_merged)
+
+    if "table" in result_files:
+        generate_raven_table(timestamps, r_merged, afile_path, result_files["table"])
+
+    if "audacity" in cfg.RESULT_TYPES:
+        generate_audacity(timestamps, r_merged, result_files["audacity"])
+
+    # if "r" in cfg.RESULT_TYPES:
+    #     generate_rtable(timestamps, r, afile_path, result_files["r"])
+
+    if "kaleidoscope" in cfg.RESULT_TYPES:
+        generate_kaleidoscope(timestamps, r_merged, afile_path, result_files["kaleidoscope"])
+
+    if "csv" in cfg.RESULT_TYPES:
+        generate_csv(timestamps, r_merged, afile_path, result_files["csv"])
+
+
+def combine_raven_tables(saved_results: list[str]):
+    """
+    Combines multiple Raven selection table files into a single file and adjusts the selection IDs and times.
+
+    Args:
+        saved_results (list[str]): List of file paths to the Raven selection table files to be combined.
+
+    Returns:
+        None
+    """
+    # Combine all files
+    s_id = 1
+    time_offset = 0
+    audiofiles = []
+
+    with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_RAVEN_FILENAME), "w", encoding="utf-8") as f:
+        f.write(RAVEN_TABLE_HEADER)
+
+        for rfile in saved_results:
+            if not rfile:
+                continue
+            with open(rfile, encoding="utf-8") as rf:
+                try:
+                    lines = rf.readlines()
+
+                    # make sure it's a selection table
+                    if "Selection" not in lines[0] or "File Offset" not in lines[0]:
+                        continue
+
+                    # skip header and add to file
+                    f_name = lines[1].split("\t")[10]
+                    f_duration = audio.get_audio_file_length(f_name)
+
+                    audiofiles.append(f_name)
+
+                    for line in lines[1:]:
+                        # empty line?
+                        if not line.strip():
+                            continue
+
+                        # Is species code and common name == 'nocall'?
+                        # If so, that's a dummy line and we can skip it
+                        if line.split("\t")[7] == "nocall" and line.split("\t")[8] == "nocall":
+                            continue
+
+                        # adjust selection id
+                        line_elements = line.split("\t")
+                        line_elements[0] = str(s_id)
+                        s_id += 1
+
+                        # adjust time
+                        line_elements[3] = str(float(line_elements[3]) + time_offset)
+                        line_elements[4] = str(float(line_elements[4]) + time_offset)
+
+                        # write line
+                        f.write("\t".join(line_elements))
+
+                    # adjust time offset
+                    time_offset += f_duration
+
+                except Exception as ex:
+                    print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
+                    utils.write_error_log(ex)
+
+    listfilesname = cfg.OUTPUT_RAVEN_FILENAME.rsplit(".", 1)[0] + ".list.txt"
+
+    with open(os.path.join(cfg.OUTPUT_PATH, listfilesname), "w", encoding="utf-8") as f:
+        f.writelines(f + "\n" for f in audiofiles)
+
+
+def combine_kaleidoscope_files(saved_results: list[str]):
+    """
+    Combines multiple Kaleidoscope result files into a single file.
+
+    Args:
+        saved_results (list[str]): A list of file paths to the saved Kaleidoscope result files.
+
+    Returns:
+        None
+    """
+    # Combine all files
+    with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_KALEIDOSCOPE_FILENAME), "w", encoding="utf-8") as f:
+        f.write(KALEIDOSCOPE_HEADER)
+
+        for rfile in saved_results:
+            with open(rfile, encoding="utf-8") as rf:
+                try:
+                    lines = rf.readlines()
+
+                    # make sure it's a selection table
+                    if "INDIR" not in lines[0] or "sensitivity" not in lines[0]:
+                        continue
+
+                    # skip header and add to file
+                    for line in lines[1:]:
+                        f.write(line)
+
+                except Exception as ex:
+                    print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
+                    utils.write_error_log(ex)
+
+
+def combine_csv_files(saved_results: list[str]):
+    """
+    Combines multiple CSV files into a single CSV file.
+
+    Args:
+        saved_results (list[str]): A list of file paths to the CSV files to be combined.
+    """
+    out_string = ""
+
+    for rfile in saved_results:
+        try:
+            with open(rfile, encoding="utf-8") as rf:
+                lines = rf.readlines()
+                out_string += "".join(lines[1:] if out_string else lines)
+
+        except Exception as ex:
+            print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
+            utils.write_error_log(ex)
+
+    with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_CSV_FILENAME), "w", encoding="utf-8") as f:
+        f.write(out_string)
+
+
+def combine_results(saved_results: Sequence[dict[str, str] | None]):
+    """
+    Combines various types of result files based on the configuration settings.
+    This function checks the types of results specified in the configuration
+    and combines the corresponding files from the saved results list.
+
+    Args:
+        saved_results (list[dict[str, str]]): A list of dictionaries containing
+            file paths for different result types. Each dictionary represents
+            a set of result files for a particular analysis.
+
+    Returns:
+        None
+    """
+    if "table" in cfg.RESULT_TYPES:
+        combine_raven_tables([f["table"] for f in saved_results if f])
+
+    if "kaleidoscope" in cfg.RESULT_TYPES:
+        combine_kaleidoscope_files([f["kaleidoscope"] for f in saved_results if f])
+
+    if "csv" in cfg.RESULT_TYPES:
+        combine_csv_files([f["csv"] for f in saved_results if f])
+
+
+def merge_consecutive_detections(results: dict[str, list], max_consecutive: int | None = None):
+    """Merges consecutive detections of the same species.
+    Uses the mean of the top-3 highest scoring predictions as
+    confidence score for the merged detection.
+
+    Args:
+        results: The dictionary with {segment: scores}.
+        max_consecutive: The maximum number of consecutive detections to merge.
+            If None, merge all consecutive detections.
+
+    Returns:
+        The dictionary with merged detections.
+    """
+
+    # If max_consecutive is 0 or 1, return original results
+    if max_consecutive is not None and max_consecutive <= 1:
+        return results
+
+    # For each species, make list of timestamps and scores
+    species = {}
+    for timestamp, scores in results.items():
+        for label, score in scores:
+            if label not in species:
+                species[label] = []
+            species[label].append((timestamp, score))
+
+    # Sort timestamps by start time for each species
+    for label, timestamps in species.items():
+        species[label] = sorted(timestamps, key=lambda t: float(t[0].split("-", 1)[0]))
+
+    # Merge consecutive detections
+    merged_results = {}
+    for label in species:
+        timestamps = species[label]
+
+        # Check if end time of current detection is within the start time of the next detection
+        i = 0
+        while i < len(timestamps) - 1:
+            start, end = timestamps[i][0].split("-", 1)
+            next_start, next_end = timestamps[i + 1][0].split("-", 1)
+
+            if float(end) >= float(next_start):
+                # Merge detections
+                merged_scores = [timestamps[i][1], timestamps[i + 1][1]]
+                timestamps.pop(i)
+
+                while i < len(timestamps) - 1 and float(next_end) >= float(timestamps[i + 1][0].split("-", 1)[0]):
+                    if max_consecutive and len(merged_scores) >= max_consecutive:
+                        break
+                    merged_scores.append(timestamps[i + 1][1])
+                    next_end = timestamps[i + 1][0].split("-", 1)[1]
+                    timestamps.pop(i + 1)
+
+                # Calculate mean of top 3 scores
+                top_3_scores = sorted(merged_scores, reverse=True)[:3]
+                merged_score = sum(top_3_scores) / len(top_3_scores)
+
+                timestamps[i] = (f"{start}-{next_end}", merged_score)
+
+            i += 1
+
+        merged_results[label] = timestamps
+
+    # Restore original format
+    results = {}
+    for label, timestamps in merged_results.items():
+        for timestamp, score in timestamps:
+            if timestamp not in results:
+                results[timestamp] = []
+            results[timestamp].append((label, score))
+
+    return results
+
+
+def get_sorted_timestamps(results: dict[str, list]):
+    """Sorts the results based on the segments.
+
+    Args:
+        results: The dictionary with {segment: scores}.
+
+    Returns:
+        Returns the sorted list of segments and their scores.
+    """
+    return sorted(results, key=lambda t: float(t.split("-", 1)[0]))
+
+
+def get_raw_audio_from_file(fpath: str, offset, duration):
+    """Reads an audio file and splits the signal into chunks.
+
+    Args:
+        fpath: Path to the audio file.
+
+    Returns:
+        The signal split into a list of chunks.
+    """
+    # Open file
+    sig, rate = audio.open_audio_file(fpath, cfg.SAMPLE_RATE, offset, duration, cfg.BANDPASS_FMIN, cfg.BANDPASS_FMAX, cfg.AUDIO_SPEED)
+
+    # Split into raw audio chunks
+    return audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
+
+
+def predict(samples):
+    """Predicts the classes for the given samples.
+
+    Args:
+        samples: Samples to be predicted.
+
+    Returns:
+        The prediction scores.
+    """
+    # Prepare sample and pass through model
+    data = np.array(samples, dtype="float32")
+    prediction = model.predict(data)
+
+    # Logits or sigmoid activations?
+    if cfg.APPLY_SIGMOID:
+        prediction = model.flat_sigmoid(np.array(prediction), sensitivity=-1, bias=cfg.SIGMOID_SENSITIVITY)
+
+    return prediction
+
+
+def get_result_file_names(fpath: str):
+    """
+    Generates a dictionary of result file names based on the input file path and configured result types.
+
+    Args:
+        fpath (str): The file path of the input file.
+
+    Returns:
+        dict: A dictionary where the keys are result types (e.g., "table", "audacity", "r", "kaleidoscope", "csv")
+            and the values are the corresponding output file paths.
+    """
+    result_names = {}
+
+    rpath = fpath.replace(cfg.INPUT_PATH, "")
+
+    rpath = (rpath[1:] if rpath[0] in ["/", "\\"] else rpath) if rpath else os.path.basename(fpath)
+
+    file_shorthand = rpath.rsplit(".", 1)[0]
+
+    if "table" in cfg.RESULT_TYPES:
+        result_names["table"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.selection.table.txt")
+    if "audacity" in cfg.RESULT_TYPES:
+        result_names["audacity"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.txt")
+    # if "r" in cfg.RESULT_TYPES:
+    #     result_names["r"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.r.csv")
+    if "kaleidoscope" in cfg.RESULT_TYPES:
+        result_names["kaleidoscope"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.kaleidoscope.csv")
+    if "csv" in cfg.RESULT_TYPES:
+        result_names["csv"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.csv")
+
+    return result_names
+
+
+def analyze_file(item) -> dict[str, str] | None:
+    """
+    Analyzes an audio file and generates prediction results.
+
+    Args:
+        item (tuple): A tuple containing the file path (str) and configuration settings.
+
+    Returns:
+        dict or None: A dictionary of result file names if analysis is successful,
+            None if the file is skipped or an error occurs.
+    Raises:
+        Exception: If there is an error in reading the audio file or saving the results.
+    """
+    # Get file path and restore cfg
+    fpath: str = item[0]
+    cfg.set_config(item[1])
+
+    result_file_names = get_result_file_names(fpath)
+
+    if cfg.SKIP_EXISTING_RESULTS and all(os.path.exists(f) for f in result_file_names.values()):
+        print(f"Skipping {fpath} as it has already been analyzed", flush=True)
+        return None  # or return path to combine later? TODO
+
+    # Start time
+    start_time = datetime.datetime.now()
+    offset = 0
+    duration = int(cfg.FILE_SPLITTING_DURATION / cfg.AUDIO_SPEED)
+    start, end = 0, cfg.SIG_LENGTH
+    results = {}
+
+    # Status
+    print(f"Analyzing {fpath}", flush=True)
+
+    try:
+        fileLengthSeconds = audio.get_audio_file_length(fpath)
+    except Exception as ex:
+        # Write error log
+        print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
+        utils.write_error_log(ex)
+
+        return None
+
+    # Process each chunk
+    try:
+        while offset < fileLengthSeconds:
+            chunks = get_raw_audio_from_file(fpath, offset, duration)
+            samples = []
+            timestamps = []
+
+            for chunk_index, chunk in enumerate(chunks):
+                # Add to batch
+                samples.append(chunk)
+                timestamps.append([round(start * cfg.AUDIO_SPEED, 1), round(end * cfg.AUDIO_SPEED, 1)])
+
+                # Advance start and end
+                start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
+                end = start + cfg.SIG_LENGTH
+
+                # Check if batch is full or last chunk
+                if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
+                    continue
+
+                # Predict
+                p = predict(samples)
+
+                # Add to results
+                for i in range(len(samples)):
+                    # Get timestamp
+                    s_start, s_end = timestamps[i]
+
+                    # Get prediction
+                    pred = p[i]
+
+                    # Assign scores to labels
+                    p_labels = [
+                        p
+                        for p in zip(cfg.LABELS, pred, strict=True)
+                        if (cfg.TOP_N or p[1] >= cfg.MIN_CONFIDENCE) and (not cfg.SPECIES_LIST or p[0] in cfg.SPECIES_LIST)
+                    ]
+
+                    # Sort by score
+                    p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
+
+                    if cfg.TOP_N:
+                        p_sorted = p_sorted[: cfg.TOP_N]
+
+                    # TODO: hier schon top n oder min conf raussortieren
+                    # Store top 5 results and advance indices
+                    results[str(s_start) + "-" + str(s_end)] = p_sorted
+
+                # Clear batch
+                samples = []
+                timestamps = []
+            offset = offset + duration
+
+    except Exception as ex:
+        # Write error log
+        print(f"Error: Cannot analyze audio file {fpath}.\n", flush=True)
+        utils.write_error_log(ex)
+
+        return None
+
+    # Save as selection table
+    try:
+        save_result_files(results, result_file_names, fpath)
+
+    except Exception as ex:
+        # Write error log
+        print(f"Error: Cannot save result for {fpath}.\n", flush=True)
+        utils.write_error_log(ex)
+
+        return None
+
+    delta_time = (datetime.datetime.now() - start_time).total_seconds()
+    print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
+
+    return result_file_names
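The notable behavioral change in this hunk is the new merge_consecutive_detections() step that save_result_files() now runs before any writer: overlapping or touching detections of the same species are collapsed into one detection whose confidence is the mean of the top-3 merged scores. A minimal sketch of that behavior, assuming birdnet-analyzer 2.1.0 is installed; the species label and timestamps below are illustrative, not taken from the package:

# Sketch (not part of the package): exercise the new merge logic on
# hand-built results in the "start-end" key format the module uses.
from birdnet_analyzer.analyze.utils import get_sorted_timestamps, merge_consecutive_detections

results = {
    "0.0-3.0": [("Poecile atricapillus_Black-capped Chickadee", 0.5)],
    "3.0-6.0": [("Poecile atricapillus_Black-capped Chickadee", 0.75)],
    "9.0-12.0": [("Poecile atricapillus_Black-capped Chickadee", 0.7)],
}

# The first two segments touch (end 3.0 >= next start 3.0), so they merge
# into "0.0-6.0" with confidence (0.75 + 0.5) / 2 = 0.625; the detached
# "9.0-12.0" segment is left as-is.
merged = merge_consecutive_detections(results, max_consecutive=None)

for ts in get_sorted_timestamps(merged):
    print(ts, merged[ts])
# 0.0-6.0 [('Poecile atricapillus_Black-capped Chickadee', 0.625)]
# 9.0-12.0 [('Poecile atricapillus_Black-capped Chickadee', 0.7)]

Per the code above, passing max_consecutive=None merges every run of touching detections, a value of 0 or 1 returns the input unchanged, and any larger integer caps how many consecutive detections join one merge window; the merge runs before get_sorted_timestamps(), so the Raven, Kaleidoscope, and CSV writers all receive the already-merged segments.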