birdnet-analyzer 2.0.0__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. birdnet_analyzer/__init__.py +9 -8
  2. birdnet_analyzer/analyze/__init__.py +19 -5
  3. birdnet_analyzer/analyze/__main__.py +3 -4
  4. birdnet_analyzer/analyze/cli.py +30 -25
  5. birdnet_analyzer/analyze/core.py +246 -245
  6. birdnet_analyzer/analyze/utils.py +694 -701
  7. birdnet_analyzer/audio.py +368 -372
  8. birdnet_analyzer/cli.py +732 -707
  9. birdnet_analyzer/config.py +243 -242
  10. birdnet_analyzer/eBird_taxonomy_codes_2024E.json +13046 -0
  11. birdnet_analyzer/embeddings/__init__.py +3 -4
  12. birdnet_analyzer/embeddings/__main__.py +3 -3
  13. birdnet_analyzer/embeddings/cli.py +12 -13
  14. birdnet_analyzer/embeddings/core.py +70 -70
  15. birdnet_analyzer/embeddings/utils.py +220 -193
  16. birdnet_analyzer/evaluation/__init__.py +189 -195
  17. birdnet_analyzer/evaluation/__main__.py +3 -3
  18. birdnet_analyzer/evaluation/assessment/__init__.py +0 -0
  19. birdnet_analyzer/evaluation/assessment/metrics.py +388 -0
  20. birdnet_analyzer/evaluation/assessment/performance_assessor.py +364 -0
  21. birdnet_analyzer/evaluation/assessment/plotting.py +378 -0
  22. birdnet_analyzer/evaluation/preprocessing/__init__.py +0 -0
  23. birdnet_analyzer/evaluation/preprocessing/data_processor.py +631 -0
  24. birdnet_analyzer/evaluation/preprocessing/utils.py +98 -0
  25. birdnet_analyzer/gui/__init__.py +19 -23
  26. birdnet_analyzer/gui/__main__.py +3 -3
  27. birdnet_analyzer/gui/analysis.py +179 -174
  28. birdnet_analyzer/gui/assets/arrow_down.svg +4 -4
  29. birdnet_analyzer/gui/assets/arrow_left.svg +4 -4
  30. birdnet_analyzer/gui/assets/arrow_right.svg +4 -4
  31. birdnet_analyzer/gui/assets/arrow_up.svg +4 -4
  32. birdnet_analyzer/gui/assets/gui.css +36 -28
  33. birdnet_analyzer/gui/assets/gui.js +93 -93
  34. birdnet_analyzer/gui/embeddings.py +638 -620
  35. birdnet_analyzer/gui/evaluation.py +801 -813
  36. birdnet_analyzer/gui/localization.py +75 -68
  37. birdnet_analyzer/gui/multi_file.py +265 -246
  38. birdnet_analyzer/gui/review.py +472 -527
  39. birdnet_analyzer/gui/segments.py +191 -191
  40. birdnet_analyzer/gui/settings.py +149 -129
  41. birdnet_analyzer/gui/single_file.py +264 -269
  42. birdnet_analyzer/gui/species.py +95 -95
  43. birdnet_analyzer/gui/train.py +687 -698
  44. birdnet_analyzer/gui/utils.py +797 -808
  45. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_af.txt +6522 -6522
  46. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ar.txt +6522 -6522
  47. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_bg.txt +6522 -6522
  48. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ca.txt +6522 -6522
  49. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_cs.txt +6522 -6522
  50. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_da.txt +6522 -6522
  51. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_de.txt +6522 -6522
  52. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_el.txt +6522 -6522
  53. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_en_uk.txt +6522 -6522
  54. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_es.txt +6522 -6522
  55. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fi.txt +6522 -6522
  56. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_fr.txt +6522 -6522
  57. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_he.txt +6522 -6522
  58. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hr.txt +6522 -6522
  59. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_hu.txt +6522 -6522
  60. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_in.txt +6522 -6522
  61. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_is.txt +6522 -6522
  62. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_it.txt +6522 -6522
  63. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ja.txt +6522 -6522
  64. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ko.txt +6522 -6522
  65. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_lt.txt +6522 -6522
  66. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ml.txt +6522 -6522
  67. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_nl.txt +6522 -6522
  68. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_no.txt +6522 -6522
  69. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pl.txt +6522 -6522
  70. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_BR.txt +6522 -6522
  71. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_pt_PT.txt +6522 -6522
  72. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ro.txt +6522 -6522
  73. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_ru.txt +6522 -6522
  74. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sk.txt +6522 -6522
  75. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sl.txt +6522 -6522
  76. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sr.txt +6522 -6522
  77. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_sv.txt +6522 -6522
  78. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_th.txt +6522 -6522
  79. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_tr.txt +6522 -6522
  80. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_uk.txt +6522 -6522
  81. birdnet_analyzer/labels/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels_zh.txt +6522 -6522
  82. birdnet_analyzer/lang/de.json +341 -334
  83. birdnet_analyzer/lang/en.json +341 -334
  84. birdnet_analyzer/lang/fi.json +341 -334
  85. birdnet_analyzer/lang/fr.json +341 -334
  86. birdnet_analyzer/lang/id.json +341 -334
  87. birdnet_analyzer/lang/pt-br.json +341 -334
  88. birdnet_analyzer/lang/ru.json +341 -334
  89. birdnet_analyzer/lang/se.json +341 -334
  90. birdnet_analyzer/lang/tlh.json +341 -334
  91. birdnet_analyzer/lang/zh_TW.json +341 -334
  92. birdnet_analyzer/model.py +1212 -1243
  93. birdnet_analyzer/playground.py +5 -0
  94. birdnet_analyzer/search/__init__.py +3 -3
  95. birdnet_analyzer/search/__main__.py +3 -3
  96. birdnet_analyzer/search/cli.py +11 -12
  97. birdnet_analyzer/search/core.py +78 -78
  98. birdnet_analyzer/search/utils.py +107 -111
  99. birdnet_analyzer/segments/__init__.py +3 -3
  100. birdnet_analyzer/segments/__main__.py +3 -3
  101. birdnet_analyzer/segments/cli.py +13 -14
  102. birdnet_analyzer/segments/core.py +81 -78
  103. birdnet_analyzer/segments/utils.py +383 -394
  104. birdnet_analyzer/species/__init__.py +3 -3
  105. birdnet_analyzer/species/__main__.py +3 -3
  106. birdnet_analyzer/species/cli.py +13 -14
  107. birdnet_analyzer/species/core.py +35 -35
  108. birdnet_analyzer/species/utils.py +74 -75
  109. birdnet_analyzer/train/__init__.py +3 -3
  110. birdnet_analyzer/train/__main__.py +3 -3
  111. birdnet_analyzer/train/cli.py +13 -14
  112. birdnet_analyzer/train/core.py +113 -113
  113. birdnet_analyzer/train/utils.py +877 -847
  114. birdnet_analyzer/translate.py +133 -104
  115. birdnet_analyzer/utils.py +425 -419
  116. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/METADATA +146 -129
  117. birdnet_analyzer-2.1.0.dist-info/RECORD +125 -0
  118. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/WHEEL +1 -1
  119. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/licenses/LICENSE +18 -18
  120. birdnet_analyzer/eBird_taxonomy_codes_2021E.json +0 -25280
  121. birdnet_analyzer-2.0.0.dist-info/RECORD +0 -117
  122. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/entry_points.txt +0 -0
  123. {birdnet_analyzer-2.0.0.dist-info → birdnet_analyzer-2.1.0.dist-info}/top_level.txt +0 -0
@@ -1,701 +1,694 @@
1
- """Module to analyze audio samples."""
2
-
3
- import datetime
4
- import json
5
- import operator
6
- import os
7
-
8
- import numpy as np
9
-
10
- import birdnet_analyzer.audio as audio
11
- import birdnet_analyzer.config as cfg
12
- import birdnet_analyzer.model as model
13
- import birdnet_analyzer.utils as utils
14
-
15
- # 0 1 2 3 4 5 6 7 8 9 10 11
16
- RAVEN_TABLE_HEADER = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tCommon Name\tSpecies Code\tConfidence\tBegin Path\tFile Offset (s)\n"
17
- RTABLE_HEADER = "filepath,start,end,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity,min_conf,species_list,model\n"
18
- KALEIDOSCOPE_HEADER = (
19
- "INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity\n"
20
- )
21
- CSV_HEADER = "Start (s),End (s),Scientific name,Common name,Confidence,File\n"
22
- SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
23
-
24
-
25
- def save_analysis_params(path):
26
- utils.save_params(
27
- path,
28
- (
29
- "File splitting duration",
30
- "Segment length",
31
- "Sample rate",
32
- "Segment overlap",
33
- "Minimum Segment length",
34
- "Bandpass filter minimum",
35
- "Bandpass filter maximum",
36
- "Merge consecutive detections",
37
- "Audio speed",
38
- "Custom classifier path",
39
- ),
40
- (
41
- cfg.FILE_SPLITTING_DURATION,
42
- cfg.SIG_LENGTH,
43
- cfg.SAMPLE_RATE,
44
- cfg.SIG_OVERLAP,
45
- cfg.SIG_MINLEN,
46
- cfg.BANDPASS_FMIN,
47
- cfg.BANDPASS_FMAX,
48
- cfg.MERGE_CONSECUTIVE,
49
- cfg.AUDIO_SPEED,
50
- cfg.CUSTOM_CLASSIFIER,
51
- ),
52
- )
53
-
54
-
55
- def load_codes():
56
- """Loads the eBird codes.
57
-
58
- Returns:
59
- A dictionary containing the eBird codes.
60
- """
61
- with open(os.path.join(SCRIPT_DIR, cfg.CODES_FILE), "r") as cfile:
62
- codes = json.load(cfile)
63
-
64
- return codes
65
-
66
-
67
- def generate_raven_table(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
68
- """
69
- Generates a Raven selection table from the given timestamps and prediction results.
70
-
71
- Args:
72
- timestamps (list[str]): List of timestamp strings in the format "start-end".
73
- result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of predictions.
74
- afile_path (str): Path to the audio file being analyzed.
75
- result_path (str): Path where the resulting Raven selection table will be saved.
76
-
77
- Returns:
78
- None
79
- """
80
- selection_id = 0
81
- out_string = RAVEN_TABLE_HEADER
82
-
83
- # Read native sample rate
84
- high_freq = audio.get_sample_rate(afile_path) / 2
85
-
86
- if high_freq > int(cfg.SIG_FMAX / cfg.AUDIO_SPEED):
87
- high_freq = int(cfg.SIG_FMAX / cfg.AUDIO_SPEED)
88
-
89
- high_freq = min(high_freq, int(cfg.BANDPASS_FMAX / cfg.AUDIO_SPEED))
90
- low_freq = max(cfg.SIG_FMIN, int(cfg.BANDPASS_FMIN / cfg.AUDIO_SPEED))
91
-
92
- # Extract valid predictions for every timestamp
93
- for timestamp in timestamps:
94
- rstring = ""
95
- start, end = timestamp.split("-", 1)
96
-
97
- for c in result[timestamp]:
98
- selection_id += 1
99
- label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
100
- code = cfg.CODES[c[0]] if c[0] in cfg.CODES else c[0]
101
- rstring += f"{selection_id}\tSpectrogram 1\t1\t{start}\t{end}\t{low_freq}\t{high_freq}\t{label.split('_', 1)[-1]}\t{code}\t{c[1]:.4f}\t{afile_path}\t{start}\n"
102
-
103
- # Write result string to file
104
- out_string += rstring
105
-
106
- # If we don't have any valid predictions, we still need to add a line to the selection table in case we want to combine results
107
- # TODO: That's a weird way to do it, but it works for now. It would be better to keep track of file durations during the analysis.
108
- if len(out_string) == len(RAVEN_TABLE_HEADER) and cfg.OUTPUT_PATH is not None:
109
- selection_id += 1
110
- out_string += (
111
- f"{selection_id}\tSpectrogram 1\t1\t0\t3\t{low_freq}\t{high_freq}\tnocall\tnocall\t1.0\t{afile_path}\t0\n"
112
- )
113
-
114
- utils.save_result_file(result_path, out_string)
115
-
116
-
117
- def generate_audacity(timestamps: list[str], result: dict[str, list], result_path: str):
118
- """
119
- Generates an Audacity timeline label file from the given timestamps and results.
120
-
121
- Args:
122
- timestamps (list[str]): A list of timestamp strings.
123
- result (dict[str, list]): A dictionary where keys are timestamps and values are lists of tuples,
124
- each containing a label and a confidence score.
125
- result_path (str): The file path where the result string will be saved.
126
-
127
- Returns:
128
- None
129
- """
130
- out_string = ""
131
-
132
- # Audacity timeline labels
133
- for timestamp in timestamps:
134
- rstring = ""
135
-
136
- for c in result[timestamp]:
137
- label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
138
- ts = timestamp.replace("-", "\t")
139
- lbl = label.replace("_", ", ")
140
- rstring += f"{ts}\t{lbl}\t{c[1]:.4f}\n"
141
-
142
- # Write result string to file
143
- out_string += rstring
144
-
145
- utils.save_result_file(result_path, out_string)
146
-
147
-
148
- def generate_kaleidoscope(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
149
- """
150
- Generates a Kaleidoscope-compatible CSV string from the given timestamps and results, and saves it to a file.
151
-
152
- Args:
153
- timestamps (list[str]): List of timestamp strings in the format "start-end".
154
- result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of tuples containing
155
- species label and confidence score.
156
- afile_path (str): Path to the audio file being analyzed.
157
- result_path (str): Path where the resulting CSV file will be saved.
158
-
159
- Returns:
160
- None
161
- """
162
- out_string = KALEIDOSCOPE_HEADER
163
-
164
- folder_path, filename = os.path.split(afile_path)
165
- parent_folder, folder_name = os.path.split(folder_path)
166
-
167
- for timestamp in timestamps:
168
- rstring = ""
169
- start, end = timestamp.split("-", 1)
170
-
171
- for c in result[timestamp]:
172
- label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
173
- rstring += "{},{},{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{}\n".format(
174
- parent_folder.rstrip("/"),
175
- folder_name,
176
- filename,
177
- start,
178
- float(end) - float(start),
179
- label.split("_", 1)[0],
180
- label.split("_", 1)[-1],
181
- c[1],
182
- cfg.LATITUDE,
183
- cfg.LONGITUDE,
184
- cfg.WEEK,
185
- cfg.SIG_OVERLAP,
186
- cfg.SIGMOID_SENSITIVITY,
187
- )
188
-
189
- # Write result string to file
190
- out_string += rstring
191
-
192
- utils.save_result_file(result_path, out_string)
193
-
194
-
195
- def generate_csv(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
196
- """
197
- Generates a CSV file from the given timestamps and results.
198
-
199
- Args:
200
- timestamps (list[str]): A list of timestamp strings in the format "start-end".
201
- result (dict[str, list]): A dictionary where keys are timestamp strings and values are lists of tuples.
202
- Each tuple contains a label and a confidence score.
203
- afile_path (str): The file path of the audio file being analyzed.
204
- result_path (str): The file path where the resulting CSV file will be saved.
205
-
206
- Returns:
207
- None
208
- """
209
- out_string = CSV_HEADER
210
-
211
- for timestamp in timestamps:
212
- rstring = ""
213
-
214
- for c in result[timestamp]:
215
- start, end = timestamp.split("-", 1)
216
- label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
217
- rstring += f"{start},{end},{label.split('_', 1)[0]},{label.split('_', 1)[-1]},{c[1]:.4f},{afile_path}\n"
218
-
219
- # Write result string to file
220
- out_string += rstring
221
-
222
- utils.save_result_file(result_path, out_string)
223
-
224
-
225
- def save_result_files(r: dict[str, list], result_files: dict[str, str], afile_path: str):
226
- """
227
- Saves the result files in various formats based on the provided configuration.
228
-
229
- Args:
230
- r (dict[str, list]): A dictionary containing the analysis results with timestamps as keys.
231
- result_files (dict[str, str]): A dictionary mapping result types to their respective file paths.
232
- afile_path (str): The path to the audio file being analyzed.
233
-
234
- Returns:
235
- None
236
- """
237
-
238
- os.makedirs(cfg.OUTPUT_PATH, exist_ok=True)
239
-
240
- # Merge consecutive detections of the same species
241
- r_merged = merge_consecutive_detections(r, cfg.MERGE_CONSECUTIVE)
242
-
243
- # Selection table
244
- timestamps = get_sorted_timestamps(r_merged)
245
-
246
- if "table" in result_files:
247
- generate_raven_table(timestamps, r_merged, afile_path, result_files["table"])
248
-
249
- if "audacity" in cfg.RESULT_TYPES:
250
- generate_audacity(timestamps, r_merged, result_files["audacity"])
251
-
252
- # if "r" in cfg.RESULT_TYPES:
253
- # generate_rtable(timestamps, r, afile_path, result_files["r"])
254
-
255
- if "kaleidoscope" in cfg.RESULT_TYPES:
256
- generate_kaleidoscope(timestamps, r_merged, afile_path, result_files["kaleidoscope"])
257
-
258
- if "csv" in cfg.RESULT_TYPES:
259
- generate_csv(timestamps, r_merged, afile_path, result_files["csv"])
260
-
261
-
262
- def combine_raven_tables(saved_results: list[str]):
263
- """
264
- Combines multiple Raven selection table files into a single file and adjusts the selection IDs and times.
265
-
266
- Args:
267
- saved_results (list[str]): List of file paths to the Raven selection table files to be combined.
268
-
269
- Returns:
270
- None
271
- """
272
- # Combine all files
273
- s_id = 1
274
- time_offset = 0
275
- audiofiles = []
276
-
277
- with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_RAVEN_FILENAME), "w", encoding="utf-8") as f:
278
- f.write(RAVEN_TABLE_HEADER)
279
-
280
- for rfile in saved_results:
281
- if not rfile:
282
- continue
283
- with open(rfile, "r", encoding="utf-8") as rf:
284
- try:
285
- lines = rf.readlines()
286
-
287
- # make sure it's a selection table
288
- if "Selection" not in lines[0] or "File Offset" not in lines[0]:
289
- continue
290
-
291
- # skip header and add to file
292
- f_name = lines[1].split("\t")[10]
293
- f_duration = audio.get_audio_file_length(f_name)
294
-
295
- audiofiles.append(f_name)
296
-
297
- for line in lines[1:]:
298
- # empty line?
299
- if not line.strip():
300
- continue
301
-
302
- # Is species code and common name == 'nocall'?
303
- # If so, that's a dummy line and we can skip it
304
- if line.split("\t")[7] == "nocall" and line.split("\t")[8] == "nocall":
305
- continue
306
-
307
- # adjust selection id
308
- line = line.split("\t")
309
- line[0] = str(s_id)
310
- s_id += 1
311
-
312
- # adjust time
313
- line[3] = str(float(line[3]) + time_offset)
314
- line[4] = str(float(line[4]) + time_offset)
315
-
316
- # write line
317
- f.write("\t".join(line))
318
-
319
- # adjust time offset
320
- time_offset += f_duration
321
-
322
- except Exception as ex:
323
- print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
324
- utils.write_error_log(ex)
325
-
326
- listfilesname = cfg.OUTPUT_RAVEN_FILENAME.rsplit(".", 1)[0] + ".list.txt"
327
-
328
- with open(os.path.join(cfg.OUTPUT_PATH, listfilesname), "w", encoding="utf-8") as f:
329
- f.writelines((f + "\n" for f in audiofiles))
330
-
331
-
332
- def combine_kaleidoscope_files(saved_results: list[str]):
333
- """
334
- Combines multiple Kaleidoscope result files into a single file.
335
-
336
- Args:
337
- saved_results (list[str]): A list of file paths to the saved Kaleidoscope result files.
338
-
339
- Returns:
340
- None
341
- """
342
- # Combine all files
343
- with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_KALEIDOSCOPE_FILENAME), "w", encoding="utf-8") as f:
344
- f.write(KALEIDOSCOPE_HEADER)
345
-
346
- for rfile in saved_results:
347
- with open(rfile, "r", encoding="utf-8") as rf:
348
- try:
349
- lines = rf.readlines()
350
-
351
- # make sure it's a selection table
352
- if "INDIR" not in lines[0] or "sensitivity" not in lines[0]:
353
- continue
354
-
355
- # skip header and add to file
356
- for line in lines[1:]:
357
- f.write(line)
358
-
359
- except Exception as ex:
360
- print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
361
- utils.write_error_log(ex)
362
-
363
-
364
- def combine_csv_files(saved_results: list[str]):
365
- """
366
- Combines multiple CSV files into a single CSV file.
367
-
368
- Args:
369
- saved_results (list[str]): A list of file paths to the CSV files to be combined.
370
- """
371
- # Combine all files
372
- with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_CSV_FILENAME), "w", encoding="utf-8") as f:
373
- f.write(CSV_HEADER)
374
-
375
- for rfile in saved_results:
376
- with open(rfile, "r", encoding="utf-8") as rf:
377
- try:
378
- lines = rf.readlines()
379
-
380
- # make sure it's a selection table
381
- if "Start (s)" not in lines[0] or "Confidence" not in lines[0]:
382
- continue
383
-
384
- # skip header and add to file
385
- for line in lines[1:]:
386
- f.write(line)
387
-
388
- except Exception as ex:
389
- print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
390
- utils.write_error_log(ex)
391
-
392
-
393
- def combine_results(saved_results: list[dict[str, str]]):
394
- """
395
- Combines various types of result files based on the configuration settings.
396
- This function checks the types of results specified in the configuration
397
- and combines the corresponding files from the saved results list.
398
-
399
- Args:
400
- saved_results (list[dict[str, str]]): A list of dictionaries containing
401
- file paths for different result types. Each dictionary represents
402
- a set of result files for a particular analysis.
403
-
404
- Returns:
405
- None
406
- """
407
- if "table" in cfg.RESULT_TYPES:
408
- combine_raven_tables([f["table"] for f in saved_results if f])
409
-
410
- # if "r" in cfg.RESULT_TYPES:
411
- # combine_rtable_files([f["r"] for f in saved_results if f])
412
-
413
- if "kaleidoscope" in cfg.RESULT_TYPES:
414
- combine_kaleidoscope_files([f["kaleidoscope"] for f in saved_results if f])
415
-
416
- if "csv" in cfg.RESULT_TYPES:
417
- combine_csv_files([f["csv"] for f in saved_results if f])
418
-
419
-
420
- def merge_consecutive_detections(results: dict[str, list], max_consecutive: int = None):
421
- """Merges consecutive detections of the same species.
422
- Uses the mean of the top-3 highest scoring predictions as
423
- confidence score for the merged detection.
424
-
425
- Args:
426
- results: The dictionary with {segment: scores}.
427
- max_consecutive: The maximum number of consecutive detections to merge. If None, merge all consecutive detections.
428
-
429
- Returns:
430
- The dictionary with merged detections.
431
- """
432
-
433
- # If max_consecutive is 0 or 1, return original results
434
- if max_consecutive is not None and max_consecutive <= 1:
435
- return results
436
-
437
- # For each species, make list of timestamps and scores
438
- species = {}
439
- for timestamp, scores in results.items():
440
- for label, score in scores:
441
- if label not in species:
442
- species[label] = []
443
- species[label].append((timestamp, score))
444
-
445
- # Sort timestamps by start time for each species
446
- for label, timestamps in species.items():
447
- species[label] = sorted(timestamps, key=lambda t: float(t[0].split("-", 1)[0]))
448
-
449
- # Merge consecutive detections
450
- merged_results = {}
451
- for label in species:
452
- timestamps = species[label]
453
-
454
- # Check if end time of current detection is within the start time of the next detection
455
- i = 0
456
- while i < len(timestamps) - 1:
457
- start, end = timestamps[i][0].split("-", 1)
458
- next_start, next_end = timestamps[i + 1][0].split("-", 1)
459
-
460
- if float(end) >= float(next_start):
461
- # Merge detections
462
- merged_scores = [timestamps[i][1], timestamps[i + 1][1]]
463
- timestamps.pop(i)
464
-
465
- while i < len(timestamps) - 1 and float(next_end) >= float(timestamps[i + 1][0].split("-", 1)[0]):
466
- if max_consecutive and len(merged_scores) >= max_consecutive:
467
- break
468
- merged_scores.append(timestamps[i + 1][1])
469
- next_end = timestamps[i + 1][0].split("-", 1)[1]
470
- timestamps.pop(i + 1)
471
-
472
- # Calculate mean of top 3 scores
473
- top_3_scores = sorted(merged_scores, reverse=True)[:3]
474
- merged_score = sum(top_3_scores) / len(top_3_scores)
475
-
476
- timestamps[i] = (f"{start}-{next_end}", merged_score)
477
-
478
- i += 1
479
-
480
- merged_results[label] = timestamps
481
-
482
- # Restore original format
483
- results = {}
484
- for label, timestamps in merged_results.items():
485
- for timestamp, score in timestamps:
486
- if timestamp not in results:
487
- results[timestamp] = []
488
- results[timestamp].append((label, score))
489
-
490
- return results
491
-
492
-
493
- def get_sorted_timestamps(results: dict[str, list]):
494
- """Sorts the results based on the segments.
495
-
496
- Args:
497
- results: The dictionary with {segment: scores}.
498
-
499
- Returns:
500
- Returns the sorted list of segments and their scores.
501
- """
502
- return sorted(results, key=lambda t: float(t.split("-", 1)[0]))
503
-
504
-
505
- def get_raw_audio_from_file(fpath: str, offset, duration):
506
- """Reads an audio file and splits the signal into chunks.
507
-
508
- Args:
509
- fpath: Path to the audio file.
510
-
511
- Returns:
512
- The signal split into a list of chunks.
513
- """
514
- # Open file
515
- sig, rate = audio.open_audio_file(
516
- fpath, cfg.SAMPLE_RATE, offset, duration, cfg.BANDPASS_FMIN, cfg.BANDPASS_FMAX, cfg.AUDIO_SPEED
517
- )
518
-
519
- # Split into raw audio chunks
520
- chunks = audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
521
-
522
- return chunks
523
-
524
-
525
- def predict(samples):
526
- """Predicts the classes for the given samples.
527
-
528
- Args:
529
- samples: Samples to be predicted.
530
-
531
- Returns:
532
- The prediction scores.
533
- """
534
- # Prepare sample and pass through model
535
- data = np.array(samples, dtype="float32")
536
- prediction = model.predict(data)
537
-
538
- # Logits or sigmoid activations?
539
- if cfg.APPLY_SIGMOID:
540
- prediction = model.flat_sigmoid(np.array(prediction), sensitivity=-1, bias=cfg.SIGMOID_SENSITIVITY)
541
-
542
- return prediction
543
-
544
-
545
- def get_result_file_names(fpath: str):
546
- """
547
- Generates a dictionary of result file names based on the input file path and configured result types.
548
-
549
- Args:
550
- fpath (str): The file path of the input file.
551
-
552
- Returns:
553
- dict: A dictionary where the keys are result types (e.g., "table", "audacity", "r", "kaleidoscope", "csv")
554
- and the values are the corresponding output file paths.
555
- """
556
- result_names = {}
557
-
558
- rpath = fpath.replace(cfg.INPUT_PATH, "")
559
-
560
- if rpath:
561
- rpath = rpath[1:] if rpath[0] in ["/", "\\"] else rpath
562
- else:
563
- rpath = os.path.basename(fpath)
564
-
565
- file_shorthand = rpath.rsplit(".", 1)[0]
566
-
567
- if "table" in cfg.RESULT_TYPES:
568
- result_names["table"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.selection.table.txt")
569
- if "audacity" in cfg.RESULT_TYPES:
570
- result_names["audacity"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.txt")
571
- # if "r" in cfg.RESULT_TYPES:
572
- # result_names["r"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.r.csv")
573
- if "kaleidoscope" in cfg.RESULT_TYPES:
574
- result_names["kaleidoscope"] = os.path.join(
575
- cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.kaleidoscope.csv"
576
- )
577
- if "csv" in cfg.RESULT_TYPES:
578
- result_names["csv"] = os.path.join(cfg.OUTPUT_PATH, file_shorthand + ".BirdNET.results.csv")
579
-
580
- return result_names
581
-
582
-
583
- def analyze_file(item):
584
- """
585
- Analyzes an audio file and generates prediction results.
586
-
587
- Args:
588
- item (tuple): A tuple containing the file path (str) and configuration settings.
589
-
590
- Returns:
591
- dict or None: A dictionary of result file names if analysis is successful,
592
- None if the file is skipped or an error occurs.
593
- Raises:
594
- Exception: If there is an error in reading the audio file or saving the results.
595
- """
596
- # Get file path and restore cfg
597
- fpath: str = item[0]
598
- cfg.set_config(item[1])
599
-
600
- result_file_names = get_result_file_names(fpath)
601
-
602
- if cfg.SKIP_EXISTING_RESULTS:
603
- if all(os.path.exists(f) for f in result_file_names.values()):
604
- print(f"Skipping {fpath} as it has already been analyzed", flush=True)
605
- return None # or return path to combine later? TODO
606
-
607
- # Start time
608
- start_time = datetime.datetime.now()
609
- offset = 0
610
- duration = int(cfg.FILE_SPLITTING_DURATION / cfg.AUDIO_SPEED)
611
- start, end = 0, cfg.SIG_LENGTH
612
- results = {}
613
-
614
- # Status
615
- print(f"Analyzing {fpath}", flush=True)
616
-
617
- try:
618
- fileLengthSeconds = int(audio.get_audio_file_length(fpath) / cfg.AUDIO_SPEED)
619
- except Exception as ex:
620
- # Write error log
621
- print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
622
- utils.write_error_log(ex)
623
-
624
- return None
625
-
626
- # Process each chunk
627
- try:
628
- while offset < fileLengthSeconds:
629
- chunks = get_raw_audio_from_file(fpath, offset, duration)
630
- samples = []
631
- timestamps = []
632
-
633
- for chunk_index, chunk in enumerate(chunks):
634
- # Add to batch
635
- samples.append(chunk)
636
- timestamps.append([round(start * cfg.AUDIO_SPEED, 1), round(end * cfg.AUDIO_SPEED, 1)])
637
-
638
- # Advance start and end
639
- start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
640
- end = start + cfg.SIG_LENGTH
641
-
642
- # Check if batch is full or last chunk
643
- if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
644
- continue
645
-
646
- # Predict
647
- p = predict(samples)
648
-
649
- # Add to results
650
- for i in range(len(samples)):
651
- # Get timestamp
652
- s_start, s_end = timestamps[i]
653
-
654
- # Get prediction
655
- pred = p[i]
656
-
657
- # Assign scores to labels
658
- p_labels = [
659
- p
660
- for p in zip(cfg.LABELS, pred, strict=True)
661
- if (cfg.TOP_N or p[1] >= cfg.MIN_CONFIDENCE)
662
- and (not cfg.SPECIES_LIST or p[0] in cfg.SPECIES_LIST)
663
- ]
664
-
665
- # Sort by score
666
- p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
667
-
668
- if cfg.TOP_N:
669
- p_sorted = p_sorted[: cfg.TOP_N]
670
-
671
- # TODO hier schon top n oder min conf raussortieren
672
- # Store top 5 results and advance indices
673
- results[str(s_start) + "-" + str(s_end)] = p_sorted
674
-
675
- # Clear batch
676
- samples = []
677
- timestamps = []
678
- offset = offset + duration
679
-
680
- except Exception as ex:
681
- # Write error log
682
- print(f"Error: Cannot analyze audio file {fpath}.\n", flush=True)
683
- utils.write_error_log(ex)
684
-
685
- return None
686
-
687
- # Save as selection table
688
- try:
689
- save_result_files(results, result_file_names, fpath)
690
-
691
- except Exception as ex:
692
- # Write error log
693
- print(f"Error: Cannot save result for {fpath}.\n", flush=True)
694
- utils.write_error_log(ex)
695
-
696
- return None
697
-
698
- delta_time = (datetime.datetime.now() - start_time).total_seconds()
699
- print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)
700
-
701
- return result_file_names
1
+ """Module to analyze audio samples."""
2
+
3
+ import datetime
4
+ import json
5
+ import operator
6
+ import os
7
+ from collections.abc import Sequence
8
+
9
+ import numpy as np
10
+
11
+ import birdnet_analyzer.config as cfg
12
+ from birdnet_analyzer import audio, model, utils
13
+
14
+ RAVEN_TABLE_HEADER = (
15
+ "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tCommon Name\tSpecies Code\tConfidence\tBegin Path\tFile Offset (s)\n"
16
+ )
17
+ KALEIDOSCOPE_HEADER = "INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,common_name,confidence,lat,lon,week,overlap,sensitivity\n"
18
+ CSV_HEADER = "Start (s),End (s),Scientific name,Common name,Confidence,File\n"
19
+ SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
20
+
21
+
22
def save_analysis_params(path):
    """Persist the current analysis configuration to *path*.

    Writes a name/value listing of every setting that influences an
    analysis run (segment length, sample rate, bandpass limits, etc.)
    via ``utils.save_params``.

    Args:
        path: Destination file for the parameter listing.
    """
    # Keep each setting next to its display name, then unzip for save_params
    settings = (
        ("File splitting duration", cfg.FILE_SPLITTING_DURATION),
        ("Segment length", cfg.SIG_LENGTH),
        ("Sample rate", cfg.SAMPLE_RATE),
        ("Segment overlap", cfg.SIG_OVERLAP),
        ("Minimum Segment length", cfg.SIG_MINLEN),
        ("Bandpass filter minimum", cfg.BANDPASS_FMIN),
        ("Bandpass filter maximum", cfg.BANDPASS_FMAX),
        ("Merge consecutive detections", cfg.MERGE_CONSECUTIVE),
        ("Audio speed", cfg.AUDIO_SPEED),
        ("Custom classifier path", cfg.CUSTOM_CLASSIFIER),
    )
    names, values = zip(*settings, strict=True)
    utils.save_params(path, names, values)
50
+
51
+
52
def load_codes():
    """Load the eBird species codes shipped with the package.

    Returns:
        A dictionary containing the eBird codes.
    """
    codes_path = os.path.join(SCRIPT_DIR, cfg.CODES_FILE)

    with open(codes_path) as codes_file:
        return json.loads(codes_file.read())
60
+
61
+
62
def generate_raven_table(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
    """
    Generates a Raven selection table from the given timestamps and prediction results.

    Args:
        timestamps (list[str]): List of timestamp strings in the format "start-end".
        result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of predictions.
        afile_path (str): Path to the audio file being analyzed.
        result_path (str): Path where the resulting Raven selection table will be saved.

    Returns:
        None
    """
    # Frequency bounds: clamp to the file's Nyquist frequency and the
    # configured signal/bandpass limits, rescaled by the audio speed factor.
    nyquist = audio.get_sample_rate(afile_path) / 2
    high_freq = min(nyquist, int(cfg.SIG_FMAX / cfg.AUDIO_SPEED))
    high_freq = int(min(high_freq, int(cfg.BANDPASS_FMAX / cfg.AUDIO_SPEED)))
    low_freq = max(cfg.SIG_FMIN, int(cfg.BANDPASS_FMIN / cfg.AUDIO_SPEED))

    selection_id = 0
    rows = []

    # One selection row per prediction, per timestamp
    for timestamp in timestamps:
        start, end = timestamp.split("-", 1)

        for label_name, confidence in result[timestamp]:
            selection_id += 1
            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(label_name)]
            code = cfg.CODES[label_name] if label_name in cfg.CODES else label_name
            rows.append(
                f"{selection_id}\tSpectrogram 1\t1\t{start}\t{end}\t{low_freq}\t{high_freq}\t{label.split('_', 1)[-1]}\t{code}\t{confidence:.4f}\t{afile_path}\t{start}\n"
            )

    # If we don't have any valid predictions, we still need to add a line to the selection table
    # in case we want to combine results
    # TODO: That's a weird way to do it, but it works for now. It would be better to keep track
    # of file durations during the analysis.
    if not rows and cfg.OUTPUT_PATH is not None:
        selection_id += 1
        rows.append(
            f"{selection_id}\tSpectrogram 1\t1\t0\t3\t{low_freq}\t{high_freq}\tnocall\tnocall\t1.0\t{afile_path}\t0\n"
        )

    utils.save_result_file(result_path, RAVEN_TABLE_HEADER + "".join(rows))
111
+
112
+
113
def generate_audacity(timestamps: list[str], result: dict[str, list], result_path: str):
    """
    Generates an Audacity timeline label file from the given timestamps and results.

    Args:
        timestamps (list[str]): A list of timestamp strings.
        result (dict[str, list]): A dictionary where keys are timestamps and values are lists of tuples,
                                  each containing a label and a confidence score.
        result_path (str): The file path where the result string will be saved.

    Returns:
        None
    """
    lines = []

    # Audacity timeline labels: "start<TAB>end<TAB>label<TAB>confidence"
    for timestamp in timestamps:
        time_range = timestamp.replace("-", "\t")

        for label_name, confidence in result[timestamp]:
            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(label_name)]
            readable_label = label.replace("_", ", ")
            lines.append(f"{time_range}\t{readable_label}\t{confidence:.4f}\n")

    utils.save_result_file(result_path, "".join(lines))
142
+
143
+
144
def generate_kaleidoscope(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
    """
    Generates a Kaleidoscope-compatible CSV string from the given timestamps and results, and saves it to a file.

    Args:
        timestamps (list[str]): List of timestamp strings in the format "start-end".
        result (dict[str, list]): Dictionary where keys are timestamp strings and values are lists of tuples containing
                                  species label and confidence score.
        afile_path (str): Path to the audio file being analyzed.
        result_path (str): Path where the resulting CSV file will be saved.

    Returns:
        None
    """
    lines = [KALEIDOSCOPE_HEADER]

    folder_path, filename = os.path.split(afile_path)
    parent_folder, folder_name = os.path.split(folder_path)
    in_dir = parent_folder.rstrip("/")

    for timestamp in timestamps:
        start, end = timestamp.split("-", 1)
        segment_duration = float(end) - float(start)

        for label_name, confidence in result[timestamp]:
            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(label_name)]
            scientific_name = label.split("_", 1)[0]
            common_name = label.split("_", 1)[-1]
            lines.append(
                f"{in_dir},{folder_name},{filename},{start},{segment_duration},"
                f"{scientific_name},{common_name},{confidence:.4f},{cfg.LATITUDE:.4f},{cfg.LONGITUDE:.4f},"
                f"{cfg.WEEK},{cfg.SIG_OVERLAP},{cfg.SIGMOID_SENSITIVITY}\n"
            )

    utils.save_result_file(result_path, "".join(lines))
189
+
190
+
191
def generate_csv(timestamps: list[str], result: dict[str, list], afile_path: str, result_path: str):
    """
    Generates a CSV file from the given timestamps and results.

    Args:
        timestamps (list[str]): A list of timestamp strings in the format "start-end".
        result (dict[str, list]): A dictionary where keys are timestamp strings and values are lists of tuples.
                                  Each tuple contains a label and a confidence score.
        afile_path (str): The file path of the audio file being analyzed.
        result_path (str): The file path where the resulting CSV file will be saved.

    Returns:
        None
    """
    from birdnet_analyzer.analyze import POSSIBLE_ADDITIONAL_COLUMNS_MAP

    # Resolve configured extra columns to their computed values
    columns_map = {}
    if cfg.ADDITIONAL_COLUMNS:
        for col in cfg.ADDITIONAL_COLUMNS:
            if col in POSSIBLE_ADDITIONAL_COLUMNS_MAP:
                columns_map[col] = POSSIBLE_ADDITIONAL_COLUMNS_MAP[col]()

    header = CSV_HEADER
    if columns_map:
        header = header[:-1] + "," + ",".join(columns_map) + "\n"

    # Extra-column values are constant per file, so build the suffix once
    extra_suffix = "," + ",".join(str(val) for val in columns_map.values()) if columns_map else ""

    lines = [header]
    for timestamp in timestamps:
        start, end = timestamp.split("-", 1)

        for label_name, confidence in result[timestamp]:
            label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(label_name)]
            scientific_name = label.split("_", 1)[0]
            common_name = label.split("_", 1)[-1]
            lines.append(
                f"{start},{end},{scientific_name},{common_name},{confidence:.4f},{afile_path}{extra_suffix}\n"
            )

    utils.save_result_file(result_path, "".join(lines))
235
+
236
+
237
def save_result_files(r: dict[str, list], result_files: dict[str, str], afile_path: str):
    """
    Saves the result files in various formats based on the provided configuration.

    Args:
        r (dict[str, list]): A dictionary containing the analysis results with timestamps as keys.
        result_files (dict[str, str]): A dictionary mapping result types to their respective file paths.
        afile_path (str): The path to the audio file being analyzed.

    Returns:
        None
    """

    os.makedirs(cfg.OUTPUT_PATH, exist_ok=True)

    # Merge consecutive detections of the same species
    r_merged = merge_consecutive_detections(r, cfg.MERGE_CONSECUTIVE)

    # Emit detections sorted by segment start time
    timestamps = get_sorted_timestamps(r_merged)

    # Consistently check result_files (not cfg.RESULT_TYPES): the dict only
    # contains entries for enabled result types, and checking it directly
    # avoids a KeyError if a caller passes a dict missing a configured type.
    if "table" in result_files:
        generate_raven_table(timestamps, r_merged, afile_path, result_files["table"])

    if "audacity" in result_files:
        generate_audacity(timestamps, r_merged, result_files["audacity"])

    # if "r" in result_files:
    #     generate_rtable(timestamps, r, afile_path, result_files["r"])

    if "kaleidoscope" in result_files:
        generate_kaleidoscope(timestamps, r_merged, afile_path, result_files["kaleidoscope"])

    if "csv" in result_files:
        generate_csv(timestamps, r_merged, afile_path, result_files["csv"])
272
+
273
+
274
def combine_raven_tables(saved_results: list[str]):
    """
    Combines multiple Raven selection table files into a single file and adjusts the selection IDs and times.

    Args:
        saved_results (list[str]): List of file paths to the Raven selection table files to be combined.

    Returns:
        None
    """
    # Combine all files
    s_id = 1
    time_offset = 0
    audiofiles = []

    with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_RAVEN_FILENAME), "w", encoding="utf-8") as f:
        f.write(RAVEN_TABLE_HEADER)

        for rfile in saved_results:
            if not rfile:
                continue
            with open(rfile, encoding="utf-8") as rf:
                try:
                    lines = rf.readlines()

                    # make sure it's a selection table
                    if "Selection" not in lines[0] or "File Offset" not in lines[0]:
                        continue

                    # Header-only tables have no rows to merge and no file to
                    # account for in the time offset; previously this raised
                    # IndexError below and logged a spurious error.
                    if len(lines) < 2:
                        continue

                    # The "Begin Path" column of the first data row names the audio file
                    f_name = lines[1].split("\t")[10]
                    f_duration = audio.get_audio_file_length(f_name)

                    audiofiles.append(f_name)

                    for line in lines[1:]:
                        # skip empty lines
                        if not line.strip():
                            continue

                        line_elements = line.split("\t")

                        # Species code and common name == 'nocall' marks the
                        # dummy row written for files without detections
                        if line_elements[7] == "nocall" and line_elements[8] == "nocall":
                            continue

                        # renumber the selection id
                        line_elements[0] = str(s_id)
                        s_id += 1

                        # shift begin/end times by the accumulated duration
                        # of the preceding files
                        line_elements[3] = str(float(line_elements[3]) + time_offset)
                        line_elements[4] = str(float(line_elements[4]) + time_offset)

                        # write line
                        f.write("\t".join(line_elements))

                    # adjust time offset
                    time_offset += f_duration

                except Exception as ex:
                    print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
                    utils.write_error_log(ex)

    listfilesname = cfg.OUTPUT_RAVEN_FILENAME.rsplit(".", 1)[0] + ".list.txt"

    with open(os.path.join(cfg.OUTPUT_PATH, listfilesname), "w", encoding="utf-8") as f:
        # NOTE: use a distinct loop variable so the file handle `f` is not
        # shadowed inside the generator expression
        f.writelines(name + "\n" for name in audiofiles)
342
+
343
+
344
def combine_kaleidoscope_files(saved_results: list[str]):
    """
    Combines multiple Kaleidoscope result files into a single file.

    Args:
        saved_results (list[str]): A list of file paths to the saved Kaleidoscope result files.

    Returns:
        None
    """
    out_path = os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_KALEIDOSCOPE_FILENAME)

    with open(out_path, "w", encoding="utf-8") as out_file:
        out_file.write(KALEIDOSCOPE_HEADER)

        for rfile in saved_results:
            with open(rfile, encoding="utf-8") as rf:
                try:
                    lines = rf.readlines()

                    # only accept files that look like Kaleidoscope output
                    if "INDIR" not in lines[0] or "sensitivity" not in lines[0]:
                        continue

                    # append every data row, dropping the duplicate header
                    out_file.writelines(lines[1:])

                except Exception as ex:
                    print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
                    utils.write_error_log(ex)
374
+
375
+
376
def combine_csv_files(saved_results: list[str]):
    """
    Combines multiple CSV files into a single CSV file.

    Args:
        saved_results (list[str]): A list of file paths to the CSV files to be combined.
    """
    parts: list[str] = []
    combined_length = 0

    for rfile in saved_results:
        try:
            with open(rfile, encoding="utf-8") as rf:
                lines = rf.readlines()
                # keep the header only while no content has been collected yet
                part = "".join(lines[1:] if combined_length else lines)
                parts.append(part)
                combined_length += len(part)

        except Exception as ex:
            print(f"Error: Cannot combine results from {rfile}.\n", flush=True)
            utils.write_error_log(ex)

    with open(os.path.join(cfg.OUTPUT_PATH, cfg.OUTPUT_CSV_FILENAME), "w", encoding="utf-8") as f:
        f.write("".join(parts))
397
+
398
+
399
def combine_results(saved_results: Sequence[dict[str, str] | None]):
    """
    Combines various types of result files based on the configuration settings.
    This function checks the types of results specified in the configuration
    and combines the corresponding files from the saved results list.

    Args:
        saved_results: Per-file dictionaries mapping result types to file
            paths; entries may be None for files that were skipped or failed.

    Returns:
        None
    """
    # drop None entries once, up front
    valid_results = [entry for entry in saved_results if entry]

    if "table" in cfg.RESULT_TYPES:
        combine_raven_tables([entry["table"] for entry in valid_results])

    if "kaleidoscope" in cfg.RESULT_TYPES:
        combine_kaleidoscope_files([entry["kaleidoscope"] for entry in valid_results])

    if "csv" in cfg.RESULT_TYPES:
        combine_csv_files([entry["csv"] for entry in valid_results])
421
+
422
+
423
def merge_consecutive_detections(results: dict[str, list], max_consecutive: int | None = None):
    """Merges consecutive detections of the same species.
    Uses the mean of the top-3 highest scoring predictions as
    confidence score for the merged detection.

    Args:
        results: The dictionary with {segment: scores}, where segment keys
            are "start-end" strings and scores are (label, score) tuples.
        max_consecutive: The maximum number of consecutive detections to merge.
            If None, merge all consecutive detections.

    Returns:
        The dictionary with merged detections, in the same {segment: scores} format.
    """

    # If max_consecutive is 0 or 1, return original results
    if max_consecutive is not None and max_consecutive <= 1:
        return results

    # For each species, make list of timestamps and scores
    # (inverts the mapping: label -> [(timestamp, score), ...])
    species = {}
    for timestamp, scores in results.items():
        for label, score in scores:
            if label not in species:
                species[label] = []
            species[label].append((timestamp, score))

    # Sort timestamps by start time for each species
    for label, timestamps in species.items():
        species[label] = sorted(timestamps, key=lambda t: float(t[0].split("-", 1)[0]))

    # Merge consecutive detections
    merged_results = {}
    for label in species:
        timestamps = species[label]

        # Check if end time of current detection is within the start time of the next detection.
        # NOTE: the list is mutated in place via pop() while scanning, so the
        # loop bound is re-evaluated each iteration on purpose.
        i = 0
        while i < len(timestamps) - 1:
            start, end = timestamps[i][0].split("-", 1)
            next_start, next_end = timestamps[i + 1][0].split("-", 1)

            if float(end) >= float(next_start):
                # Merge detections: collect scores of the overlapping pair first
                merged_scores = [timestamps[i][1], timestamps[i + 1][1]]
                timestamps.pop(i)

                # Keep absorbing following detections while they still overlap
                # the (growing) merged span, up to max_consecutive entries
                while i < len(timestamps) - 1 and float(next_end) >= float(timestamps[i + 1][0].split("-", 1)[0]):
                    if max_consecutive and len(merged_scores) >= max_consecutive:
                        break
                    merged_scores.append(timestamps[i + 1][1])
                    next_end = timestamps[i + 1][0].split("-", 1)[1]
                    timestamps.pop(i + 1)

                # Calculate mean of top 3 scores as the merged confidence
                top_3_scores = sorted(merged_scores, reverse=True)[:3]
                merged_score = sum(top_3_scores) / len(top_3_scores)

                timestamps[i] = (f"{start}-{next_end}", merged_score)

            i += 1

        merged_results[label] = timestamps

    # Restore original format: segment -> [(label, score), ...]
    results = {}
    for label, timestamps in merged_results.items():
        for timestamp, score in timestamps:
            if timestamp not in results:
                results[timestamp] = []
            results[timestamp].append((label, score))

    return results
495
+
496
+
497
def get_sorted_timestamps(results: dict[str, list]):
    """Sorts the results based on the segments.

    Args:
        results: The dictionary with {segment: scores}, keys formatted as "start-end".

    Returns:
        Returns the sorted list of segments and their scores.
    """

    def segment_start(segment: str) -> float:
        # the key's numeric start time, i.e. everything before the first "-"
        return float(segment.split("-", 1)[0])

    return sorted(results, key=segment_start)
507
+
508
+
509
def get_raw_audio_from_file(fpath: str, offset, duration):
    """Reads an audio file and splits the signal into chunks.

    Args:
        fpath: Path to the audio file.
        offset: Position (in seconds) to start reading from.
        duration: Number of seconds to read from the offset.

    Returns:
        The signal split into a list of chunks.
    """
    # Open file (resampled, bandpass-filtered and speed-adjusted per cfg)
    sig, rate = audio.open_audio_file(fpath, cfg.SAMPLE_RATE, offset, duration, cfg.BANDPASS_FMIN, cfg.BANDPASS_FMAX, cfg.AUDIO_SPEED)

    # Split into raw audio chunks of SIG_LENGTH seconds with SIG_OVERLAP overlap
    return audio.split_signal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
523
+
524
+
525
def predict(samples):
    """Predicts the classes for the given samples.

    Args:
        samples: Samples to be predicted.

    Returns:
        The prediction scores.
    """
    # Prepare sample and pass through model
    data = np.array(samples, dtype="float32")
    prediction = model.predict(data)

    # Logits or sigmoid activations?
    # NOTE(review): cfg.SIGMOID_SENSITIVITY is passed as `bias` while
    # `sensitivity` is fixed at -1 — confirm against model.flat_sigmoid's
    # parameter semantics that this pairing is intended.
    if cfg.APPLY_SIGMOID:
        prediction = model.flat_sigmoid(np.array(prediction), sensitivity=-1, bias=cfg.SIGMOID_SENSITIVITY)

    return prediction
543
+
544
+
545
def get_result_file_names(fpath: str):
    """
    Generates a dictionary of result file names based on the input file path and configured result types.

    Args:
        fpath (str): The file path of the input file.

    Returns:
        dict: A dictionary where the keys are result types (e.g., "table", "audacity", "kaleidoscope", "csv")
              and the values are the corresponding output file paths.
    """
    # Path relative to the input root; fall back to the bare file name
    # when the input path is the file itself.
    rpath = fpath.replace(cfg.INPUT_PATH, "")
    rpath = (rpath[1:] if rpath[0] in ["/", "\\"] else rpath) if rpath else os.path.basename(fpath)

    file_shorthand = rpath.rsplit(".", 1)[0]

    suffixes = {
        "table": ".BirdNET.selection.table.txt",
        "audacity": ".BirdNET.results.txt",
        # "r": ".BirdNET.results.r.csv",
        "kaleidoscope": ".BirdNET.results.kaleidoscope.csv",
        "csv": ".BirdNET.results.csv",
    }

    return {
        rtype: os.path.join(cfg.OUTPUT_PATH, file_shorthand + suffix)
        for rtype, suffix in suffixes.items()
        if rtype in cfg.RESULT_TYPES
    }
576
+
577
+
578
def analyze_file(item) -> dict[str, str] | None:
    """
    Analyzes an audio file and generates prediction results.

    Args:
        item (tuple): A tuple containing the file path (str) and configuration settings.

    Returns:
        dict or None: A dictionary of result file names if analysis is successful,
        None if the file is skipped or an error occurs.
    Raises:
        Exception: If there is an error in reading the audio file or saving the results.
    """
    # Get file path and restore cfg
    # (item[1] carries a config snapshot so worker processes share settings)
    fpath: str = item[0]
    cfg.set_config(item[1])

    result_file_names = get_result_file_names(fpath)

    # Skip only when every configured output file already exists
    if cfg.SKIP_EXISTING_RESULTS and all(os.path.exists(f) for f in result_file_names.values()):
        print(f"Skipping {fpath} as it has already been analyzed", flush=True)
        return None  # or return path to combine later? TODO

    # Start time
    start_time = datetime.datetime.now()
    offset = 0
    # Seconds of file read per outer iteration; rescaled by AUDIO_SPEED so the
    # speed-adjusted signal still spans FILE_SPLITTING_DURATION of model input
    duration = int(cfg.FILE_SPLITTING_DURATION / cfg.AUDIO_SPEED)
    start, end = 0, cfg.SIG_LENGTH
    results = {}

    # Status
    print(f"Analyzing {fpath}", flush=True)

    try:
        fileLengthSeconds = audio.get_audio_file_length(fpath)
    except Exception as ex:
        # Write error log
        print(f"Error: Cannot analyze audio file {fpath}. File corrupt?\n", flush=True)
        utils.write_error_log(ex)

        return None

    # Process each chunk
    try:
        while offset < fileLengthSeconds:
            chunks = get_raw_audio_from_file(fpath, offset, duration)
            samples = []
            timestamps = []

            for chunk_index, chunk in enumerate(chunks):
                # Add to batch
                samples.append(chunk)
                # Timestamps are reported in original-file time, hence the
                # AUDIO_SPEED rescaling
                timestamps.append([round(start * cfg.AUDIO_SPEED, 1), round(end * cfg.AUDIO_SPEED, 1)])

                # Advance start and end
                start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
                end = start + cfg.SIG_LENGTH

                # Check if batch is full or last chunk
                if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
                    continue

                # Predict
                p = predict(samples)

                # Add to results
                for i in range(len(samples)):
                    # Get timestamp
                    s_start, s_end = timestamps[i]

                    # Get prediction
                    pred = p[i]

                    # Assign scores to labels; when TOP_N is set, the
                    # confidence filter is bypassed (top-N selection below)
                    p_labels = [
                        p
                        for p in zip(cfg.LABELS, pred, strict=True)
                        if (cfg.TOP_N or p[1] >= cfg.MIN_CONFIDENCE) and (not cfg.SPECIES_LIST or p[0] in cfg.SPECIES_LIST)
                    ]

                    # Sort by score
                    p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)

                    if cfg.TOP_N:
                        p_sorted = p_sorted[: cfg.TOP_N]

                    # TODO: filter by top-N or min confidence earlier, in the comprehension above
                    # Store top 5 results and advance indices
                    results[str(s_start) + "-" + str(s_end)] = p_sorted

                # Clear batch
                samples = []
                timestamps = []

            offset = offset + duration

    except Exception as ex:
        # Write error log
        print(f"Error: Cannot analyze audio file {fpath}.\n", flush=True)
        utils.write_error_log(ex)

        return None

    # Save as selection table
    try:
        save_result_files(results, result_file_names, fpath)

    except Exception as ex:
        # Write error log
        print(f"Error: Cannot save result for {fpath}.\n", flush=True)
        utils.write_error_log(ex)

        return None

    delta_time = (datetime.datetime.now() - start_time).total_seconds()
    print(f"Finished {fpath} in {delta_time:.2f} seconds", flush=True)

    return result_file_names